163 changed files with 2791 additions and 17653 deletions
--- a/.claude/ai_notes/docs_todos.md
+++ b/.claude/ai_notes/docs_todos.md
@ -1,38 +0,0 @@
 # Docs TODOs
 ## Auto-sync README code examples with source
 The `docs/README.rst` has inline code blocks that
 duplicate actual example files (e.g.
 `examples/infected_asyncio_echo_server.py`). Every time
 the public API changes we have to manually sync both.
 Sphinx's `literalinclude` directive can pull code directly
 from source files:
 ```rst
 .. literalinclude:: ../examples/infected_asyncio_echo_server.py
   :language: python
   :caption: examples/infected_asyncio_echo_server.py
 ```
 Or to include only a specific function/section:
 ```rst
 .. literalinclude:: ../examples/infected_asyncio_echo_server.py
   :language: python
   :pyobject: aio_echo_server
 ```
 This way the docs always reflect the actual code without
 manual syncing.
 ### Considerations
 - `README.rst` is also rendered on GitHub/PyPI which do
  NOT support `literalinclude` - so we'd need a build
  step or a separate `_sphinx_readme.rst` (which already
  exists at `docs/github_readme/_sphinx_readme.rst`).
 - Could use a pre-commit hook or CI step to extract code
  from examples into the README for GitHub rendering.
 - Another option: a `sphinx.ext.autodoc`-style approach
  where docstrings from the actual module are pulled in.
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@ -1,42 +0,0 @@
 {
  "permissions": {
    "allow": [
      "Bash(cp .claude/*)",
      "Read(.claude/**)",
      "Read(.claude/skills/run-tests/**)",
      "Write(.claude/**/*commit_msg*)",
      "Write(.claude/git_commit_msg_LATEST.md)",
      "Skill(run-tests)",
      "Skill(close-wkt)",
      "Skill(open-wkt)",
      "Skill(prompt-io)",
      "Bash(date *)",
      "Bash(git diff *)",
      "Bash(git log *)",
      "Bash(git status)",
      "Bash(git remote:*)",
      "Bash(git stash:*)",
      "Bash(git mv:*)",
      "Bash(git rev-parse:*)",
      "Bash(test:*)",
      "Bash(ls:*)",
      "Bash(grep:*)",
      "Bash(find:*)",
      "Bash(ln:*)",
      "Bash(cat:*)",
      "Bash(mkdir:*)",
      "Bash(gh pr:*)",
      "Bash(gh api:*)",
      "Bash(gh issue:*)",
      "Bash(UV_PROJECT_ENVIRONMENT=py* uv sync:*)",
      "Bash(UV_PROJECT_ENVIRONMENT=py* uv run:*)",
      "Bash(echo EXIT:$?:*)",
      "Bash(echo \"EXIT=$?\")",
      "Read(//tmp/**)"
    ],
    "deny": [],
    "ask": []
  },
  "prefersReducedMotion": false,
  "outputStyle": "default"
 }
--- a/.claude/skills/commit-msg/style-guide-reference.md
+++ b/.claude/skills/commit-msg/style-guide-reference.md
@ -1,225 +0,0 @@
 # Commit Message Style Guide for `tractor`
 Analysis based on 500 recent commits from the `tractor` repository.
 ## Core Principles
 Write commit messages that are technically precise yet casual in
 tone. Use abbreviations and informal language while maintaining
 clarity about what changed and why.
 ## Subject Line Format
 ### Length and Structure
 - Target: ~50 chars with a hard-max of 67.
 - Use backticks around code elements (72.2% of commits)
 - Rarely use colons (5.2%), except for file prefixes
 - End with '?' for uncertain changes (rare: 0.8%)
 - End with '!' for important changes (rare: 2.0%)
 ### Opening Verbs (Present Tense)
 Most common verbs from analysis:
 - `Add` (14.4%) - wholly new features/functionality
 - `Use` (4.4%) - adopt new approach/tool
 - `Drop` (3.6%) - remove code/feature
 - `Fix` (2.4%) - bug fixes
 - `Move`/`Mv` (3.6%) - relocate code
 - `Adjust` (2.0%) - minor tweaks
 - `Update` (1.6%) - enhance existing feature
 - `Bump` (1.2%) - dependency updates
 - `Rename` (1.2%) - identifier changes
 - `Set` (1.2%) - configuration changes
 - `Handle` (1.0%) - add handling logic
 - `Raise` (1.0%) - add error raising
 - `Pass` (0.8%) - pass parameters/values
 - `Support` (0.8%) - add support for something
 - `Hide` (1.4%) - make private/internal
 - `Always` (1.4%) - enforce consistent behavior
 - `Mk` (1.4%) - make/create (abbreviated)
 - `Start` (1.0%) - begin implementation
 Other frequent verbs: `More`, `Change`, `Extend`, `Disable`, `Log`,
 `Enable`, `Ensure`, `Expose`, `Allow`
 ### Backtick Usage
 Always use backticks for:
 - Module names: `trio`, `asyncio`, `msgspec`, `greenback`, `stackscope`
 - Class names: `Context`, `Actor`, `Address`, `PldRx`, `SpawnSpec`
 - Method names: `.pause_from_sync()`, `._pause()`, `.cancel()`
 - Function names: `breakpoint()`, `collapse_eg()`, `open_root_actor()`
 - Decorators: `@acm`, `@context`
 - Exceptions: `Cancelled`, `TransportClosed`, `MsgTypeError`
 - Keywords: `finally`, `None`, `False`
 - Variable names: `tn`, `debug_mode`
 - Complex expressions: `trio.Cancelled`, `asyncio.Task`
 Most backticked terms in tractor:
 `trio`, `asyncio`, `Context`, `.pause_from_sync()`, `tn`,
 `._pause()`, `breakpoint()`, `collapse_eg()`, `Actor`, `@acm`,
 `.cancel()`, `Cancelled`, `open_root_actor()`, `greenback`
 ### Examples
 Good subject lines:
 ```
 Add `uds` to `._multiaddr`, tweak typing
 Drop `DebugStatus.shield` attr, add `.req_finished`
 Use `stackscope` for all actor-tree rendered "views"
 Fix `.to_asyncio` inter-task-cancellation!
 Bump `ruff.toml` to target py313
 Mv `load_module_from_path()` to new `._code_load` submod
 Always use `tuple`-cast for singleton parent addrs
 ```
 ## Body Format
 ### General Structure
 - 43.2% of commits have no body (simple changes)
 - Use blank line after subject
 - Max line length: 67 chars
 - Use `-` bullets for lists (28.0% of commits)
 - Rarely use `*` bullets (2.4%)
 ### Section Markers
 Use these markers to organize longer commit bodies:
 - `Also,` (most common: 26 occurrences)
 - `Other,` (13 occurrences)
 - `Deats,` (11 occurrences) - for implementation details
 - `Further,` (7 occurrences)
 - `TODO,` (3 occurrences)
 - `Impl details,` (2 occurrences)
 - `Notes,` (1 occurrence)
 ### Common Abbreviations
 Use these freely (sorted by frequency):
 - `msg` (63) - message
 - `bg` (37) - background
 - `ctx` (30) - context
 - `impl` (27) - implementation
 - `mod` (26) - module
 - `obvi` (17) - obviously
 - `tn` (16) - task nursery
 - `fn` (15) - function
 - `vs` (15) - versus
 - `bc` (14) - because
 - `var` (14) - variable
 - `prolly` (9) - probably
 - `ep` (6) - entry point
 - `OW` (5) - otherwise
 - `rn` (4) - right now
 - `sig` (4) - signal/signature
 - `deps` (3) - dependencies
 - `iface` (2) - interface
 - `subproc` (2) - subprocess
 - `tho` (2) - though
 - `ofc` (2) - of course
 ### Tone and Style
 - Casual but technical (use `XD` for humor: 23 times)
 - Use `..` for trailing thoughts (108 occurrences)
 - Use `Woops,` to acknowledge mistakes (4 subject lines)
 - Don't be afraid to show personality while being precise
 ### Example Bodies
 Simple with bullets:
 ```
 Add `multiaddr` and bump up some deps
 Since we're planning to use it for (discovery)
 addressing, allowing replacement of the hacky (pretend)
 attempt in `tractor._multiaddr` Bp
 Also pin some deps,
 - make us py312+
 - use `pdbp` with my frame indexing fix.
 - mv to latest `xonsh` for fancy cmd/suggestion injections.
 Bump lock file to match obvi!
 ```
 With section markers:
 ```
 Use `stackscope` for all actor-tree rendered "views"
 Instead of the (much more) limited and hacky `.devx._code`
 impls, move to using the new `.devx._stackscope` API which
 wraps the `stackscope` project.
 Deats,
 - make new `stackscope.extract_stack()` wrapper
 - port over frame-descing to `_stackscope.pformat_stack()`
 - move `PdbREPL` to use `stackscope` render approach
 - update tests for new stack output format
 Also,
 - tweak log formatting for consistency
 - add typing hints throughout
 ```
 ## Special Patterns
 ### WIP Commits
 Rare (0.2%) - avoid committing WIP if possible
 ### Merge Commits
 Auto-generated (4.4%), don't worry about style
 ### File References
 - Use `module.py` or `.submodule` style
 - Rarely use `file.py:line` references (0 in analysis)
 ### Links
 - GitHub links used sparingly (3 total)
 - Prefer code references over external links
 ## Footer
 The default footer should credit `claude` (you) for helping generate
 the commit msg content:
 ```
 (this commit msg was generated in some part by [`claude-code`][claude-code-gh])
 [claude-code-gh]: https://github.com/anthropics/claude-code
 ```
 Further, if the patch was solely or in part written
 by `claude`, instead add:
 ```
 (this patch was generated in some part by [`claude-code`][claude-code-gh])
 [claude-code-gh]: https://github.com/anthropics/claude-code
 ```
 ## Summary Checklist
 Before committing, verify:
 - [ ] Subject line uses present tense verb
 - [ ] Subject line ~50 chars (hard max 67)
 - [ ] Code elements wrapped in backticks
 - [ ] Body lines ≤67 chars
 - [ ] Abbreviations used where natural
 - [ ] Casual yet precise tone
 - [ ] Section markers if body >3 paragraphs
 - [ ] Technical accuracy maintained
 ## Analysis Metadata
 ```
 Source: tractor repository
 Commits analyzed: 500
 Date range: 2019-2025
 Analysis date: 2026-02-08
 ```
 ---
 (this style guide was generated by [`claude-code`][claude-code-gh]
 analyzing commit history)
 [claude-code-gh]: https://github.com/anthropics/claude-code
--- a/.claude/skills/conc-anal/SKILL.md
+++ b/.claude/skills/conc-anal/SKILL.md
@ -1,297 +0,0 @@
 ---
 name: conc-anal
 description: >
  Concurrency analysis for tractor's trio-based
  async primitives. Trace task scheduling across
  checkpoint boundaries, identify race windows in
  shared mutable state, and verify synchronization
  correctness. Invoke on code segments the user
  points at, OR proactively when reviewing/writing
  concurrent cache, lock, or multi-task acm code.
 argument-hint: "[file:line-range or function name]"
 allowed-tools:
  - Read
  - Grep
  - Glob
  - Task
 ---
 Perform a structured concurrency analysis on the
 target code. This skill should be invoked:
 - **On demand**: user points at a code segment
  (file:lines, function name, or pastes a snippet)
 - **Proactively**: when writing or reviewing code
  that touches shared mutable state across trio
  tasks — especially `_Cache`, locks, events, or
  multi-task `@acm` lifecycle management
 ## 0. Identify the target
 If the user provides a file:line-range or function
 name, read that code. If not explicitly provided,
 identify the relevant concurrent code from context
 (e.g. the current diff, a failing test, or the
 function under discussion).
 ## 1. Inventory shared mutable state
 List every piece of state that is accessed by
 multiple tasks. For each, note:
 - **What**: the variable/dict/attr (e.g.
  `_Cache.values`, `_Cache.resources`,
  `_Cache.users`)
 - **Scope**: class-level, module-level, or
  closure-captured
 - **Writers**: which tasks/code-paths mutate it
 - **Readers**: which tasks/code-paths read it
 - **Guarded by**: which lock/event/ordering
  protects it (or "UNGUARDED" if none)
 Format as a table:
 ```
 | State               | Writers         | Readers         | Guard          |
 |---------------------|-----------------|-----------------|----------------|
 | _Cache.values       | run_ctx, moc¹   | moc             | ctx_key lock   |
 | _Cache.resources    | run_ctx, moc    | moc, run_ctx    | UNGUARDED      |
 ```
 ¹ `moc` = `maybe_open_context`
 ## 2. Map checkpoint boundaries
 For each code path through the target, mark every
 **checkpoint** — any `await` expression where trio
 can switch to another task. Use line numbers:
 ```
 L325: await lock.acquire()        ← CHECKPOINT
 L395: await service_tn.start(...) ← CHECKPOINT
 L411: lock.release()              ← (not a checkpoint, but changes lock state)
 L414: yield (False, yielded)      ← SUSPEND (caller runs)
 L485: no_more_users.set()         ← (wakes run_ctx, no switch yet)
 ```
 **Key trio scheduling rules to apply:**
 - `Event.set()` makes waiters *ready* but does NOT
  switch immediately
 - `lock.release()` is not a checkpoint
 - `await sleep(0)` IS a checkpoint
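 - Code in `finally` blocks CAN have checkpoints,
  and each one re-raises `trio.Cancelled` while
  the enclosing scope stays cancelled (unlike
  asyncio's one-shot `CancelledError`)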
 - `await` inside `except` blocks can be
  `trio.Cancelled`-masked
 ## 3. Trace concurrent task schedules
 Write out the **interleaved execution trace** for
 the problematic scenario. Number each step and tag
 which task executes it:
 ```
 [Task A]  1. acquires lock
 [Task A]  2. cache miss → allocates resources
 [Task A]  3. releases lock
 [Task A]  4. yields to caller
 [Task A]  5. caller exits → finally runs
 [Task A]  6. users-- → 0, sets no_more_users
 [Task A]  7. pops lock from _Cache.locks
 [run_ctx] 8. wakes from no_more_users.wait()
 [run_ctx] 9. values.pop(ctx_key)
 [run_ctx] 10. acm __aexit__ → CHECKPOINT
 [Task B]  11. creates NEW lock (old one popped)
 [Task B]  12. acquires immediately
 [Task B]  13. values[ctx_key] → KeyError
 [Task B]  14. resources[ctx_key] → STILL EXISTS
 [Task B]  15. 💥 RuntimeError
 ```
 Identify the **race window**: the range of steps
 where state is inconsistent. In the example above,
 steps 9–10 are the window (values gone, resources
 still alive).
 ## 4. Classify the bug
 Categorize what kind of concurrency issue this is:
 - **TOCTOU** (time-of-check-to-time-of-use): state
  changes between a check and the action based on it
 - **Stale reference**: a task holds a reference to
  state that another task has invalidated
 - **Lifetime mismatch**: a synchronization primitive
  (lock, event) has a shorter lifetime than the
  state it's supposed to protect
 - **Missing guard**: shared state is accessed
  without any synchronization
 - **Atomicity gap**: two operations that should be
  atomic have a checkpoint between them
 ## 5. Propose fixes
 For each proposed fix, provide:
 - **Sketch**: pseudocode or diff showing the change
 - **How it closes the window**: which step(s) from
  the trace it eliminates or reorders
 - **Tradeoffs**: complexity, perf, new edge cases,
  impact on other code paths
 - **Risk**: what could go wrong (deadlocks, new
  races, cancellation issues)
 Rate each fix: `[simple|moderate|complex]` impl
 effort.
 ## 6. Output format
 Structure the full analysis as:
 ```markdown
 ## Concurrency analysis: `<target>`
 ### Shared state
 <table from step 1>
 ### Checkpoints
 <list from step 2>
 ### Race trace
 <interleaved trace from step 3>
 ### Classification
 <bug type from step 4>
 ### Fixes
 <proposals from step 5>
 ```
 ## Tractor-specific patterns to watch
 These are known problem areas in tractor's
 concurrency model. Flag them when encountered:
 ### `_Cache` lock vs `run_ctx` lifetime
 The `_Cache.locks` entry is managed by
 `maybe_open_context` callers, but `run_ctx` runs
 in `service_tn` — a different task tree. Lock
 pop/release in the caller's `finally` does NOT
 wait for `run_ctx` to finish tearing down. Any
 state that `run_ctx` cleans up in its `finally`
 (e.g. `resources.pop()`) is vulnerable to
 re-entry races after the lock is popped.
 ### `values.pop()` → acm `__aexit__` → `resources.pop()` gap
 In `_Cache.run_ctx`, the inner `finally` pops
 `values`, then the acm's `__aexit__` runs (which
 has checkpoints), then the outer `finally` pops
 `resources`. This creates a window where `values`
 is gone but `resources` still exists — a classic
 atomicity gap.
 ### Global vs per-key counters
 `_Cache.users` as a single `int` (pre-fix) meant
 that users of different `ctx_key`s inflated each
 other's counts, preventing teardown when one key's
 users hit zero. Always verify that per-key state
 (`users`, `locks`) is actually keyed on `ctx_key`
 and not on `fid` or some broader key.
 ### `Event.set()` wakes but doesn't switch
 `trio.Event.set()` makes waiting tasks *ready* but
 the current task continues executing until its next
 checkpoint. Code between `.set()` and the next
 `await` runs atomically from the scheduler's
 perspective. Use this to your advantage (or watch
 for bugs where code assumes the woken task runs
 immediately).
 ### `except` block checkpoint masking
 `await` expressions inside `except` handlers can
 be masked by `trio.Cancelled`. If a `finally`
 block runs from an `except` and contains
 `lock.release()`, the release happens — but any
 `await` after it in the same `except` may be
 swallowed. This is why `maybe_open_context`'s
 cache-miss path does `lock.release()` in a
 `finally` inside the `except KeyError`.
 ### Cancellation in `finally`
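 trio allows checkpoints in `finally` blocks, and
 its cancellation is level-triggered (unlike
 asyncio's one-shot `CancelledError`): every
 `await` inside a still-cancelled scope raises
 `trio.Cancelled` again. This means `finally`
 cleanup that does `await` can itself be cancelled
 (e.g. by nursery shutdown). Watch for cleanup
 code that assumes it will run to completion.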
 ### Unbounded waits in cleanup paths
 Any `await <event>.wait()` in a teardown path is
 a latent deadlock unless the event's setter is
 GUARANTEED to fire. If the setter depends on
 external state (peer disconnects, child process
 exit, subsequent task completion) that itself
 depends on the current task's progress, you have
 a mutual wait.
 Rule: **bound every `await X.wait()` in cleanup
 paths with `trio.move_on_after()`** unless you
 can prove the setter is unconditionally reachable
 from the state at the await site. Concrete recent
 example: `ipc_server.wait_for_no_more_peers()` in
 `async_main`'s finally (see
 `ai/conc-anal/subint_forkserver_test_cancellation_leak_issue.md`
 "probe iteration 3") — it was unbounded, and when
 one peer-handler was stuck the wait-for-no-more-
 peers event never fired, deadlocking the whole
 actor-tree teardown cascade.
 ### The capture-pipe-fill hang pattern (grep this first)
 When investigating any hang in the test suite
 **especially under fork-based backends**, first
 check whether the hang reproduces under `pytest
 -s` (`--capture=no`). If `-s` makes it go away
 you're not looking at a trio concurrency bug —
 you're looking at a Linux pipe-buffer fill.
 Mechanism: pytest replaces fds 1,2 with pipe
 write-ends. Fork-child subactors inherit those
 fds. High-volume error-log tracebacks (cancel
 cascade spew) fill the 64KB pipe buffer. Child
 `write()` blocks. Child can't exit. Parent's
 `waitpid`/pidfd wait blocks. Deadlock cascades up
 the tree.
 Pre-existing guards in `tests/conftest.py` encode
 this knowledge — grep these BEFORE blaming
 concurrency:
 ```python
 # tests/conftest.py:258
 if loglevel in ('trace', 'debug'):
    # XXX: too much logging will lock up the subproc (smh)
    loglevel: str = 'info'
 # tests/conftest.py:316
 # can lock up on the `_io.BufferedReader` and hang..
 stderr: str = proc.stderr.read().decode()
 ```
 See
 `ai/conc-anal/subint_forkserver_test_cancellation_leak_issue.md`
 for the full post-mortem and the canonical
 reproduction. It cost several investigation
 sessions before catching it —
 because the capture-pipe symptom was masked by
 deeper cascade-deadlocks. Once the cascades were
 fixed, the tree tore down enough to generate
 pipe-filling log volume → capture-pipe finally
 surfaced. Grep-note for future-self: **if a
 multi-subproc tractor test hangs, `pytest -s`
 first, conc-anal second.**
--- a/.claude/skills/pr-msg/format-reference.md
+++ b/.claude/skills/pr-msg/format-reference.md
@ -1,241 +0,0 @@
 # PR/Patch-Request Description Format Reference
 Canonical structure for `tractor` patch-request
 descriptions, designed to work across GitHub,
 Gitea, SourceHut, and GitLab markdown renderers.
 **Line length: wrap at 72 chars** for all prose
 content (Summary bullets, Motivation paragraphs,
 Scopes bullets, etc.). Fill lines *to* 72 — don't
 stop short at 50-65. Only raw URLs in
 reference-link definitions may exceed this.
 ## Template
 ```markdown
 <!-- pr-msg-meta
 branch: <branch-name>
 base: <base-branch>
 submitted:
  github: ___
  gitea: ___
  srht: ___
 -->
 ## <Title: present-tense verb + backticked code>
 ### Summary
 - [<hash>][<hash>] Description of change ending
  with period.
 - [<hash>][<hash>] Another change description
  ending with period.
 - [<hash>][<hash>] [<hash>][<hash>] Multi-commit
  change description.
 ### Motivation
 <1-2 paragraphs: problem/limitation first,
 then solution. Hard-wrap at 72 chars.>
 ### Scopes changed
 - [<hash>][<hash>] `pkg.mod.func()` — what
  changed.
  * [<hash>][<hash>] Also adjusts
    `.related_thing()` in same module.
 - [<hash>][<hash>] `tests.test_mod` — new/changed
  test coverage.
 <!--
 ### Cross-references
 Also submitted as
 [github-pr][] | [gitea-pr][] | [srht-patch][].
 ### Links
 - [relevant-issue-or-discussion](url)
 - [design-doc-or-screenshot](url)
 -->
 (this pr content was generated in some part by
 [`claude-code`][claude-code-gh])
 [<hash>]: https://<service>/<owner>/<repo>/commit/<hash>
 [claude-code-gh]: https://github.com/anthropics/claude-code
 <!-- cross-service pr refs (fill after submit):
 [github-pr]: https://github.com/<owner>/<repo>/pull/___
 [gitea-pr]: https://<host>/<owner>/<repo>/pulls/___
 [srht-patch]: https://git.sr.ht/~<owner>/<repo>/patches/___
 -->
 ```
 ## Markdown Reference-Link Strategy
 Use reference-style links for ALL commit hashes
 and cross-service PR refs to ensure cross-service
 compatibility:
 **Inline usage** (in bullets):
 ```markdown
 - [f3726cf9][f3726cf9] Add `reg_err_types()`
  for custom exc lookup.
 ```
 **Definition** (bottom of document):
 ```markdown
 [f3726cf9]: https://github.com/goodboy/tractor/commit/f3726cf9
 ```
 ### Why reference-style?
 - Keeps prose readable without long inline URLs.
 - All URLs in one place — trivially swappable
  per-service.
 - Most git services auto-link bare SHAs anyway,
  but explicit refs guarantee it works in *any*
  md renderer.
 - The `[hash][hash]` form is self-documenting —
  display text matches the ref ID.
 - Cross-service PR refs use the same mechanism:
  `[github-pr][]` resolves via a ref-link def
  at the bottom, trivially fillable post-submit.
 ## Cross-Service PR Placeholder Mechanism
 The generated description includes three layers
 of cross-service support, all using native md
 reference-links:
 ### 1. Metadata comment (top of file)
 ```markdown
 <!-- pr-msg-meta
 branch: remote_exc_type_registry
 base: main
 submitted:
  github: ___
  gitea: ___
  srht: ___
 -->
 ```
 A YAML-ish HTML comment block. The `___`
 placeholders get filled with PR/patch numbers
 after submission. Machine-parseable for tooling
 (e.g. `gish`) but invisible in rendered md.
 ### 2. Cross-references section (in body)
 ```markdown
 <!--
 ### Cross-references
 Also submitted as
 [github-pr][] | [gitea-pr][] | [srht-patch][].
 -->
 ```
 Commented out at generation time. After submitting
 to multiple services, uncomment and the ref-links
 resolve via the stubs at the bottom.
 ### 3. Ref-link stubs (bottom of file)
 ```markdown
 <!-- cross-service pr refs (fill after submit):
 [github-pr]: https://github.com/goodboy/tractor/pull/___
 [gitea-pr]: https://pikers.dev/goodboy/tractor/pulls/___
 [srht-patch]: https://git.sr.ht/~goodboy/tractor/patches/___
 -->
 ```
 Commented out with `___` number placeholders.
 After submission: uncomment, replace `___` with
 the actual number. Each service-specific copy
 fills in all services' numbers so any copy can
 cross-reference the others.
 ### Post-submission file layout
 ```
 pr_msg_LATEST.md                    # latest draft (skill root)
 msgs/
  20260325T002027Z_mybranch_pr_msg.md  # timestamped
  github/
    42_pr_msg.md        # github PR #42
  gitea/
    17_pr_msg.md        # gitea PR #17
  srht/
    5_pr_msg.md         # srht patch #5
 ```
 Each `<service>/<num>_pr_msg.md` is a copy with:
 - metadata `submitted:` fields filled in
 - cross-references section uncommented
 - ref-link stubs uncommented with real numbers
 - all services cross-linked in each copy
 This mirrors the `gish` skill's
 `<backend>/<num>.md` pattern.
 ## Commit-Link URL Patterns by Service
 | Service   | Pattern                             |
 |-----------|-------------------------------------|
 | GitHub    | `https://github.com/<o>/<r>/commit/<h>` |
 | Gitea     | `https://<host>/<o>/<r>/commit/<h>` |
 | SourceHut | `https://git.sr.ht/~<o>/<r>/commit/<h>` |
 | GitLab    | `https://gitlab.com/<o>/<r>/-/commit/<h>` |
 ## PR/Patch URL Patterns by Service
 | Service   | Pattern                             |
 |-----------|-------------------------------------|
 | GitHub    | `https://github.com/<o>/<r>/pull/<n>` |
 | Gitea     | `https://<host>/<o>/<r>/pulls/<n>`  |
 | SourceHut | `https://git.sr.ht/~<o>/<r>/patches/<n>` |
 | GitLab    | `https://gitlab.com/<o>/<r>/-/merge_requests/<n>` |
 ## Scope Naming Convention
 Use Python namespace-resolution syntax for
 referencing changed code scopes:
 | File path                 | Scope reference               |
 |---------------------------|-------------------------------|
 | `tractor/_exceptions.py`  | `tractor._exceptions`         |
 | `tractor/_state.py`       | `tractor._state`              |
 | `tests/test_foo.py`       | `tests.test_foo`              |
 | Function in module        | `tractor._exceptions.func()`  |
 | Method on class           | `.RemoteActorError.src_type`  |
 | Class                     | `tractor._exceptions.RAE`     |
 Prefix with the package path for top-level refs;
 use leading-dot shorthand (`.ClassName.method()`)
 for sub-bullets where the parent module is already
 established.
 ## Title Conventions
 Same verb vocabulary as commit messages:
 - `Add` — wholly new feature/API
 - `Fix` — bug fix
 - `Drop` — removal
 - `Use` — adopt new approach
 - `Move`/`Mv` — relocate code
 - `Adjust` — minor tweak
 - `Update` — enhance existing feature
 - `Support` — add support for something
 Target 50 chars, hard max 70. Always backtick
 code elements.
 ## Tone
 Casual yet technically precise — matching the
 project's commit-msg style. Terse but every bullet
 carries signal. Use project abbreviations freely
 (msg, bg, ctx, impl, mod, obvi, fn, bc, var,
 prolly, ep, etc.).
 ---
 (this format reference was generated by
 [`claude-code`][claude-code-gh])
 [claude-code-gh]: https://github.com/anthropics/claude-code
--- a/.claude/skills/run-tests/SKILL.md
+++ b/.claude/skills/run-tests/SKILL.md
@ -1,523 +0,0 @@
 ---
 name: run-tests
 description: >
  Run tractor test suite (or subsets). Use when the user wants
  to run tests, verify changes, or check for regressions.
 argument-hint: "[test-path-or-pattern] [--opts]"
 allowed-tools:
  - Bash(python -m pytest *)
  - Bash(python -c *)
  - Bash(python --version *)
  - Bash(UV_PROJECT_ENVIRONMENT=py* uv run python *)
  - Bash(UV_PROJECT_ENVIRONMENT=py* uv run pytest *)
  - Bash(UV_PROJECT_ENVIRONMENT=py* uv sync *)
  - Bash(UV_PROJECT_ENVIRONMENT=py* uv pip show *)
  - Bash(git rev-parse *)
  - Bash(ls *)
  - Bash(cat *)
  - Bash(jq * .pytest_cache/*)
  - Read
  - Grep
  - Glob
  - Task
  - AskUserQuestion
 ---
 Run the `tractor` test suite using `pytest`. Follow this
 process:
 ## 1. Parse user intent
 From the user's message and any arguments, determine:
 - **scope**: full suite, specific file(s), specific
  test(s), or a keyword pattern (`-k`).
 - **transport**: which IPC transport protocol to test
  against (default: `tcp`, also: `uds`).
 - **options**: any extra pytest flags the user wants
  (e.g. `--ll debug`, `--tpdb`, `-x`, `-v`).
 If the user provides a bare path or pattern as argument,
 treat it as the test target. Examples:
 - `/run-tests` → full suite
 - `/run-tests test_local.py` → single file
 - `/run-tests test_registrar -v` → file + verbose
 - `/run-tests -k cancel` → keyword filter
 - `/run-tests tests/ipc/ --tpt-proto uds` → subdir + UDS
 ## 2. Construct the pytest command
 Base command:
 ```
 python -m pytest
 ```
 ### Default flags (always include unless user overrides):
 - `-x` (stop on first failure)
 - `--tb=short` (concise tracebacks)
 - `--no-header` (reduce noise)
 ### Path resolution:
 - If the user gives a bare filename like `test_local.py`,
  resolve it under `tests/`.
 - If the user gives a subdirectory like `ipc/`, resolve
  under `tests/ipc/`.
 - Glob if needed: `tests/**/test_*<pattern>*.py`
 ### Key pytest options for this project:
 | Flag | Purpose |
 |---|---|
 | `--ll <level>` | Set tractor log level (e.g. `debug`, `info`, `runtime`) |
 | `--tpdb` / `--debug-mode` | Enable tractor's multi-proc debugger |
 | `--tpt-proto <key>` | IPC transport: `tcp` (default) or `uds` |
 | `--spawn-backend <be>` | Spawn method: `trio` (default), `mp_spawn`, `mp_forkserver` |
 | `-k <expr>` | pytest keyword filter |
 | `-v` / `-vv` | Verbosity |
 | `-s` | No output capture (useful with `--tpdb`) |
 ### Common combos:
 ```sh
 # quick smoke test of core modules
 python -m pytest tests/test_local.py tests/test_rpc.py -x --tb=short --no-header
 # full suite, stop on first failure
 python -m pytest tests/ -x --tb=short --no-header
 # specific test with debug
 python -m pytest tests/discovery/test_registrar.py::test_reg_then_unreg -x -s --tpdb --ll debug
 # run with UDS transport
 python -m pytest tests/ -x --tb=short --no-header --tpt-proto uds
 # keyword filter
 python -m pytest tests/ -x --tb=short --no-header -k "cancel and not slow"
 ```
 ## 3. Pre-flight: venv detection (MANDATORY)
 **Always verify a `uv` venv is active before running
 `python` or `pytest`.** This project uses
 `UV_PROJECT_ENVIRONMENT=py<MINOR>` naming (e.g.
 `py313`) — never `.venv`.
 ### Step 1: detect active venv
 Run this check first:
 ```sh
 python -c "
 import sys, os
 venv = os.environ.get('VIRTUAL_ENV', '')
 prefix = sys.prefix
 print(f'VIRTUAL_ENV={venv}')
 print(f'sys.prefix={prefix}')
 print(f'executable={sys.executable}')
 "
 ```
 ### Step 2: interpret results
 **Case A — venv is active** (`VIRTUAL_ENV` is set
 and points to a `py<MINOR>/` dir under the project
 root or worktree):
 Use bare `python` / `python -m pytest` for all
 commands. This is the normal, fast path.
 **Case B — no venv active** (`VIRTUAL_ENV` is empty
 or `sys.prefix` points to a system Python):
 Use `AskUserQuestion` to ask the user:
 > "No uv venv is active. Should I activate one
 > via `UV_PROJECT_ENVIRONMENT=py<MINOR> uv sync`,
 > or would you prefer to activate your shell venv
 > first?"
 Options:
 1. **"Create/sync venv"** — run
   `UV_PROJECT_ENVIRONMENT=py<MINOR> uv sync` where
   `<MINOR>` is detected from `python --version`
   (e.g. `313` for 3.13). Then use
   `py<MINOR>/bin/python` for all subsequent
   commands in this session.
 2. **"I'll activate it myself"** — stop and let the
   user `source py<MINOR>/bin/activate` or similar.
 **Case C — inside a git worktree** (`git rev-parse
 --git-common-dir` differs from `--git-dir`):
 Verify Python resolves from the **worktree's own
 venv**, not the main repo's:
 ```sh
 python -c "import tractor; print(tractor.__file__)"
 ```
 If the path points outside the worktree, create a
 worktree-local venv:
 ```sh
 UV_PROJECT_ENVIRONMENT=py<MINOR> uv sync
 ```
 Then use `py<MINOR>/bin/python` for all commands.
 **Why this matters**: without the correct venv,
 subprocesses spawned by tractor resolve modules
 from the wrong editable install, causing spurious
 `AttributeError` / `ModuleNotFoundError`.
 ### Fallback: `uv run`
 If the user can't or won't activate a venv, all
 `python` and `pytest` commands can be prefixed
 with `UV_PROJECT_ENVIRONMENT=py<MINOR> uv run`:
 ```sh
 # instead of: python -m pytest tests/ -x
 UV_PROJECT_ENVIRONMENT=py313 uv run pytest tests/ -x
 # instead of: python -c 'import tractor'
 UV_PROJECT_ENVIRONMENT=py313 uv run python -c 'import tractor'
 ```
 `uv run` auto-discovers the project and venv,
 but is slower than a pre-activated venv due to
 lock-file resolution on each invocation. Prefer
 activating the venv when possible.
 ### Step 3: import + collection checks
 After venv is confirmed, always run these
 (especially after refactors or module moves):
 ```sh
 # 1. package import smoke check
 python -c 'import tractor; print(tractor)'
 # 2. verify all tests collect (no import errors)
 python -m pytest tests/ -x -q --co 2>&1 | tail -5
 ```
 If either fails, fix the import error before running
 any actual tests.
 ### Step 4: zombie-actor / stale-registry check (MANDATORY)
 The tractor runtime's default registry address is
 **`127.0.0.1:1616`** (TCP) / `/tmp/registry@1616.sock`
 (UDS). Whenever any prior test run — especially one
 using a fork-based backend like `subint_forkserver` —
 leaks a child actor process, that zombie keeps the
 registry port bound and **every subsequent test
 session fails to bind**, often presenting as 50+
 unrelated failures ("all tests broken"!) across
 backends.
 **This has to be checked before the first run AND
 after any cancelled/SIGINT'd run** — a run interrupted
 by a signal mid-test can leave orphaned child procs.
 ```sh
 # 1. TCP registry — any listener on :1616? (primary signal)
 ss -tlnp 2>/dev/null | grep ':1616' || echo 'TCP :1616 free'
 # 2. leftover actor/forkserver procs — scoped to THIS
 #    repo's python path, so we don't false-flag legit
 #    long-running tractor-using apps (e.g. `piker`,
 #    downstream projects that embed tractor).
 pgrep -af "$(pwd)/py[0-9]*/bin/python.*_actor_child_main|subint-forkserv" \
  | grep -v 'grep\|pgrep' \
  || echo 'no leaked actor procs from this repo'
 # 3. stale UDS registry sockets
 ls -la /tmp/registry@*.sock 2>/dev/null \
  || echo 'no leaked UDS registry sockets'
 ```
 **Interpretation:**
 - **TCP :1616 free AND no stale sockets** → clean,
  proceed. The actor-procs probe is secondary — false
  positives are common (piker, any other tractor-
  embedding app); only clean up if `:1616` is bound or
  sockets linger.
 - **TCP :1616 bound OR stale sockets present** →
  surface PIDs + cmdlines to the user, offer cleanup:
  ```sh
  # 1. GRACEFUL FIRST (tractor is structured concurrent — it
  #    catches SIGINT as an OS-cancel in `_trio_main` and
  #    cascades Portal.cancel_actor via IPC to every descendant.
  #    So always try SIGINT first with a bounded timeout; only
  #    escalate to SIGKILL if graceful cleanup doesn't complete).
  pkill -INT -f "$(pwd)/py[0-9]*/bin/python.*_actor_child_main|subint-forkserv"
  # 2. bounded wait for graceful teardown (usually sub-second).
  #    Loop until the processes exit, or timeout. Keep the
  #    bound tight — hung/abrupt-killed descendants usually
  #    hang forever, so don't wait more than a few seconds.
  for i in $(seq 1 10); do
    pgrep -f "$(pwd)/py[0-9]*/bin/python.*_actor_child_main|subint-forkserv" >/dev/null || break
    sleep 0.3
  done
  # 3. ESCALATE TO SIGKILL only if graceful didn't finish.
  if pgrep -f "$(pwd)/py[0-9]*/bin/python.*_actor_child_main|subint-forkserv" >/dev/null; then
    echo 'graceful teardown timed out — escalating to SIGKILL'
    pkill -9 -f "$(pwd)/py[0-9]*/bin/python.*_actor_child_main|subint-forkserv"
  fi
  # 4. if a test zombie holds :1616 specifically and doesn't
  #    match the above pattern, find its PID the hard way:
  ss -tlnp 2>/dev/null | grep ':1616'   # prints `users:(("<name>",pid=NNNN,...))`
  # then (same SIGINT-first ladder):
  # kill -INT <NNNN>; sleep 1; kill -9 <NNNN> 2>/dev/null
  # 5. remove stale UDS sockets
  rm -f /tmp/registry@*.sock
  # 6. re-verify
  ss -tlnp 2>/dev/null | grep ':1616' || echo 'TCP :1616 now free'
  ```
 **Never ignore stale registry state.** If you see the
 "all tests failing" pattern — especially
 `trio.TooSlowError` / connection refused / address in
 use on many unrelated tests — check registry **before**
 spelunking into test code. The failure signature will
 be identical across backends because they're all
 fighting for the same port.
 **False-positive warning for probe 2 (the `pgrep`
 check in the block above):** a plain
 `pgrep -af '_actor_child_main'` will also match
 legit long-running tractor-embedding apps (e.g.
 `piker` at `~/repos/piker/py*/bin/python3 -m
 tractor._child ...`). Always scope to the current
 repo's python path, or only use probe 1 (the `:1616`
 listener check) as the authoritative signal.
 ## 4. Run and report
 - Run the constructed command.
 - Use a timeout of **600000ms** (10min) for full suite
  runs, **120000ms** (2min) for single-file runs.
 - If the suite is large (full `tests/`), consider running
  in the background and checking output when done.
 - Use `--lf` (last-failed) to re-run only previously
  failing tests when iterating on a fix.
 ### On failure:
 - Show the failing test name(s) and short traceback.
 - If the failure looks related to recent changes, point
  out the likely cause and suggest a fix.
 - **Check the known-flaky list** (section 8) before
  investigating — don't waste time on pre-existing
  timeout issues.
 - **NEVER auto-commit fixes.** If you apply a code fix
  during test iteration, leave it unstaged. Tell the
  user what changed and suggest they review the
  worktree state, stage files manually, and use
  `/commit-msg` (inline or in a separate session) to
  generate the commit message. The human drives all
  `git add` and `git commit` operations.
 ### On success:
 - Report the pass/fail/skip counts concisely.
 ## 5. Test directory layout (reference)
 ```
 tests/
 ├── conftest.py          # root fixtures, daemon, signals
 ├── devx/                # debugger/tooling tests
 ├── ipc/                 # transport protocol tests
 ├── msg/                 # messaging layer tests
 ├── discovery/           # discovery subsystem tests
 │   ├── test_multiaddr.py  # multiaddr construction
 │   └── test_registrar.py  # registry/discovery protocol
 ├── test_local.py        # registrar + local actor basics
 ├── test_rpc.py          # RPC error handling
 ├── test_spawning.py     # subprocess spawning
 ├── test_multi_program.py  # multi-process tree tests
 ├── test_cancellation.py # cancellation semantics
 ├── test_context_stream_semantics.py  # ctx streaming
 ├── test_inter_peer_cancellation.py   # peer cancel
 ├── test_infected_asyncio.py  # trio-in-asyncio
 └── ...
 ```
 ## 6. Change-type → test mapping
 After modifying specific modules, run the corresponding
 test subset first for fast feedback:
 | Changed module(s) | Run these tests first |
 |---|---|
 | `runtime/_runtime.py`, `runtime/_state.py` | `test_local.py test_rpc.py test_spawning.py test_root_runtime.py` |
 | `discovery/` (`_registry`, `_discovery`, `_addr`) | `tests/discovery/ test_multi_program.py test_local.py` |
 | `_context.py`, `_streaming.py` | `test_context_stream_semantics.py test_advanced_streaming.py` |
 | `ipc/` (`_chan`, `_server`, `_transport`) | `tests/ipc/ test_2way.py` |
 | `runtime/_portal.py`, `runtime/_rpc.py` | `test_rpc.py test_cancellation.py` |
 | `spawn/` (`_spawn`, `_entry`) | `test_spawning.py test_multi_program.py` |
 | `devx/debug/` | `tests/devx/test_debugger.py` (slow!) |
 | `to_asyncio.py` | `test_infected_asyncio.py test_root_infect_asyncio.py` |
 | `msg/` | `tests/msg/` |
 | `_exceptions.py` | `test_remote_exc_relay.py test_inter_peer_cancellation.py` |
 | `runtime/_supervise.py` | `test_cancellation.py test_spawning.py` |
 ## 7. Quick-check shortcuts
 ### After refactors (fastest first-pass):
 ```sh
 # import + collect check
 python -c 'import tractor' && python -m pytest tests/ -x -q --co 2>&1 | tail -3
 # core subset (~10s)
 python -m pytest tests/test_local.py tests/test_rpc.py tests/test_spawning.py tests/discovery/test_registrar.py -x --tb=short --no-header
 ```
 ### Inspect last failures (without re-running):
 When the user asks "what failed?", "show failures",
 or wants to check the last-failed set before
 re-running — read the pytest cache directly. This
 is instant and avoids test collection overhead.
 ```sh
 python -c "
 import json, pathlib, sys
 p = pathlib.Path('.pytest_cache/v/cache/lastfailed')
 if not p.exists():
    print('No lastfailed cache found.'); sys.exit()
 data = json.loads(p.read_text())
 # filter to real test node IDs (ignore junk
 # entries that can accumulate from system paths)
 tests = sorted(k for k in data if k.startswith('tests/'))
 if not tests:
    print('No failures recorded.')
 else:
    print(f'{len(tests)} last-failed test(s):')
    for t in tests:
        print(f'  {t}')
 "
 ```
 **Why not `--cache-show` or `--co --lf`?**
 - `pytest --cache-show 'cache/lastfailed'` works
  but dumps raw dict repr including junk entries
  (stale system paths that leak into the cache).
 - `pytest --co --lf` actually *collects* tests which
  triggers import resolution and is slow (~0.5s+).
  Worse, when cached node IDs don't exactly match
  current parametrize IDs (e.g. param names changed
  between runs), pytest falls back to collecting
  the *entire file*, giving false positives.
 - Reading the JSON directly is instant, filterable
  to `tests/`-prefixed entries, and shows exactly
  what pytest recorded — no interpretation.
 **After inspecting**, re-run the failures:
 ```sh
 python -m pytest --lf -x --tb=short --no-header
 ```
 ### Full suite in background:
 When core tests pass and you want full coverage while
 continuing other work, run in background:
 ```sh
 python -m pytest tests/ -x --tb=short --no-header -q
 ```
 (use `run_in_background=true` on the Bash tool)
 ## 8. Known flaky tests
 These tests have **pre-existing** timing/environment
 sensitivity. If they fail with `TooSlowError` or
 pexpect `TIMEOUT`, they are almost certainly NOT caused
 by your changes — note them and move on.
 | Test | Typical error | Notes |
 |---|---|---|
 | `devx/test_debugger.py::test_multi_nested_subactors_error_through_nurseries` | pexpect TIMEOUT | Debugger pexpect timing |
 | `test_cancellation.py::test_cancel_via_SIGINT_other_task` | TooSlowError | Signal handling race |
 | `test_inter_peer_cancellation.py::test_peer_spawns_and_cancels_service_subactor` | TooSlowError | Async timing (both param variants) |
 | `test_docs_examples.py::test_example[we_are_processes.py]` | `assert None == 0` | `__main__` missing `__file__` in subproc |
 **Rule of thumb**: if a test fails with `TooSlowError`,
 `trio.TooSlowError`, or `pexpect.TIMEOUT` and you didn't
 touch the relevant code path, it's flaky — skip it.
 ## 9. The pytest-capture hang pattern (CHECK THIS FIRST)
 **Symptom:** a tractor test hangs indefinitely under
 default `pytest` but passes instantly when you add
 `-s` (`--capture=no`).
 **Cause:** tractor subactors (especially under fork-
 based backends) inherit pytest's stdout/stderr
 capture pipes via fds 1,2. Under high-volume error
 logging (e.g. multi-level cancel cascade, nested
 `run_in_actor` failures, anything triggering
 `RemoteActorError` + `ExceptionGroup` traceback
 spew), the **64KB Linux pipe buffer fills** faster
 than pytest drains it. Subactor writes block → can't
 finish exit → parent's `waitpid`/pidfd wait blocks →
 deadlock cascades up the tree.
 **Pre-existing guards in the tractor harness** that
 encode this same knowledge — grep these FIRST
 before spelunking:
 - `tests/conftest.py:258-260` (in the `daemon`
  fixture): `# XXX: too much logging will lock up
  the subproc (smh)` — downgrades `trace`/`debug`
  loglevel to `info` to prevent the hang.
 - `tests/conftest.py:316`: `# can lock up on the
  _io.BufferedReader and hang..` — noted on the
  `proc.stderr.read()` post-SIGINT.
 **Debug recipe (in priority order):**
 1. **Try `-s` first.** If the hang disappears with
   `pytest -s`, you've confirmed it's capture-pipe
   fill. Skip spelunking.
 2. **Lower the loglevel.** The default on this
    project is `--ll=error`; if you've bumped it to
    `debug` / `info`, try dropping back. Each more
    verbose level multiplies pipe-pressure under
    fault cascades.
 3. **If you MUST use default capture + high log
   volume**, redirect subactor stdout/stderr in the
   child prelude (e.g.
   `tractor.spawn._subint_forkserver._child_target`
   post-`_close_inherited_fds`) to `/dev/null` or a
   file.
 **Signature tells you it's THIS bug (vs. a real
 code hang):**
 - Multi-actor test under fork-based backend
  (`subint_forkserver`, eventually `trio_proc` too
  under enough log volume).
 - Multiple `RemoteActorError` / `ExceptionGroup`
  tracebacks in the error path.
 - Test passes with `-s` in the 5-10s range, hangs
  past pytest-timeout (usually 30+ s) without `-s`.
 - Subactor processes visible via `pgrep -af
  subint-forkserv` or similar after the hang —
  they're alive but blocked on `write()` to an
  inherited stdout fd.
 **Historical reference:** this deadlock cost a
 multi-session investigation (4 genuine cascade
 fixes landed along the way) that only surfaced the
 capture-pipe issue AFTER the deeper fixes let the
 tree actually tear down enough to produce pipe-
 filling log volume. Full post-mortem in
 `ai/conc-anal/subint_forkserver_test_cancellation_leak_issue.md`.
 Lesson codified here so future-me grep-finds the
 workaround before digging.
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -1,18 +1,10 @@
 name: CI
 # NOTE distilled from,
 # https://github.com/orgs/community/discussions/26276
 on:
-  # any time a new update to 'main'
+  # any time someone pushes a new branch to origin
  push:
    branches:
      - main
-  # for on all (forked) PRs to repo
+  # Allows you to run this workflow manually from the Actions tab
  # NOTE, use a draft PR if you just want CI triggered..
  pull_request:
  # to run workflow manually from the "Actions" tab
  workflow_dispatch:
 jobs:
@ -82,44 +74,24 @@ jobs:
  #       run: mypy tractor/ --ignore-missing-imports --show-traceback
-  testing:
+  testing-linux:
-    name: '${{ matrix.os }} Python${{ matrix.python-version }} spawn_backend=${{ matrix.spawn_backend }} tpt_proto=${{ matrix.tpt_proto }}'
+    name: '${{ matrix.os }} Python ${{ matrix.python }} - ${{ matrix.spawn_backend }}'
-    timeout-minutes: 16
+    timeout-minutes: 10
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
-        os: [
+        os: [ubuntu-latest]
-          ubuntu-latest,
+        python-version: ['3.13']
          macos-latest,
        ]
        python-version: [
          '3.13',
          # '3.14',
        ]
        spawn_backend: [
          'trio',
          # 'mp_spawn',
          # 'mp_forkserver',
          # ?TODO^ is it worth it to get these running again?
          #
          # - [ ] next-gen backends, on 3.13+
          #   https://github.com/goodboy/tractor/issues/379
          # 'subinterpreter',
          # 'subint',
        ]
        tpt_proto: [
          'tcp',
          'uds',
        ]
        # https://github.com/orgs/community/discussions/26253#discussioncomment-3250989
        exclude:
          # don't do UDS run on macOS (for now)
          - os: macos-latest
            tpt_proto: 'uds'
    steps:
      - uses: actions/checkout@v4
      - name: 'Install uv + py-${{ matrix.python-version }}'
@ -146,11 +118,7 @@ jobs:
        run: uv tree
      - name: Run tests
-        run: >
+        run: uv run pytest tests/ --spawn-backend=${{ matrix.spawn_backend }} -rsx
          uv run
          pytest tests/ -rsx
          --spawn-backend=${{ matrix.spawn_backend }}
          --tpt-proto=${{ matrix.tpt_proto }}
  # XXX legacy NOTE XXX
  #
--- a/.gitignore
+++ b/.gitignore
@ -102,69 +102,3 @@ venv.bak/
 # mypy
 .mypy_cache/
 # all files under
 .git/
 # require very explicit staging for anything we **really**
 # want put/kept in repo.
 notes_to_self/
 snippets/
 # ------- AI shiz -------
 # `ai.skillz` symlinks,
 # (machine-local, deploy via deploy-skill.sh)
 .claude/skills/py-codestyle
 .claude/skills/close-wkt
 .claude/skills/plan-io
 .claude/skills/prompt-io
 .claude/skills/resolve-conflicts
 .claude/skills/inter-skill-review
 # /open-wkt specifics
 .claude/skills/open-wkt
 .claude/wkts/
 claude_wkts
 # /code-review-changes specifics
 .claude/skills/code-review-changes
 # review-skill ephemeral ctx (per-PR, single-use)
 .claude/review_context.md
 .claude/review_regression.md
 # /pr-msg specifics
 .claude/skills/pr-msg/*
 # repo-specific
 !.claude/skills/pr-msg/format-reference.md
 # XXX, so u can nvim-telescope this file.
 # !.claude/skills/pr-msg/pr_msg_LATEST.md
 # /commit-msg specifics
 # - any commit-msg gen tmp files
 .claude/*_commit_*.md
 .claude/*_commit*.txt
 .claude/skills/commit-msg/*
 !.claude/skills/commit-msg/style-duie-reference.md
 # use prompt-io instead?
 .claude/plans
 # nix develop --profile .nixdev
 .nixdev*
 # :Obsession .
 Session.vim
 # `gish` local `.md`-files
 # TODO? better all around automation!
 # -[ ] it'd be handy to also commit and sync with wtv git service?
 # -[ ] everything should be put under a `.gish/` no?
 gitea/
 gh/
 # ------ macOS ------
 # Finder metadata
 **/.DS_Store
 # LLM conversations that should remain private
 docs/conversations/
--- a/ai/conc-anal/subint_cancel_delivery_hang_issue.md
+++ b/ai/conc-anal/subint_cancel_delivery_hang_issue.md
@ -1,161 +0,0 @@
 # `subint` backend: parent trio loop parks after subint teardown (Ctrl-C works; not a CPython-level issue)
 Follow-up to the Phase B subint spawn-backend PR (see
 `tractor.spawn._subint`, issue #379). Distinct from the
 `subint_sigint_starvation_issue.md` (SIGINT-unresponsive
 starvation hang): this one is **Ctrl-C-able**, which means
 it's *not* the shared-GIL-hostage class and is ours to fix
 from inside tractor rather than waiting on upstream CPython
 / msgspec progress.
 ## TL;DR
 After a stuck-subint subactor is torn down via the
 hard-kill path, a parent-side trio task parks on an
 *orphaned resource* (most likely a `chan.recv()` /
 `process_messages` loop on the now-dead subint's IPC
 channel) and waits forever for bytes that can't arrive —
 because the channel was torn down without emitting a clean
 EOF/`BrokenResourceError` to the waiting receiver.
 Unlike `subint_sigint_starvation_issue.md`, the main trio
 loop **is** iterating normally — SIGINT delivers cleanly
 and the test unhangs. But absent Ctrl-C, the test suite
 wedges indefinitely.
 ## Symptom
 Running `test_subint_non_checkpointing_child` under
 `--spawn-backend=subint` (in
 `tests/test_subint_cancellation.py`):
 1. Test spawns a subactor whose main task runs
   `threading.Event.wait(1.0)` in a loop — releases the
   GIL but never inserts a trio checkpoint.
 2. Parent does `an.cancel_scope.cancel()`. Our
   `subint_proc` cancel path fires: soft-kill sends
   `Portal.cancel_actor()` over the live IPC channel →
   subint's trio loop *should* process the cancel msg on
   its IPC dispatcher task (since the GIL releases are
   happening).
 3. Expected: subint's `trio.run()` unwinds, driver thread
   exits naturally, parent returns.
 4. Actual: parent `trio.run()` never completes. Test
   hangs past its `trio.fail_after()` deadline.
 ## Evidence
 ### `strace` on the hung pytest process during SIGINT
 ```
 --- SIGINT {si_signo=SIGINT, si_code=SI_KERNEL} ---
 write(17, "\2", 1)                      = 1
 ```
 Contrast with the SIGINT-starvation hang (see
 `subint_sigint_starvation_issue.md`) where that same
 `write()` returned `EAGAIN`. Here the SIGINT byte is
 written successfully → Python's signal handler pipe is
 being drained → main trio loop **is** iterating → SIGINT
 gets turned into `trio.Cancelled` → the test unhangs (if
 the operator happens to be there to hit Ctrl-C).
 ### Stack dump (via `tractor.devx.dump_on_hang`)
 Single main thread visible, parked in
 `trio._core._io_epoll.get_events` inside `trio.run` at the
 test's `trio.run(...)` call site. No subint driver thread
 (subint was destroyed successfully — this is *after* the
 hard-kill path, not during it).
 ## Root cause hypothesis
 Most consistent with the evidence: a parent-side trio
 task is awaiting a `chan.recv()` / `process_messages` loop
 on the dead subint's IPC channel. The sequence:
 1. Soft-kill in `subint_proc` sends `Portal.cancel_actor()`
   over the channel. The subint's trio dispatcher *may* or
   may not have processed the cancel msg before the subint
   was destroyed — timing-dependent.
 2. Hard-kill timeout fires (because the subint's main
   task was in `threading.Event.wait()` with no trio
   checkpoint — cancel-msg processing couldn't race the
   timeout).
 3. Driver thread abandoned, `_interpreters.destroy()`
   runs. Subint is gone.
 4. But the parent-side trio task holding a
   `chan.recv()` / `process_messages` loop against that
   channel was **not** explicitly cancelled. The channel's
   underlying socket got torn down, but without a clean
   EOF delivered to the waiting recv, the task parks
   forever on `trio.lowlevel.wait_readable` (or similar).
 This matches the "main loop fine, task parked on
 orphaned I/O" signature.
 ## Why this is ours to fix (not CPython's)
 - Main trio loop iterates normally → GIL isn't starved.
 - SIGINT is deliverable → not a signal-pipe-full /
  wakeup-fd contention scenario.
 - The hang is in *our* supervision code, specifically in
  how `subint_proc` tears down its side of the IPC when
  the subint is abandoned/destroyed.
 ## Possible fix directions
 1. **Explicit parent-side channel abort on subint
   abandon.** In `subint_proc`'s teardown block, after the
   hard-kill timeout fires, explicitly close the parent's
   end of the IPC channel to the subint. Any waiting
   `chan.recv()` / `process_messages` task sees
   `BrokenResourceError` (or `ClosedResourceError`) and
   unwinds.
 2. **Cancel parent-side RPC tasks tied to the dead
   subint's channel.** The `Actor._rpc_tasks` / nursery
   machinery should have a handle on any
   `process_messages` loops bound to a specific peer
   channel. Iterate those and cancel explicitly.
 3. **Bound the top-level `await actor_nursery
   ._join_procs.wait()` shield in `subint_proc`** (same
   pattern as the other bounded shields the hard-kill
   patch added). If the nursery never sets `_join_procs`
   because a child task is parked, the bound would at
   least let the teardown proceed.
 Of these, (1) is the most surgical and directly addresses
 the root cause. (2) is a defense-in-depth companion. (3)
 is a band-aid but cheap to add.
 ## Current workaround
 None in-tree. The test's `trio.fail_after()` bound
 currently fires and raises `TooSlowError`, so the test
 visibly **fails** rather than hangs — which is
 intentional (an unbounded cancellation-audit test would
 defeat itself). But in interactive test runs the operator
 has to hit Ctrl-C to move past the parked state before
 pytest reports the failure.
 ## Reproducer
 ```
 ./py314/bin/python -m pytest \
  tests/test_subint_cancellation.py::test_subint_non_checkpointing_child \
  --spawn-backend=subint --tb=short --no-header -v
 ```
 Expected: hangs until `trio.fail_after(15)` fires, or
 Ctrl-C unwedges it manually.
 ## References
 - `tractor.spawn._subint.subint_proc` — current subint
  teardown code; see the `_HARD_KILL_TIMEOUT` bounded
  shields + `daemon=True` driver-thread abandonment
  (commit `b025c982`).
 - `ai/conc-anal/subint_sigint_starvation_issue.md` — the
  sibling CPython-level hang (GIL-starvation,
  SIGINT-unresponsive) which is **not** this issue.
 - Phase B tracking: issue #379.
--- a/ai/conc-anal/subint_fork_blocked_by_cpython_post_fork_issue.md
+++ b/ai/conc-anal/subint_fork_blocked_by_cpython_post_fork_issue.md
@ -1,337 +0,0 @@
 # `os.fork()` from a non-main sub-interpreter aborts the child (CPython refuses post-fork cleanup)
 Third `subint`-class analysis in this project. Unlike its
 two siblings (`subint_sigint_starvation_issue.md`,
 `subint_cancel_delivery_hang_issue.md`), this one is not a
 hang — it's a **hard CPython-level refusal** of an
 experimental spawn strategy we wanted to try.
 ## TL;DR
 An in-process sub-interpreter cannot be used as a
 "launchpad" for `os.fork()` on current CPython. The fork
 syscall succeeds in the parent, but the forked CHILD
 process is aborted immediately by CPython's post-fork
 cleanup with:
 ```
 Fatal Python error: _PyInterpreterState_DeleteExceptMain: not main interpreter
 ```
 This is enforced by a hard `PyStatus_ERR` gate in
 `Python/pystate.c`. The CPython devs acknowledge the
 fragility with an in-source comment (`// Ideally we could
 guarantee tstate is running main.`) but provide no
 mechanism to satisfy the precondition from user code.
 **Implication for tractor**: the `subint_fork` backend
 sketched in `tractor.spawn._subint_fork` is structurally
 dead on current CPython. The submodule is kept as
 documentation of the attempt; `--spawn-backend=subint_fork`
 raises `NotImplementedError` pointing here.
 ## Context — why we tried this
 The motivation is issue #379's "Our own thoughts, ideas
 for `fork()`-workaround/hacks..." section. The existing
 trio-backend (`tractor.spawn._trio.trio_proc`) spawns
 subactors via `trio.lowlevel.open_process()` → ultimately
 `posix_spawn()` or `fork+exec`, from the parent's main
 interpreter that is currently running `trio.run()`. This
 brushes against a known-fragile interaction between
 `trio` and `fork()` tracked in
 [python-trio/trio#1614](https://github.com/python-trio/trio/issues/1614)
 and siblings — mostly mitigated in `tractor`'s case only
 incidentally (we `exec()` immediately post-fork).
 The idea was:
 1. Create a subint that has *never* imported `trio`.
 2. From a worker thread in that subint, call `os.fork()`.
 3. In the child, `execv()` back into
   `python -m tractor._child` — same as `trio_proc` does.
 4. The fork is from a trio-free context → trio+fork
   hazards avoided regardless of downstream behavior.
 The parent-side orchestration (`ipc_server.wait_for_peer`,
 `SpawnSpec`, `Portal` yield) would reuse
 `trio_proc`'s flow verbatim, with only the subproc-spawn
 mechanics swapped.
 ## Symptom
 Running the prototype (`tractor.spawn._subint_fork.subint_fork_proc`,
 see git history prior to the stub revert) on py3.14:
 ```
 Fatal Python error: _PyInterpreterState_DeleteExceptMain: not main interpreter
 Python runtime state: initialized
 Current thread 0x00007f6b71a456c0 [subint-fork-lau] (most recent call first):
  File "<script>", line 2 in <module>
 <script>:2: DeprecationWarning: This process (pid=802985) is multi-threaded, use of fork() may lead to deadlocks in the child.
 ```
 Key clues:
 - The **`DeprecationWarning`** fires in the parent (before
  fork completes) — fork *is* executing, we get that far.
 - The **`Fatal Python error`** comes from the child — it
  aborts during CPython's post-fork C initialization
  before any user Python runs in the child.
 - The thread name `subint-fork-lau[nchpad]` is ours —
  confirms the fork is being called from the launchpad
  subint's driver thread.
 ## CPython source walkthrough
 ### Call site — `Modules/posixmodule.c:728-793`
 The post-fork-child hook CPython runs in the child process:
 ```c
 void
 PyOS_AfterFork_Child(void)
 {
    PyStatus status;
    _PyRuntimeState *runtime = &_PyRuntime;
    // re-creates runtime->interpreters.mutex (HEAD_UNLOCK)
    status = _PyRuntimeState_ReInitThreads(runtime);
    ...
    PyThreadState *tstate = _PyThreadState_GET();
    _Py_EnsureTstateNotNULL(tstate);
    ...
    // Ideally we could guarantee tstate is running main.   ← !!!
    _PyInterpreterState_ReinitRunningMain(tstate);
    status = _PyEval_ReInitThreads(tstate);
    ...
    status = _PyInterpreterState_DeleteExceptMain(runtime);
    if (_PyStatus_EXCEPTION(status)) {
        goto fatal_error;
    }
    ...
 fatal_error:
    Py_ExitStatusException(status);
 }
 ```
 The `// Ideally we could guarantee tstate is running
 main.` comment is a flashing warning sign — the CPython
 devs *know* this path is fragile when fork is called from
 a non-main subint, but they've chosen to abort rather than
 silently corrupt state. Arguably the right call.
 ### The refusal — `Python/pystate.c:1035-1075`
 ```c
 /*
 * Delete all interpreter states except the main interpreter.  If there
 * is a current interpreter state, it *must* be the main interpreter.
 */
 PyStatus
 _PyInterpreterState_DeleteExceptMain(_PyRuntimeState *runtime)
 {
    struct pyinterpreters *interpreters = &runtime->interpreters;
    PyThreadState *tstate = _PyThreadState_Swap(runtime, NULL);
    if (tstate != NULL && tstate->interp != interpreters->main) {
        return _PyStatus_ERR("not main interpreter");       ← our error
    }
    HEAD_LOCK(runtime);
    PyInterpreterState *interp = interpreters->head;
    interpreters->head = NULL;
    while (interp != NULL) {
        if (interp == interpreters->main) {
            interpreters->main->next = NULL;
            interpreters->head = interp;
            interp = interp->next;
            continue;
        }
        // XXX Won't this fail since PyInterpreterState_Clear() requires
        // the "current" tstate to be set?
        PyInterpreterState_Clear(interp);  // XXX must activate?
        zapthreads(interp);
        ...
    }
    ...
 }
 ```
 The block comment above the function (`If there is a
 current interpreter state, it *must* be the main
 interpreter.`) is the formal API contract. The `XXX`
 comments further in suggest the CPython team is already
 aware this function has latent issues even in the happy
 path.
 ## Chain summary
 1. Our launchpad subint's driver OS-thread calls
   `os.fork()`.
 2. `fork()` succeeds. Child wakes up with:
   - The parent's full memory image (including all
     subints).
   - Only the *calling* thread alive (the driver thread).
   - `_PyThreadState_GET()` on that thread returns the
     **launchpad subint's tstate**, *not* main's.
 3. CPython runs `PyOS_AfterFork_Child()`.
 4. It reaches `_PyInterpreterState_DeleteExceptMain()`.
 5. Gate check fails: `tstate->interp != interpreters->main`.
 6. `PyStatus_ERR("not main interpreter")` → `fatal_error`
   goto → `Py_ExitStatusException()` → child aborts.
 Parent-side consequence: `os.fork()` in the subint
 bootstrap returned successfully with the child's PID, but
 the child died before connecting back. Our parent's
 `ipc_server.wait_for_peer(uid)` would hang forever — the
 child never gets to `_actor_child_main`.
 ## Definitive answer to "Open Question 1"
 From the (now-stub) `subint_fork_proc` docstring:
 > Does CPython allow `os.fork()` from a non-main
 > sub-interpreter under the legacy config?
 **No.** Not in a usable-by-user-code sense. The fork
 syscall is not blocked, but the child cannot survive
 CPython's post-fork initialization. This is enforced, not
 accidental, and the CPython devs have acknowledged the
 fragility in-source.
 ## What we'd need from CPython to unblock
 Any one of these, from least-to-most invasive:
 1. **A pre-fork hook mechanism** that lets user code (or
   tractor itself via `os.register_at_fork(before=...)`)
   swap the current tstate to main before fork runs. The
   swap would need to work across the subint→main
   boundary, which is the actual hard part —
   `_PyThreadState_Swap()` exists but is internal.
 2. **A `_PyInterpreterState_DeleteExceptFor(tstate->interp)`
   variant** that cleans up all *other* subints while
   preserving the calling subint's state. Lets the child
   continue executing in the subint after fork; a
   subsequent `execv()` clears everything at the OS
   level anyway.
 3. **A cleaner error** than `Fatal Python error` aborting
   the child. Even without fixing the underlying
   capability, a raised Python-level exception in the
   parent's `fork()` call (rather than a silent child
   abort) would at least make the failure mode
   debuggable.
 ## Upstream-report draft (for CPython issue tracker)
 ### Title
 > `os.fork()` from a non-main sub-interpreter aborts the
 > child with a fatal error in `PyOS_AfterFork_Child`; can
 > we at least make it a clean `RuntimeError` in the
 > parent?
 ### Body
 > **Version**: Python 3.14.x
 >
 > **Summary**: Calling `os.fork()` from a thread currently
 > executing inside a sub-interpreter causes the forked
 > child process to abort during CPython's post-fork
 > cleanup, with the following output in the child:
 >
 > ```
 > Fatal Python error: _PyInterpreterState_DeleteExceptMain: not main interpreter
 > ```
 >
 > From the **parent's** point of view the fork succeeded
 > (returned a valid child PID). The failure is completely
 > opaque to parent-side Python code — unless the parent
 > does `os.waitpid()` it won't even notice the child
 > died.
 >
 > **Root cause** (as I understand it from reading sources):
 > `Modules/posixmodule.c::PyOS_AfterFork_Child()` calls
 > `_PyInterpreterState_DeleteExceptMain()` with a
 > precondition that `_PyThreadState_GET()->interp` be the
 > main interpreter. When `fork()` is called from a thread
 > executing inside a subinterpreter, the child wakes up
 > with its tstate still pointing at the subint, and the
 > gate in `Python/pystate.c:1044-1047` fails.
 >
 > A comment in the source
 > (`Modules/posixmodule.c:753` — `// Ideally we could
 > guarantee tstate is running main.`) suggests this is a
 > known-fragile path rather than an intentional
 > invariant.
 >
 > **Use case**: I was experimenting with using a
 > sub-interpreter as a "fork launchpad" — have a subint
 > that has never imported `trio`, call `os.fork()` from
 > that subint's thread, and in the child `execv()` back
 > into a fresh Python interpreter process. The goal was
 > to sidestep known issues with `trio` + `fork()`
 > interaction (see
 > [python-trio/trio#1614](https://github.com/python-trio/trio/issues/1614))
 > by guaranteeing the forking context had never been
 > "contaminated" by trio's imports or globals. This
 > approach would allow `trio`-using applications to
 > combine `fork`-based subprocess spawning with
 > per-worker `trio.run()` runtimes — a fairly common
 > pattern that currently requires workarounds.
 >
 > **Request**:
 >
 > Ideally: make fork-from-subint work (e.g., by swapping
 > the caller's tstate to main in the pre-fork hook), or
 > provide a `_PyInterpreterState_DeleteExceptFor(interp)`
 > variant that permits the caller's subint to survive
 > post-fork so user code can subsequently `execv()`.
 >
 > Minimally: convert the fatal child-side abort into a
 > clean `RuntimeError` (or similar) raised in the
 > parent's `fork()` call. Even if the capability isn't
 > expanded, the failure mode should be debuggable by
 > user-code in the parent — right now it's a silent
 > child death with an error message buried in the
 > child's stderr that parent code can't programmatically
 > see.
 >
 > **Related**: PEP 684 (per-interpreter GIL), PEP 734
 > (`concurrent.interpreters` public API). The private
 > `_interpreters` module is what I used to create the
 > launchpad, via `_interpreters.create('legacy')`; I
 > expect the same behavior from
 > `concurrent.interpreters.create()` (not tested, but
 > the post-fork gate is identical).
 >
 > Happy to contribute a minimal reproducer + test case if
 > this is something the team wants to pursue.
 ## References
 - `Modules/posixmodule.c:728` —
  [`PyOS_AfterFork_Child`](https://github.com/python/cpython/blob/main/Modules/posixmodule.c#L728)
 - `Python/pystate.c:1040` —
  [`_PyInterpreterState_DeleteExceptMain`](https://github.com/python/cpython/blob/main/Python/pystate.c#L1040)
 - PEP 684 (per-interpreter GIL):
  <https://peps.python.org/pep-0684/>
 - PEP 734 (`concurrent.interpreters` public API):
  <https://peps.python.org/pep-0734/>
 - [python-trio/trio#1614](https://github.com/python-trio/trio/issues/1614)
  — the original motivation for the launchpad idea.
 - tractor issue #379 — "Our own thoughts, ideas for
  `fork()`-workaround/hacks..." section where this was
  first sketched.
 - `tractor.spawn._subint_fork` — in-tree stub preserving
  the attempted impl's shape in git history.
--- a/ai/conc-anal/subint_fork_from_main_thread_smoketest.py
+++ b/ai/conc-anal/subint_fork_from_main_thread_smoketest.py
@ -1,373 +0,0 @@
 #!/usr/bin/env python3
 '''
 Standalone CPython-level feasibility check for the "main-interp
 worker-thread forkserver + subint-hosted trio" architecture
 proposed as a workaround to the CPython-level refusal
 documented in
 `ai/conc-anal/subint_fork_blocked_by_cpython_post_fork_issue.md`.
 Purpose
 -------
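 Deliberately NOT a `tractor` test. The only `tractor` imports
 are the fork/subint primitives from
 `tractor.spawn._subint_forkserver` (see the import note
 below); the scenarios otherwise use `_interpreters` (private
 stdlib) + `os.fork()` directly so the signal is unambiguous —
 pass/fail here is a property of CPython alone, independent of
 our runtime.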
 Run each scenario in isolation; the child's fate is observable
 only via `os.waitpid()` of the parent and the scenario's own
 status prints.
 Scenarios (pick one with `--scenario <name>`)
 ---------------------------------------------
 - `control_subint_thread_fork` — the KNOWN-BROKEN case we
  documented in `subint_fork_blocked_by_cpython_post_fork_issue.md`:
  drive a subint from a thread, call `os.fork()` inside its
  `_interpreters.exec()`, watch the child abort. **Included as
  a control** — if this scenario DOESN'T abort the child, our
  analysis is wrong and we should re-check everything.
 - `main_thread_fork` — baseline sanity. Call `os.fork()` from
  the process's main thread. Must always succeed; if this
  fails something much bigger is broken.
 - `worker_thread_fork` — the architectural assertion. Spawn a
  regular `threading.Thread` (attached to main interp, NOT a
  subint), have IT call `os.fork()`. Child should survive
  post-fork cleanup.
 - `full_architecture` — end-to-end: main-interp worker thread
  forks. In the child, fork-thread (still main-interp) creates
  a subint, drives a second worker thread inside it that runs
  a trivial `trio.run()`. Validates the "root runtime lives in
  a subint in the child" piece of the proposed arch.
 All scenarios print a self-contained pass/fail banner. Exit
 code 0 on expected outcome (which for `control_*` means "child
 aborted", not "child succeeded"!).
 Requires Python 3.14+.
 Usage
 -----
 ::
    python subint_fork_from_main_thread_smoketest.py \\
        --scenario main_thread_fork
    python subint_fork_from_main_thread_smoketest.py \\
        --scenario full_architecture
 '''
 from __future__ import annotations
 import argparse
 import os
 import sys
 import threading
 import time
 from collections.abc import Callable
 # Hard-require py3.14 for the public `concurrent.interpreters`
 # API (we still drop to `_interpreters` internally, same as
 # `tractor.spawn._subint`).
 try:
    from concurrent import interpreters as _public_interpreters  # noqa: F401
    import _interpreters  # type: ignore
 except ImportError:
    print(
        'FAIL (setup): requires Python 3.14+ '
        '(missing `concurrent.interpreters`)',
        file=sys.stderr,
    )
    sys.exit(2)
 # The actual primitives this script exercises live in
 # `tractor.spawn._subint_forkserver` — we re-import them here
 # rather than inlining so the module and the validation stay
 # in sync. (Early versions of this file had them inline for
 # the "zero tractor imports" isolation guarantee; now that
 # CPython-level feasibility is confirmed, the validated
 # primitives have moved into tractor proper.)
 from tractor.spawn._subint_forkserver import (
    fork_from_worker_thread,
    run_subint_in_worker_thread,
    wait_child,
 )
 # ----------------------------------------------------------------
 # small observability helpers (test-harness only)
 # ----------------------------------------------------------------
 def _banner(title: str) -> None:
    '''
    Print `title` framed above and below by a 60-char `=`
    rule, preceded by an empty line; stdout is flushed.
    '''
    rule: str = '=' * 60
    framed: str = '\n'.join((rule, title, rule))
    print('\n' + framed, flush=True)
 def _report(
    label: str,
    *,
    ok: bool,
    status_str: str,
    expect_exit_ok: bool,
 ) -> None:
    '''
    Print one `[PASS]`/`[FAIL]` line for scenario `label`
    contrasting the expected child outcome (chosen by
    `expect_exit_ok`) with the observed `status_str`.
    '''
    if expect_exit_ok:
        expected_str: str = 'normal exit (rc=0)'
    else:
        expected_str = 'abnormal death (signal or nonzero exit)'
    verdict: str = 'FAIL'
    if ok:
        verdict = 'PASS'
    summary: str = (
        f'[{verdict}] {label}: '
        f'expected {expected_str}; observed {status_str}'
    )
    print(summary, flush=True)
 # ----------------------------------------------------------------
 # scenario: `control_subint_thread_fork` (known-broken)
 # ----------------------------------------------------------------
 def scenario_control_subint_thread_fork() -> int:
    '''
    Control scenario: call `os.fork()` from source exec'd
    inside a `'legacy'`-config subint (driven from a daemon
    thread) and check whether the forked child lives long
    enough to write a sentinel file.
    Returns 0 when the sentinel is absent (child presumed
    aborted, as the analysis predicts) and 1 when it is
    present.
    NOTE(review): the forked child's PID is only stashed
    inside the subint and is never `waitpid()`-ed by this
    function, so the child is not reaped here.
    '''
    _banner(
        '[control] fork from INSIDE a subint (expected: child aborts)'
    )
    interp_id = _interpreters.create('legacy')
    print(f'  created subint {interp_id}', flush=True)
    # Shared flag: child writes a sentinel file we can detect from
    # the parent. If the child manages to write this, CPython's
    # post-fork refusal is NOT happening → analysis is wrong.
    sentinel = '/tmp/subint_fork_smoketest_control_child_ran'
    # Clear any stale sentinel left behind by a previous run so
    # its presence later can only come from THIS run's child.
    try:
        os.unlink(sentinel)
    except FileNotFoundError:
        pass
    bootstrap = (
        'import os\n'
        'pid = os.fork()\n'
        'if pid == 0:\n'
        # child — if CPython's refusal fires this code never runs
        f'    with open({sentinel!r}, "w") as f:\n'
        '        f.write("ran")\n'
        '    os._exit(0)\n'
        'else:\n'
        # parent side (inside the launchpad subint) — stash the
        # forked PID as an attribute on the subint's `builtins`
        # module. We can't just return it;
        # _interpreters.exec() returns nothing useful.
        # NOTE(review): nothing in this script reads the
        # stashed PID back.
        '    import builtins\n'
        '    builtins._forked_child_pid = pid\n'
    )
    # NOTE, we can't easily pull state back from the subint.
    # For the CONTROL scenario we just time-bound the fork +
    # check the sentinel. If sentinel exists → child ran →
    # analysis wrong. If not → child aborted → analysis
    # confirmed.
    done = threading.Event()
    def _drive() -> None:
        # Thread target: exec the bootstrap in the subint and
        # always signal completion, even if exec raises.
        try:
            _interpreters.exec(interp_id, bootstrap)
        except Exception as err:
            print(
                f'  subint bootstrap raised (expected on some '
                f'CPython versions): {type(err).__name__}: {err}',
                flush=True,
            )
        finally:
            done.set()
    t = threading.Thread(
        target=_drive,
        name='control-subint-fork-launchpad',
        daemon=True,
    )
    t.start()
    # Both waits are time-bounded; their results are not
    # checked, the sentinel test below is the only verdict.
    done.wait(timeout=5.0)
    t.join(timeout=2.0)
    # Give the (possibly-aborted) child a moment to die.
    time.sleep(0.5)
    sentinel_present = os.path.exists(sentinel)
    verdict = (
        # "PASS" for our analysis means sentinel NOT present.
        'PASS' if not sentinel_present else 'FAIL (UNEXPECTED)'
    )
    print(
        f'[{verdict}] control: sentinel present={sentinel_present} '
        f'(analysis predicts False — child should abort before '
        f'writing)',
        flush=True,
    )
    # Cleanup: remove the sentinel (if written) and tear down
    # the subint, ignoring a failed destroy.
    if sentinel_present:
        os.unlink(sentinel)
    try:
        _interpreters.destroy(interp_id)
    except _interpreters.InterpreterError:
        pass
    return 0 if not sentinel_present else 1
 # ----------------------------------------------------------------
 # scenario: `main_thread_fork` (baseline sanity)
 # ----------------------------------------------------------------
 def scenario_main_thread_fork() -> int:
    '''
    Baseline sanity check: `os.fork()` directly from the
    process's main thread and expect the child to exit
    normally.
    Returns 0 when the child exited as expected, else 1.
    '''
    _banner(
        '[baseline] fork from MAIN thread (expected: child exits normally)'
    )
    pid: int = os.fork()
    if pid == 0:
        # child: nothing to do, exit immediately with rc=0.
        os._exit(0)
    # parent: reap with the imported `wait_child()` primitive
    # and render the verdict via `_report()`, the same pairing
    # `_run_worker_thread_fork_scenario()` uses. The previously
    # called `_wait_child()` is not defined or imported anywhere
    # in this script, so this scenario raised `NameError`.
    ok, status_str = wait_child(pid, expect_exit_ok=True)
    _report(
        'main_thread_fork',
        ok=ok,
        status_str=status_str,
        expect_exit_ok=True,
    )
    return 0 if ok else 1
 # ----------------------------------------------------------------
 # scenario: `worker_thread_fork` (architectural assertion)
 # ----------------------------------------------------------------
 def _run_worker_thread_fork_scenario(
    label: str,
    *,
    child_target=None,
 ) -> int:
    '''
    Fork via the `fork_from_worker_thread()` primitive imported
    from `tractor.spawn._subint_forkserver`, reap the child with
    `wait_child()` and print the verdict line for `label`.
    Returns 0 when `wait_child()` reports success; 1 otherwise,
    including when the fork call itself raises `RuntimeError`.
    '''
    try:
        child_pid: int = fork_from_worker_thread(
            child_target=child_target,
            thread_name=f'worker-fork-thread[{label}]',
        )
    except RuntimeError as fork_err:
        print(f'[FAIL] {label}: {fork_err}', flush=True)
        return 1
    print(f'  forked child pid={child_pid}', flush=True)
    outcome = wait_child(child_pid, expect_exit_ok=True)
    ok, status_str = outcome
    _report(
        label,
        ok=ok,
        status_str=status_str,
        expect_exit_ok=True,
    )
    if ok:
        return 0
    return 1
 def scenario_worker_thread_fork() -> int:
    '''
    Architectural assertion: fork from a worker thread with
    no `child_target`, delegating the fork/wait/report flow to
    `_run_worker_thread_fork_scenario()` and returning its
    exit code.
    '''
    headline: str = (
        '[arch] fork from MAIN-INTERP WORKER thread '
        '(expected: child exits normally — this is the one '
        'that matters)'
    )
    _banner(headline)
    rc: int = _run_worker_thread_fork_scenario('worker_thread_fork')
    return rc
 # ----------------------------------------------------------------
 # scenario: `full_architecture`
 # ----------------------------------------------------------------
 # Python source text handed to `run_subint_in_worker_thread()`
 # by `_child_trio_in_subint()`: runs a trivial `trio.run()`,
 # asserts it returned 42 and prints a success marker.
 _CHILD_TRIO_BOOTSTRAP: str = (
    'import trio\n'
    'async def _main():\n'
    '    await trio.sleep(0.05)\n'
    '    return 42\n'
    'result = trio.run(_main)\n'
    'assert result == 42, f"trio.run returned {result}"\n'
    'print("  CHILD subint: trio.run OK, result=42", '
    'flush=True)\n'
 )
 def _child_trio_in_subint() -> int:
    '''
    CHILD-side `child_target`: hand `_CHILD_TRIO_BOOTSTRAP` to
    the `run_subint_in_worker_thread()` primitive from
    `tractor.spawn._subint_forkserver`.
    Returns 0 on success, 3 when the primitive raises
    `RuntimeError`, and 4 for any other exception.
    '''
    rc: int = 0
    try:
        run_subint_in_worker_thread(
            _CHILD_TRIO_BOOTSTRAP,
            thread_name='child-subint-trio-thread',
        )
    except RuntimeError as err:
        rc = 3
        print(
            f'  CHILD: run_subint_in_worker_thread timed out / thread '
            f'never returned: {err}',
            flush=True,
        )
    except BaseException as err:
        rc = 4
        print(
            f'  CHILD: subint bootstrap raised: '
            f'{type(err).__name__}: {err}',
            flush=True,
        )
    return rc
 def scenario_full_architecture() -> int:
    '''
    End-to-end scenario: same worker-thread fork flow as
    `scenario_worker_thread_fork()` but with
    `_child_trio_in_subint` passed as the `child_target`.
    Returns the helper's exit code.
    '''
    headline: str = (
        '[arch-full] worker-thread fork + child runs trio in a '
        'subint (end-to-end proposed arch)'
    )
    _banner(headline)
    rc: int = _run_worker_thread_fork_scenario(
        'full_architecture',
        child_target=_child_trio_in_subint,
    )
    return rc
 # ----------------------------------------------------------------
 # main
 # ----------------------------------------------------------------
 # CLI dispatch table: `--scenario` name -> zero-arg scenario
 # function returning an int exit code (looked up in `main()`).
 SCENARIOS: dict[str, Callable[[], int]] = {
    'control_subint_thread_fork': scenario_control_subint_thread_fork,
    'main_thread_fork': scenario_main_thread_fork,
    'worker_thread_fork': scenario_worker_thread_fork,
    'full_architecture': scenario_full_architecture,
 }
 def main() -> int:
    '''
    Parse the required `--scenario` CLI flag and run the
    matching entry from `SCENARIOS`, returning its int exit
    code. The module docstring doubles as the `--help`
    description.
    '''
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    parser.add_argument(
        '--scenario',
        required=True,
        choices=sorted(SCENARIOS),
    )
    scenario_name: str = parser.parse_args().scenario
    run_scenario = SCENARIOS[scenario_name]
    return run_scenario()
 if __name__ == '__main__':
    sys.exit(main())
--- a/ai/conc-anal/subint_forkserver_orphan_sigint_hang_issue.md
+++ b/ai/conc-anal/subint_forkserver_orphan_sigint_hang_issue.md
@ -1,385 +0,0 @@
 # `subint_forkserver` backend: orphaned-subactor SIGINT wedged in `epoll_wait`
 Follow-up to the Phase C `subint_forkserver` spawn-backend
 PR (see `tractor.spawn._subint_forkserver`, issue #379).
 Surfaced by the xfail'd
 `tests/spawn/test_subint_forkserver.py::test_orphaned_subactor_sigint_cleanup_DRAFT`.
 Related-but-distinct from
 `subint_cancel_delivery_hang_issue.md` (orphaned-channel
 park AFTER subint teardown) and
 `subint_sigint_starvation_issue.md` (GIL-starvation,
 SIGINT never delivered): here the SIGINT IS delivered,
 trio's handler IS installed, but trio's event loop never
 wakes — so the KBI-at-checkpoint → `_trio_main` catch path
 (which is the runtime's *intentional* OS-cancel design)
 never fires.
 ## TL;DR
 When a `subint_forkserver`-spawned subactor is orphaned
 (parent `SIGKILL`'d, no IPC cancel path available) and then
 externally `SIGINT`'d, the subactor hangs in
 `trio/_core/_io_epoll.py::get_events` (epoll_wait)
 indefinitely — even though:
 1. `threading.current_thread() is threading.main_thread()`
   post-fork (CPython 3.14 re-designates correctly).
 2. Trio's SIGINT handler IS installed in the subactor
   (`signal.getsignal(SIGINT)` returns
   `<function KIManager.install.<locals>.handler at 0x...>`).
 3. The kernel does deliver SIGINT — the signal arrives at
   the only thread in the process (the fork-inherited
   worker which IS now "main" per Python).
 Yet `epoll_wait` does not return. Trio's wakeup-fd mechanism
 — the machinery that turns SIGINT into an epoll-wake — is
 somehow not firing the wakeup. Until that's fixed, the
 intentional "KBI-as-OS-cancel" path in
 `tractor/spawn/_entry.py::_trio_main:164` is unreachable
 for forkserver-spawned subactors whose parent dies.
 ## Symptom
 Test: `tests/spawn/test_subint_forkserver.py::test_orphaned_subactor_sigint_cleanup_DRAFT`
 (currently marked `@pytest.mark.xfail(strict=True)`).
 1. Harness subprocess brings up a tractor root actor +
   one `run_in_actor(_sleep_forever)` subactor via
   `try_set_start_method('subint_forkserver')`.
 2. Harness prints `CHILD_PID` (subactor) and
   `PARENT_READY` (root actor) markers to stdout.
 3. Test `os.kill(parent_pid, SIGKILL)` + `proc.wait()`
   to fully reap the root-actor harness.
 4. Child (now reparented to pid 1) is still alive.
 5. Test `os.kill(child_pid, SIGINT)` and polls
   `os.kill(child_pid, 0)` for up to 10s.
 6. **Observed**: the child is still alive at deadline —
   SIGINT did not unwedge the trio loop.
 ## What the "intentional" cancel path IS
 `tractor/spawn/_entry.py::_trio_main:157-186` —
 ```python
 try:
    if infect_asyncio:
        actor._infected_aio = True
        run_as_asyncio_guest(trio_main)
    else:
        trio.run(trio_main)
 except KeyboardInterrupt:
    logmeth = log.cancel
    exit_status: str = (
        'Actor received KBI (aka an OS-cancel)\n'
        ...
    )
 ```
 The "KBI == OS-cancel" mapping IS the runtime's
 deliberate, documented design. An OS-level SIGINT should
 flow as: kernel → trio handler → KBI at trio checkpoint
 → unwinds `async_main` → surfaces at `_trio_main`'s
 `except KeyboardInterrupt:` → `log.cancel` + clean `rc=0`.
 **So fixing this hang is not "add a new SIGINT behavior" —
 it's "make the existing designed behavior actually fire in
 this backend config".** That's why option (B) ("fix root
 cause") is aligned with existing design intent, not a
 scope expansion.
 ## Evidence
 ### Positive control: standalone fork-from-worker + `trio.run(sleep_forever)` + SIGINT WORKS
 ```python
 import os, signal, time, trio
 from tractor.spawn._subint_forkserver import (
    fork_from_worker_thread, wait_child,
 )
 def child_target() -> int:
    async def _main():
        try:
            await trio.sleep_forever()
        except KeyboardInterrupt:
            print('CHILD: caught KBI — trio SIGINT works!')
            return
    trio.run(_main)
    return 0
 pid = fork_from_worker_thread(child_target, thread_name='trio-sigint-test')
 time.sleep(1.0)
 os.kill(pid, signal.SIGINT)
 wait_child(pid)
 ```
 Result: `CHILD: caught KBI — trio SIGINT works!` + clean
 exit. So the fork-child + trio signal plumbing IS healthy
 in isolation. The hang appears only with the full tractor
 subactor runtime on top.
 ### Negative test: full tractor subactor + orphan-SIGINT
 Equivalent to the xfail test. Traceback dump via
 `faulthandler.register(SIGUSR1, all_threads=True)` at the
 stuck moment:
 ```
 Current thread 0x00007... [subint-forkserv] (most recent call first):
  File ".../trio/_core/_io_epoll.py", line 245 in get_events
  File ".../trio/_core/_run.py", line 2415 in run
  File "tractor/spawn/_entry.py", line 162 in _trio_main
  File "tractor/_child.py", line 72 in _actor_child_main
  File "tractor/spawn/_subint_forkserver.py", line 650 in _child_target
  File "tractor/spawn/_subint_forkserver.py", line 308 in _worker
  File ".../threading.py", line 1024 in run
 ```
 ### Thread + signal-mask inventory of the stuck subactor
 Single thread (`tid == pid`, comm `'subint-forkserv'`,
 which IS `threading.main_thread()` post-fork):
 ```
 SigBlk:  0000000000000000  # nothing blocked
 SigIgn:  0000000001001000  # SIGPIPE etc (Python defaults)
 SigCgt:  0000000108000202  # bit 1 = SIGINT caught
 ```
 Bit 1 set in `SigCgt` → SIGINT handler IS installed. So
 trio's handler IS in place at the kernel level — not a
 "handler missing" situation.
 ### Handler identity
 Inside the subactor's RPC body, `signal.getsignal(SIGINT)`
 returns `<function KIManager.install.<locals>.handler at
 0x...>` — trio's own `KIManager` handler. tractor's only
 SIGINT touches are `signal.getsignal()` *reads* (to stash
 into `debug.DebugStatus._trio_handler`); nothing writes
 over trio's handler outside the debug-REPL shielding path
 (`devx/debug/_tty_lock.py::shield_sigint`) which isn't
 engaged here (no debug_mode).
 ## Ruled out
 - **GIL starvation / signal-pipe-full** (class A,
  `subint_sigint_starvation_issue.md`): subactor runs on
  its own GIL (separate OS process), not sharing with the
  parent → no cross-process GIL contention. And the
  signal-mask inventory above (our `strace`-equivalent)
  shows SIGINT IS caught, not queued.
 - **Orphaned channel park** (`subint_cancel_delivery_hang_issue.md`):
  different failure mode — that one has trio iterating
  normally and getting wedged on an orphaned
  `chan.recv()` AFTER teardown. Here trio's event loop
  itself never wakes.
 - **Tractor explicitly catching + swallowing KBI**:
  greppable — the one `except KeyboardInterrupt:` in the
  runtime is the INTENTIONAL cancel-path catch at
  `_trio_main:164`. `async_main` uses `except Exception`
  (not BaseException), so KBI should propagate through
  cleanly if it ever fires.
 - **Missing `signal.set_wakeup_fd` (main-thread
  restriction)**: post-fork, the fork-worker thread IS
  `threading.main_thread()`, so trio's main-thread check
  passes and its wakeup-fd install should succeed.
 ## Root cause hypothesis (unverified)
 The SIGINT handler fires but trio's wakeup-fd write does
 not wake `epoll_wait`. Candidate causes, ranked by
 plausibility:
 1. **Wakeup-fd lifecycle race around tractor IPC setup.**
   `async_main` spins up an IPC server + `process_messages`
   loops early. Somewhere in that path the wakeup-fd that
   trio registered with its epoll instance may be
   closed/replaced/clobbered, so subsequent SIGINT writes
   land on an fd that's no longer in the epoll set.
   Evidence needed: compare
   `signal.set_wakeup_fd(-1)` return value inside a
   post-tractor-bringup RPC body vs. a pre-bringup
   equivalent. If they differ, that's it.
 2. **Shielded cancel scope around `process_messages`.**
   The RPC message loop is likely wrapped in a trio cancel
   scope; if that scope is `shield=True` at any outer
   layer, KBI scheduled at a checkpoint could be absorbed
   by the shield and never bubble out to `_trio_main`.
 3. **Pre-fork wakeup-fd inheritance.** trio in the PARENT
   process registered a wakeup-fd with its own epoll. The
   child inherits the fd number but not the parent's
   epoll instance — if tractor/trio re-uses the parent's
   stale fd number anywhere, writes would go to a no-op
   fd. (This is the least likely — `trio.run()` on the
   child calls `KIManager.install` which should install a
   fresh wakeup-fd from scratch.)
 ## Cross-backend scope question
 **Untested**: does the same orphan-SIGINT hang reproduce
 against the `trio_proc` backend (stock subprocess + exec)?
 If yes → pre-existing tractor bug, independent of
 `subint_forkserver`. If no → something specific to the
 fork-from-worker path (e.g. inherited fds, mid-epoll-setup
 interference).
 **Quick repro for trio_proc**:
 ```python
 # save as /tmp/trio_proc_orphan_sigint_repro.py
 import os, sys, signal, time, glob
 import subprocess as sp
 SCRIPT = '''
 import os, sys, trio, tractor
 async def _sleep_forever():
    print(f"CHILD_PID={os.getpid()}", flush=True)
    await trio.sleep_forever()
 async def _main():
    async with (
        tractor.open_root_actor(registry_addrs=[("127.0.0.1", 12350)]),
        tractor.open_nursery() as an,
    ):
        await an.run_in_actor(_sleep_forever, name="sf-child")
        print(f"PARENT_READY={os.getpid()}", flush=True)
        await trio.sleep_forever()
 trio.run(_main)
 '''
 proc = sp.Popen(
    [sys.executable, '-c', SCRIPT],
    stdout=sp.PIPE, stderr=sp.STDOUT,
 )
 # parse CHILD_PID + PARENT_READY off proc.stdout ...
 # SIGKILL parent, SIGINT child, poll.
 ```
 If that hangs too, open a broader issue; if not, this is
 `subint_forkserver`-specific (likely fd-inheritance-related).
 ## Why this is ours to fix (not CPython's)
 - Signal IS delivered (`SigCgt` bitmask confirms).
 - Handler IS installed (trio's `KIManager`).
 - Thread identity is correct post-fork.
 - `_trio_main` already has the intentional KBI→clean-exit
  path waiting to fire.
 Every CPython-level precondition is met. Something in
 tractor's runtime or trio's integration with it is
 breaking the SIGINT→wakeup→event-loop-wake pipeline.
 ## Possible fix directions
 1. **Audit the wakeup-fd across tractor's IPC bringup.**
   Add a trio startup hook that samples the current
   wakeup-fd (via `signal.set_wakeup_fd(-1)`, then
   immediately re-installing the returned fd, since
   `-1` disables it) at `_trio_main` entry, after
   `async_main` enters, and periodically — assert it's
   unchanged. If it moves, track down the writer.
 2. **Explicit `signal.set_wakeup_fd` reset after IPC
   setup.** Brute force: re-install a fresh wakeup-fd
   mid-bringup. Band-aid, but fast to try.
 3. **Ensure no `shield=True` cancel scope envelopes the
   RPC-message-loop / IPC-server task.** If one does,
   KBI-at-checkpoint never escapes.
 4. **Re-evaluate the `child_sigint='trio'` mode on
   `subint_forkserver_proc`.** Once fixed, it becomes
   effectively a no-op or a doc-only mode — trio's
   natural handler already does the right thing. Might
   end up removing the flag entirely if there's no
   behavioral difference between modes.
 ## Current workaround
 None; `child_sigint` defaults to `'ipc'` (IPC cancel is
 the only reliable cancel path today), and the xfail test
 documents the gap. Operators hitting orphan-SIGINT get a
 hung process that needs `SIGKILL`.
 ## Reproducer
 Inline, standalone (no pytest):
 ```python
 # save as /tmp/orphan_sigint_repro.py  (py3.14+)
 import os, sys, signal, time, glob, trio
 import tractor
 from tractor.spawn._subint_forkserver import (
    fork_from_worker_thread,
 )
 async def _sleep_forever():
    print(f'SUBACTOR[{os.getpid()}]', flush=True)
    await trio.sleep_forever()
 async def _main():
    async with (
        tractor.open_root_actor(
            registry_addrs=[('127.0.0.1', 12349)],
        ),
        tractor.open_nursery() as an,
    ):
        await an.run_in_actor(_sleep_forever, name='sf-child')
        await trio.sleep_forever()
 def child_target() -> int:
    from tractor.spawn._spawn import try_set_start_method
    try_set_start_method('subint_forkserver')
    trio.run(_main)
    return 0
 pid = fork_from_worker_thread(child_target, thread_name='repro')
 time.sleep(3.0)
 # find the subactor pid via /proc
 children = []
 for path in glob.glob(f'/proc/{pid}/task/*/children'):
    with open(path) as f:
        children.extend(int(x) for x in f.read().split() if x)
 subactor_pid = children[0]
 # SIGKILL root → orphan the subactor
 os.kill(pid, signal.SIGKILL)
 os.waitpid(pid, 0)
 time.sleep(0.3)
 # SIGINT the orphan — should cause clean trio exit
 os.kill(subactor_pid, signal.SIGINT)
 # poll for exit
 for _ in range(100):
    try:
        os.kill(subactor_pid, 0)
        time.sleep(0.1)
    except ProcessLookupError:
        print('HARNESS: subactor exited cleanly ✔')
        sys.exit(0)
 os.kill(subactor_pid, signal.SIGKILL)
 print('HARNESS: subactor hung — reproduced')
 sys.exit(1)
 ```
 Expected (current): `HARNESS: subactor hung — reproduced`.
 After fix: `HARNESS: subactor exited cleanly ✔`.
 ## References
 - `tractor/spawn/_entry.py::_trio_main:157-186` — the
  intentional KBI→clean-exit path this bug makes
  unreachable.
 - `tractor/spawn/_subint_forkserver` — the backend whose
  orphan cancel-robustness this blocks.
 - `tests/spawn/test_subint_forkserver.py::test_orphaned_subactor_sigint_cleanup_DRAFT`
  — the xfail'd reproducer in the test suite.
 - `ai/conc-anal/subint_cancel_delivery_hang_issue.md` —
  sibling "orphaned channel park" hang (different class).
 - `ai/conc-anal/subint_sigint_starvation_issue.md` —
  sibling "GIL starvation SIGINT drop" hang (different
  class).
 - tractor issue #379 — subint backend tracking.
--- a/ai/conc-anal/subint_forkserver_test_cancellation_leak_issue.md
+++ b/ai/conc-anal/subint_forkserver_test_cancellation_leak_issue.md
@ -1,849 +0,0 @@
 # `subint_forkserver` backend: `test_cancellation.py` multi-level cancel cascade hang
 Follow-up tracker: surfaced while wiring the new
 `subint_forkserver` spawn backend into the full tractor
 test matrix (step 2 of the post-backend-lands plan).
 See also
 `ai/conc-anal/subint_forkserver_orphan_sigint_hang_issue.md`
 — sibling tracker for a different forkserver-teardown
 class which probably shares the same fundamental root
 cause (fork-FD-inheritance across nested spawns).
 ## TL;DR
 `tests/test_cancellation.py::test_nested_multierrors[subint_forkserver]`
 hangs indefinitely under our new backend. The hang is
 **inside the graceful IPC cancel cascade** — every actor
 in the multi-level tree parks in `epoll_wait` waiting
 for IPC messages that never arrive. Not a hard-kill /
 tree-reap issue (we don't reach the hard-kill fallback
 path at all).
 Working hypothesis (unverified): **`os.fork()` from a
 subactor inherits the root parent's IPC listener socket
 FDs**. When a first-level subactor forkserver-spawns a
 grandchild, that grandchild inherits both its direct
 spawner's FDs AND the root's FDs — IPC message routing
 becomes ambiguous (or silently sends to the wrong
 channel), so the cancel cascade can't reach its target.
 ## Corrected diagnosis vs. earlier draft
 An earlier version of this doc claimed the root cause
 was **"forkserver teardown doesn't tree-kill
 descendants"** (SIGKILL only reaches the direct child,
 grandchildren survive and hold TCP `:1616`). That
 diagnosis was **wrong**, caused by conflating two
 observations:
 1. *5-zombie leak holding :1616* — happened in my own
   workflow when I aborted a bg pytest task with
   `pkill` (SIGTERM/SIGKILL, not SIGINT). The abrupt
   kill skipped the graceful `ActorNursery.__aexit__`
   cancel cascade entirely, orphaning descendants to
   init. **This was my cleanup bug, not a forkserver
   teardown bug.** Codified the fix (SIGINT-first +
   bounded wait before SIGKILL) in
   `feedback_sc_graceful_cancel_first.md` +
   `.claude/skills/run-tests/SKILL.md`.
 2. *`test_nested_multierrors` hangs indefinitely* —
   the real, separate, forkserver-specific bug
   captured by this doc.
 The two symptoms are unrelated. The tree-kill / setpgrp
 fix direction proposed earlier would not help (1) (SC-
 graceful-cleanup is the right answer there) and would
 not help (2) (the hang is in the cancel cascade, not
 in the hard-kill fallback).
 ## Symptom
 Reproducer (py3.14, clean env):
 ```sh
 # preflight: ensure clean env
 ss -tlnp 2>/dev/null | grep ':1616' && echo 'FOUL — cleanup first!' || echo 'clean'
 ./py314/bin/python -m pytest --spawn-backend=subint_forkserver \
  'tests/test_cancellation.py::test_nested_multierrors[subint_forkserver]' \
  --timeout=30 --timeout-method=thread --tb=short -v
 ```
 Observed: `pytest-timeout` fires at 30s with a thread-
 dump banner, but the process itself **remains alive
 after timeout** and doesn't unwedge on subsequent
 SIGINT. Requires SIGKILL to reap.
 ## Evidence (tree structure at hang point)
 All 5 processes are kernel-level `S` (sleeping) in
 `do_epoll_wait` (trio's event loop waiting on I/O):
 ```
 PID     PPID    THREADS  NAME             ROLE
 333986  1       2        subint-forkserv  pytest main (the test body)
 333993  333986  3        subint-forkserv  "child 1" spawner subactor
  334003 333993 1        subint-forkserv  grandchild errorer under child-1
  334014 333993 1        subint-forkserv  grandchild errorer under child-1
 333999  333986  1        subint-forkserv  "child 2" spawner subactor (NO grandchildren!)
 ```
 ### Asymmetric tree depth
 The test's `spawn_and_error(breadth=2, depth=3)` should
 have BOTH direct children spawning 2 grandchildren
 each, going 3 levels deep. Reality:
 - Child 1 (333993, 3 threads) DID spawn its two
  grandchildren as expected — fully booted trio
  runtime.
 - Child 2 (333999, 1 thread) did NOT spawn any
  grandchildren — clearly never completed its
  nursery's first `run_in_actor`. Its 1-thread state
  suggests the runtime never fully booted (no trio
  worker threads for `waitpid`/IPC).
 This asymmetry is the key clue: the two direct
 children started identically but diverged. Probably a
 race around fork-inherited state (listener FDs,
 subactor-nursery channel state) that happens to land
 differently depending on spawn ordering.
 ### Parent-side state
 Thread-dump of pytest main (333986) at the hang:
 - Main trio thread — parked in
  `trio._core._io_epoll.get_events` (epoll_wait on
  its event loop). Waiting for IPC from children.
 - Two trio-cache worker threads — each parked in
  `outcome.capture(sync_fn)` calling
  `os.waitpid(child_pid, 0)`. These are our
  `_ForkedProc.wait()` off-loads. They're waiting for
  the direct children to exit — but children are
  stuck in their own epoll_wait waiting for IPC from
  the parent.
 **It's a deadlock, not a leak:** the parent is
 correctly running `soft_kill(proc, _ForkedProc.wait,
 portal)` (graceful IPC cancel via
 `Portal.cancel_actor()`), but the children never
 acknowledge the cancel message (or the message never
 reaches them through the tangled post-fork IPC).
 ## What's NOT the cause (ruled out)
 - **`_ForkedProc.kill()` only SIGKILLs direct pid /
  missing tree-kill**: doesn't apply — we never reach
  the hard-kill path. The deadlock is in the graceful
  cancel cascade.
 - **Port `:1616` contention**: ruled out after the
  `reg_addr` fixture-wiring fix; each test session
  gets a unique port now.
 - **GIL starvation / SIGINT pipe filling** (class-A,
  `subint_sigint_starvation_issue.md`): doesn't apply
  — each subactor is its own OS process with its own
  GIL (not legacy-config subint).
 - **Child-side `_trio_main` absorbing KBI**: grep
  confirmed; `_trio_main` only catches KBI at the
  `trio.run()` callsite, which is reached only once
  `trio.run()` itself unwinds (returns or raises). The
  children here never exit `trio.run()` — they're
  wedged inside.
 ## Hypothesis: FD inheritance across nested forks
 `subint_forkserver_proc` calls
 `fork_from_worker_thread()` which ultimately does
 `os.fork()` from a dedicated worker thread. Standard
 Linux/POSIX fork semantics: **the child inherits ALL
 open FDs from the parent**, including listener
 sockets, epoll fds, trio wakeup pipes, and the
 parent's IPC channel sockets.
 At root-actor fork-spawn time, the root's IPC server
 listener FDs are open in the parent. Those get
 inherited by child 1. Child 1 then forkserver-spawns
 its OWN subactor (grandchild). The grandchild
 inherits FDs from child 1 — but child 1's address
 space still contains **the root's IPC listener FDs
 too** (inherited at first fork). So the grandchild
 has THREE sets of FDs:
 1. Its own (created after becoming a subactor).
 2. Its direct parent child-1's.
 3. The ROOT's (grandparent's) — inherited transitively.
 IPC message routing may be ambiguous in this tangled
 state. Or a listener socket that the root thinks it
 owns is actually open in multiple processes, and
 messages sent to it go to an arbitrary one. That
 would exactly match the observed "graceful cancel
 never propagates".
 This hypothesis predicts the bug **scales with fork
 depth**: single-level forkserver spawn
 (`test_subint_forkserver_spawn_basic`) works
 perfectly, but any test that spawns a second level
 deadlocks. Matches observations so far.
 ## Fix directions (to validate)
 ### 1. `close_fds=True` equivalent in `fork_from_worker_thread()`
 `subprocess.Popen` / `trio.lowlevel.open_process` have
 `close_fds=True` by default on POSIX — they
 enumerate open FDs in the child post-fork and close
 everything except stdio + any explicitly-passed FDs.
 Our raw `os.fork()` doesn't. Adding the equivalent to
 our `_worker` prelude would isolate each fork
 generation's FD set.
 Implementation sketch in
 `tractor.spawn._subint_forkserver.fork_from_worker_thread._worker`:
 ```python
 def _worker() -> None:
    pid: int = os.fork()
    if pid == 0:
        # CHILD: close inherited FDs except stdio + the
        # pid-pipe we just opened.
        keep: set[int] = {0, 1, 2, rfd, wfd}
        import resource
        soft, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
        os.closerange(3, soft)  # blunt; or enumerate /proc/self/fd
        # ... then child_target() as before
 ```
 Problem: overly aggressive — closes FDs the
 grandchild might legitimately need (e.g. its parent's
 IPC channel for the spawn-spec handshake, if we rely
 on that). Needs thought about which FDs are
 "inheritable and safe" vs. "inherited by accident".
 ### 2. Cloexec on tractor's own FDs
 Set `FD_CLOEXEC` on tractor-created sockets (listener
 sockets, IPC channel sockets, pipes). This flag
 causes automatic close on `execve`, but since we
 `fork()` without `exec()`, this alone doesn't help.
 BUT — combined with a child-side explicit close-
 non-cloexec loop, it gives us a way to mark "my
 private FDs" vs. "safe to inherit". Most robust, but
 requires tractor-wide audit.
 ### 3. Explicit FD cleanup in `_ForkedProc`/`_child_target`
 Have `subint_forkserver_proc`'s `_child_target`
 closure explicitly close the parent-side IPC listener
 FDs before calling `_actor_child_main`. Requires
 being able to enumerate "the parent's listener FDs
 that the child shouldn't keep" — plausible via
 `Actor.ipc_server`'s socket objects.
 ### 4. Use `os.posix_spawn` with explicit `file_actions`
 Instead of raw `os.fork()`, use `os.posix_spawn()`
 which supports explicit file-action specifications
 (close this FD, dup2 that FD). Cleaner semantics, but
 probably incompatible with our "no exec" requirement
 (subint_forkserver is a fork-without-exec design).
 **Likely correct answer: (3) — targeted FD cleanup
 via `actor.ipc_server` handle.** (1) is too blunt,
 (2) is too wide-ranging, (4) changes the spawn
 mechanism.
 ## Reproducer (standalone, no pytest)
 ```python
 # save as /tmp/forkserver_nested_hang_repro.py  (py3.14+)
 import trio, tractor
 async def assert_err():
    assert 0
 async def spawn_and_error(breadth: int = 2, depth: int = 1):
    async with tractor.open_nursery() as n:
        for i in range(breadth):
            if depth > 0:
                await n.run_in_actor(
                    spawn_and_error,
                    breadth=breadth,
                    depth=depth - 1,
                    name=f'spawner_{i}_{depth}',
                )
            else:
                await n.run_in_actor(
                    assert_err,
                    name=f'errorer_{i}',
                )
 async def _main():
    async with tractor.open_nursery() as n:
        for i in range(2):
            await n.run_in_actor(
                spawn_and_error,
                name=f'top_{i}',
                breadth=2,
                depth=1,
            )
 if __name__ == '__main__':
    from tractor.spawn._spawn import try_set_start_method
    try_set_start_method('subint_forkserver')
    with trio.fail_after(20):
        trio.run(_main)
 ```
 Expected (current): hangs on `trio.fail_after(20)`
 — children never ack the error-propagation cancel
 cascade. Pattern: top 2 direct children, 4
 grandchildren, 1 errorer deadlocks while trying to
 unwind through its parent chain.
 After fix: `trio.TooSlowError`-free completion; the
 root's `open_nursery` receives the
 `BaseExceptionGroup` containing the `AssertionError`
 from the errorer and unwinds cleanly.
 ## Update — 2026-04-23: partial fix landed, deeper layer surfaced
 Three improvements landed as separate commits in the
 `subint_forkserver_backend` branch (see `git log`):
 1. **`_close_inherited_fds()` in fork-child prelude**
   (`tractor/spawn/_subint_forkserver.py`). POSIX
   close-fds-equivalent enumeration via
   `/proc/self/fd` (or `RLIMIT_NOFILE` fallback), keep
   only stdio. This is fix-direction (1) from the list
   above — went with the blunt form rather than the
   targeted enum-via-`actor.ipc_server` form, turns
   out the aggressive close is safe because every
   inheritable resource the fresh child needs
   (IPC-channel socket, etc.) is opened AFTER the
   fork anyway.
 2. **`_ForkedProc.wait()` via `os.pidfd_open()` +
   `trio.lowlevel.wait_readable()`** — matches the
   `trio.Process.wait` / `mp.Process.sentinel` pattern
   used by `trio_proc` and `proc_waiter`. Gives us
   fully trio-cancellable child-wait (prior impl
   blocked a cache thread on a sync `os.waitpid` that
   was NOT trio-cancellable due to
   `abandon_on_cancel=False`).
 3. **`_parent_chan_cs` wiring** in
   `tractor/runtime/_runtime.py`: capture the shielded
   `loop_cs` for the parent-channel `process_messages`
   task in `async_main`; explicitly cancel it in
   `Actor.cancel()` teardown. This breaks the shield
   during teardown so the parent-chan loop exits when
   cancel is issued, instead of parking on a parent-
   socket EOF that might never arrive under fork
   semantics.
 **Concrete wins from (1):** the sibling
 `subint_forkserver_orphan_sigint_hang_issue.md` class
 is **now fixed** — `test_orphaned_subactor_sigint_cleanup_DRAFT`
 went from strict-xfail to pass. The xfail mark was
 removed; the test remains as a regression guard.
 **test_nested_multierrors STILL hangs** though.
 ### Updated diagnosis (narrowed)
 DIAGDEBUG instrumentation of `process_messages` ENTER/
 EXIT pairs + `_parent_chan_cs.cancel()` call sites
 showed (captured during a 20s-timeout repro):
 - 80 `process_messages` ENTERs, 75 EXITs → 5 stuck.
 - **All 40 `shield=True` ENTERs matched EXIT** — every
  shielded parent-chan loop exits cleanly. The
  `_parent_chan_cs` wiring works as intended.
 - **The 5 stuck loops are all `shield=False`** — peer-
  channel handlers (inbound connections handled by
  `handle_stream_from_peer` in stream_handler_tn).
 - After our `_parent_chan_cs.cancel()` fires, NEW
  shielded process_messages loops start (on the
  session reg_addr port — probably discovery-layer
  reconnection attempts). These don't block teardown
  (they all exit) but indicate the cancel cascade has
  more moving parts than expected.
 ### Remaining unknown
 Why don't the 5 peer-channel loops exit when
 `service_tn.cancel_scope.cancel()` fires? They're in
 `stream_handler_tn` which IS `service_tn` in the
 current configuration (`open_ipc_server(parent_tn=
 service_tn, stream_handler_tn=service_tn)`). A
 standard nursery-scope-cancel should propagate through
 them — no shield, no special handler. Something
 specific to the fork-spawned configuration keeps them
 alive.
 Candidate follow-up experiments:
 - Dump the trio task tree at the hang point (via
  `stackscope` or direct trio introspection) to see
  what each stuck loop is awaiting. `chan.__anext__`
  on a socket recv? An inner lock? A shielded sub-task?
 - Compare peer-channel handler lifecycle under
  `trio_proc` vs `subint_forkserver` with equivalent
  logging to spot the divergence.
 - Investigate whether the peer handler is caught in
  the `except trio.Cancelled:` path at
  `tractor/ipc/_server.py:448` that re-raises — but
  re-raise means it should still exit. Unless
  something higher up swallows it.
 ### Attempted fix (DID NOT work) — hypothesis (3)
 Tried: in `_serve_ipc_eps` finally, after closing
 listeners, also iterate `server._peers` and
 sync-close each peer channel's underlying stream
 socket fd:
 ```python
 for _uid, _chans in list(server._peers.items()):
    for _chan in _chans:
        try:
            _stream = _chan._transport.stream if _chan._transport else None
            if _stream is not None:
                _stream.socket.close()  # sync fd close
        except (AttributeError, OSError):
            pass
 ```
 Theory: closing the socket fd from outside the stuck
 recv task would make the recv see EBADF /
 ClosedResourceError and unblock.
 Result: `test_nested_multierrors[subint_forkserver]`
 still hangs identically. Either:
 - The sync `socket.close()` doesn't propagate into
  trio's in-flight `recv_some()` the way I expected
  (trio may hold an internal reference that keeps the
  fd open even after an external close), or
 - The stuck recv isn't even the root blocker and the
  peer handlers never reach the finally for some
  reason I haven't understood yet.
 Either way, the sync-close hypothesis is **ruled
 out**. Reverted the experiment, restored the skip-
 mark on the test.
 ### Aside: `-s` flag does NOT change `test_nested_multierrors` behavior
 Tested explicitly: both with and without `-s`, the
 test hangs identically. So the capture-pipe-fill
 hypothesis is **ruled out** for this test.
 The earlier `test_context_stream_semantics.py` `-s`
 observation was most likely caused by a competing
 pytest run in my session (confirmed via process list
 — my leftover pytest was alive at that time and
 could have been holding state on the default
 registry port).
 ## Update — 2026-04-23 (late): cancel delivery ruled in, nursery-wait ruled BLOCKER
 **New diagnostic run** instrumented
 `handle_stream_from_peer` at ENTER / `except
 trio.Cancelled:` / finally, plus `Actor.cancel()`
 just before `self._parent_chan_cs.cancel()`. Result:
 - **40 `handle_stream_from_peer` ENTERs**.
 - **0 `except trio.Cancelled:` hits** — cancel
  never fires on any peer-handler.
 - **35 finally hits** — those handlers exit via
  peer-initiated EOF (normal return), NOT cancel.
 - **5 handlers never reach finally** — stuck forever.
 - **`Actor.cancel()` fired in 12 PIDs** — but the
  PIDs with peer handlers that DIDN'T fire
  Actor.cancel are exactly **root + 2 direct
  spawners**. These 3 actors have peer handlers
  (for their own subactors) that stay stuck because
  **`Actor.cancel()` at these levels never runs**.
 ### The actual deadlock shape
 `Actor.cancel()` lives in
 `open_root_actor.__aexit__` / `async_main` teardown.
 That only runs when the enclosing `async with
 tractor.open_nursery()` exits. The nursery's
 `__aexit__` calls the backend `*_proc` spawn target's
 teardown, which does `soft_kill() →
 _ForkedProc.wait()` on its child PID. That wait is
 trio-cancellable via pidfd now (good) — but nothing
 CANCELS it because the outer scope only cancels when
 `Actor.cancel()` runs, which only runs when the
 nursery completes, which waits on the child.
 It's a **multi-level mutual wait**:
 ```
 root              blocks on spawner.wait()
  spawner         blocks on grandchild.wait()
    grandchild    blocks on errorer.wait()
      errorer     Actor.cancel() ran, but process
                  may not have fully exited yet
                  (something in root_tn holding on?)
 ```
 Each level waits for the level below. The bottom
 level (errorer) reaches Actor.cancel(), but its
 process may not fully exit — meaning its pidfd
 doesn't go readable, meaning the grandchild's
 waitpid doesn't return, meaning the grandchild's
 nursery doesn't unwind, etc. all the way up.
 ### Refined question
 **Why does an errorer process not exit after its
 `Actor.cancel()` completes?**
 Possibilities:
 1. `_parent_chan_cs.cancel()` fires (shielded
   parent-chan loop unshielded), but the task is
   stuck INSIDE the shielded loop's recv in a way
   that cancel still can't break.
 2. After `Actor.cancel()` returns, `async_main`
   still has other tasks in `root_tn` waiting for
   something that never arrives (e.g. outbound
   IPC reply delivery).
 3. The `os._exit(rc)` in `_worker` (at
   `_subint_forkserver.py`) doesn't run because
   `_child_target` never returns.
 Next-session candidate probes (in priority order):
 1. **Instrument `_worker`'s fork-child branch** to
   confirm whether `child_target()` returns (and
   thus `os._exit(rc)` is reached) for errorer
   PIDs. If yes → process should die; if no →
   trace back into `_actor_child_main` /
   `_trio_main` / `async_main` to find the stuck
   spot.
 2. **Instrument `async_main`'s final unwind** to
   see which await in the teardown doesn't
   complete.
 3. **Compare under `trio_proc` backend** at the
   same `_worker`-equivalent level to see where
   the flows diverge.
 ### Rule-out: NOT a stuck peer-chan recv
 Earlier hypothesis was that the 5 stuck peer-chan
 loops were blocked on a socket recv that cancel
 couldn't interrupt. This pass revealed the real
 cause: cancel **never reaches those tasks** because
 their owning actor's `Actor.cancel()` never runs.
 The recvs are fine — they're just parked because
 nothing is telling them to stop.
 ## Update — 2026-04-23 (very late): leaves exit, middle actors stuck in `trio.run`
 Yet another instrumentation pass — this time
 printing at:
 - `_worker` child branch: `pre child_target()` /
  `child_target RETURNED rc=N` / `about to
  os._exit(rc)` 
 - `_trio_main`: `about to trio.run` /
  `trio.run RETURNED NORMALLY` / `FINALLY`
 **Fresh-run results** (`test_nested_multierrors[
 subint_forkserver]`, depth=1/breadth=2, 1 root + 14
 forked = 15 actors total):
 - **9 processes completed the full flow** —
  `trio.run RETURNED NORMALLY` → `child_target
  RETURNED rc=0` → `about to os._exit(0)`. These
  are the LEAVES of the tree (errorer actors) plus
  their direct parents (depth-0 spawners). They
  actually exit their processes.
 - **5 processes are stuck INSIDE `trio.run(trio_main)`**
  — they hit "about to trio.run" but NEVER see
  "trio.run RETURNED NORMALLY". These are root +
  top-level spawners + one intermediate.
 **What this means:** `async_main` itself is the
 deadlock holder, not the peer-channel loops.
 Specifically, the outer `async with root_tn:` in
 `async_main` never exits for the 5 stuck actors.
 Their `trio.run` never returns → `_trio_main`
 catch/finally never runs → `_worker` never reaches
 `os._exit(rc)` → the PROCESS never dies → its
 parent's `_ForkedProc.wait()` blocks → parent's
 nursery hangs → parent's `async_main` hangs → ...
 ### The new precise question
 **What task in the 5 stuck actors' `async_main`
 never completes?** Candidates:
 1. The shielded parent-chan `process_messages`
   task in `root_tn` — but we explicitly cancel it
   via `_parent_chan_cs.cancel()` in `Actor.cancel()`.
   However, `Actor.cancel()` only runs during
   `open_root_actor.__aexit__`, which itself runs
   only after `async_main`'s outer unwind — which
   doesn't happen. So the shield isn't broken.
 2. `await actor_nursery._join_procs.wait()` or
   similar in the inline backend `*_proc` flow.
 3. `_ForkedProc.wait()` on a grandchild that
   actually DID exit — but the pidfd_open watch
   didn't fire for some reason (race between
   pidfd_open and the child exiting?).
 The most specific next probe: **add DIAG around
 `_ForkedProc.wait()` enter/exit** to see whether
 the pidfd-based wait returns for every grandchild
 exit. If a stuck parent's `_ForkedProc.wait()`
 NEVER returns despite its child exiting, the
 pidfd mechanism has a race bug under nested
 forkserver.
 Alternative probe: instrument `async_main`'s outer
 nursery exits to find which nursery's `__aexit__`
 is stuck, drilling down from `trio.run` to the
 specific `async with` that never completes.
 ### Cascade summary (updated tree view)
 ```
 ROOT (pytest)                       STUCK in trio.run
 ├── top_0 (spawner, d=1)            STUCK in trio.run
 │   ├── spawner_0_d1_0 (d=0)        exited (os._exit 0)
 │   │   ├── errorer_0_0             exited (os._exit 0)
 │   │   └── errorer_0_1             exited (os._exit 0)
 │   └── spawner_0_d1_1 (d=0)        exited (os._exit 0)
 │       ├── errorer_0_2             exited (os._exit 0)
 │       └── errorer_0_3             exited (os._exit 0)
 └── top_1 (spawner, d=1)            STUCK in trio.run
    ├── spawner_1_d1_0 (d=0)        STUCK in trio.run (sibling race?)
    │   ├── errorer_1_0             exited
    │   └── errorer_1_1             exited
    └── spawner_1_d1_1 (d=0)        STUCK in trio.run
        ├── errorer_1_2             exited
        └── errorer_1_3             exited
 ```
 Grandchildren (d=0 spawners) exit OR stick —
 asymmetric. Not purely depth-determined. Some race
 condition in nursery teardown when multiple
 siblings error simultaneously.
 ## Update — 2026-04-23 (late, probe iteration 3): hang pinpointed to `wait_for_no_more_peers()`
 Further DIAGDEBUG at every milestone in `async_main`
 (runtime UP / EXITED service_tn / EXITED root_tn /
 FINALLY ENTER / RETURNING) plus `_ForkedProc.wait`
 ENTER/RETURNED per-pidfd. Result:
 **Every stuck actor reaches `async_main: FINALLY
 ENTER` but NOT `async_main: RETURNING`.**
 That isolates the hang to a specific await in
 `async_main`'s finally block at
 `tractor/runtime/_runtime.py:1837+`. The suspect:
 ```python
 # Ensure all peers (actors connected to us as clients) are finished
 if ipc_server := actor.ipc_server and ipc_server.has_peers(check_chans=True):
    ...
    await ipc_server.wait_for_no_more_peers()  # ← UNBOUNDED, blocks forever
 ```
 `_no_more_peers` is an `Event` set only when
 `server._peers` empties (see
 `ipc/_server.py:526-530`). If ANY peer-handler is
 stuck (the 5 unclosed loops from the earlier pass),
 it keeps its channel in `server._peers`, so the
 event never fires, so the wait hangs.
 ### Applied fix (partial, landed as defensive-in-depth)
 `tractor/runtime/_runtime.py:1981` —
 `wait_for_no_more_peers()` call now wrapped in
 `trio.move_on_after(3.0)` + a warning log when the
 timeout fires. Commented with the full rationale.
 **Verified:** with this fix, ALL 15 actors reach
 `async_main: RETURNING` cleanly (up from 10/15
 reaching end before).
 **Unfortunately:** the test still hangs past 45s
 total — meaning there's YET ANOTHER unbounded wait
 downstream of `async_main`. The bounded
 `wait_for_no_more_peers` unblocks one level, but
 the cascade has another level above it.
 ### Candidates for the remaining hang
 1. `open_root_actor`'s own finally / post-
   `async_main` flow in `_root.py` — specifically
   `await actor.cancel(None)` which has its own
   internal waits.
 2. The `trio.run()` itself doesn't return even
   after the root task completes because trio's
   nursery still has background tasks running.
 3. Maybe `_serve_ipc_eps`'s finally has an await
   that blocks when peers aren't clearing.
 ### Current stance
 - Defensive `wait_for_no_more_peers` bound landed
  (good hygiene regardless); it also reveals a real
  deadlock-avoidance gap in tractor's cleanup.
 - Test still hangs → skip-mark restored on
  `test_nested_multierrors[subint_forkserver]`.
 - The full chain of unbounded waits needs another
  session of drilling, probably at
  `open_root_actor` / `actor.cancel` level.
 ### Summary of this investigation's wins
 1. **FD hygiene fix** (`_close_inherited_fds`) —
   correct, closed orphan-SIGINT sibling issue.
 2. **pidfd-based `_ForkedProc.wait`** — cancellable,
   matches trio_proc pattern.
 3. **`_parent_chan_cs` wiring** —
   `Actor.cancel()` now breaks the shielded parent-
   chan `process_messages` loop.
 4. **`wait_for_no_more_peers` bounded** —
   prevents the actor-level finally hang.
 5. **Ruled-out hypotheses:** tree-kill missing
   (wrong), stuck socket recv (wrong).
 6. **Pinpointed remaining unknown:** at least one
   more unbounded wait in the teardown cascade
   above `async_main`. Concrete candidates
   enumerated above.
 ## Update — 2026-04-23 (VERY late): pytest capture pipe IS the final gate
 After landing fixes 1-4 and instrumenting every
 layer down to `tractor_test`'s `trio.run(_main)`:
 **Empirical result: with `pytest -s` the test PASSES
 in 6.20s.** Without `-s` (default `--capture=fd`) it
 hangs forever.
 DIAG timeline for the root pytest PID (with `-s`
 implied from later verification):
 ```
 tractor_test: about to trio.run(_main)
 open_root_actor: async_main task started, yielding to test body
 _main: about to await wrapped test fn
 _main: wrapped RETURNED cleanly        ← test body completed!
 open_root_actor: about to actor.cancel(None)
 Actor.cancel ENTER req_chan=False
 Actor.cancel RETURN
 open_root_actor: actor.cancel RETURNED
 open_root_actor: outer FINALLY
 open_root_actor: finally END (returning from ctxmgr)
 tractor_test: trio.run FINALLY (returned or raised)  ← trio.run fully returned!
 ```
 `trio.run()` fully returns. The test body itself
 completes successfully (pytest.raises absorbed the
 expected `BaseExceptionGroup`). What blocks is
 **pytest's own stdout/stderr capture** — under
 `--capture=fd` default, pytest replaces the parent
 process's fd 1,2 with pipe write-ends it's reading
 from. Fork children inherit those pipe fds
 (because `_close_inherited_fds` correctly preserves
 stdio). High-volume subactor error-log tracebacks
 (7+ actors each logging multiple
 `RemoteActorError`/`ExceptionGroup` tracebacks on
 the error-propagation cascade) fill the 64KB Linux
 pipe buffer. Subactor writes block. Subactor can't
 progress. Process doesn't exit. Parent's
 `_ForkedProc.wait` (now pidfd-based and
 cancellable, but nothing's cancelling here since
 the test body already completed) keeps the pipe
 reader alive... but pytest isn't draining its end
 fast enough because test-teardown/fixture-cleanup
 is in progress.
 **Actually** the exact mechanism may be slightly
 different: pytest's capture fixture MIGHT be
 actively reading, but the subactors write faster
 than it drains and so overflow its buffer. Or pytest
 might itself be blocked on the finalization step.
 Either way, `-s` conclusively fixes it.
 ### Why I ruled this out earlier (and shouldn't have)
 Earlier in this investigation I tested
 `test_nested_multierrors` with/without `-s` and
 both hung. That's because AT THAT TIME, fixes 1-4
 weren't all in place yet. The test was hanging at
 multiple deeper levels long before reaching the
 "generate lots of error-log output" phase. Once
 the cascade actually tore down cleanly, enough
 output was produced to hit the capture-pipe limit.
 **Classic order-of-operations mistake in
 debugging:** ruling something out too early based
 on a test that was actually failing for a
 different reason.
 ### Fix direction (next session)
 Redirect subactor stdout/stderr to `/dev/null` (or
 a session-scoped log file) in the fork-child
 prelude, right after `_close_inherited_fds()`. This
 severs the inherited pytest-capture pipes and lets
 subactor output flow elsewhere. Under normal
 production use (non-pytest), stdout/stderr would
 be the TTY — we'd want to keep that. So the
 redirect should be conditional or opt-in via the
 `child_sigint`/proc_kwargs flag family.
 Alternative: document as a gotcha and recommend
 `pytest -s` for any tests using the
 `subint_forkserver` backend with multi-level actor
 trees. Simpler, user-visible, no code change.
 ### Current state
 - Skip-mark on `test_nested_multierrors[subint_forkserver]`
  restored with reason pointing here.
 - Test confirmed passing with `-s` after all 4
  cascade fixes applied.
 - The 4 cascade fixes are NOT wasted — they're
  correct hardening regardless of the capture-pipe
  issue, AND without them we'd never reach the
  "actually produces enough output to fill the
  pipe" state.
 ## Stopgap (landed)
 `test_nested_multierrors` skip-marked under
 `subint_forkserver` via
 `@pytest.mark.skipon_spawn_backend('subint_forkserver',
 reason='...')`, cross-referenced to this doc. Mark
 should be dropped once the peer-channel-loop exit
 issue is fixed.
 ## References
 - `tractor/spawn/_subint_forkserver.py::fork_from_worker_thread`
  — the primitive whose post-fork FD hygiene is
  probably the culprit.
 - `tractor/spawn/_subint_forkserver.py::subint_forkserver_proc`
  — the backend function that orchestrates the
  graceful cancel path hitting this bug.
 - `tractor/spawn/_subint_forkserver.py::_ForkedProc`
  — the `trio.Process`-compatible shim; NOT the
  failing component (confirmed via thread-dump).
 - `tests/test_cancellation.py::test_nested_multierrors`
  — the test that surfaced the hang.
 - `ai/conc-anal/subint_forkserver_orphan_sigint_hang_issue.md`
  — sibling hang class; probably same underlying
  fork-FD-inheritance root cause.
 - tractor issue #379 — subint backend tracking.
--- a/ai/conc-anal/subint_forkserver_thread_constraints_on_pep684_issue.md
+++ b/ai/conc-anal/subint_forkserver_thread_constraints_on_pep684_issue.md
@ -1,184 +0,0 @@
 # Revisit `subint_forkserver` thread-cache constraints once msgspec PEP 684 support lands
 Follow-up tracker for cleanup work gated on the msgspec
 PEP 684 adoption upstream ([jcrist/msgspec#563](https://github.com/jcrist/msgspec/issues/563)).
 Context — why this exists
 -------------------------
 The `tractor.spawn._subint_forkserver` submodule currently
 carries two "non-trio" thread-hygiene constraints whose
 necessity is tangled with issues that *should* dissolve
 under PEP 684 isolated-mode subinterpreters:
 1. `fork_from_worker_thread()` / `run_subint_in_worker_thread()`
   internally allocate a **dedicated `threading.Thread`**
   rather than using `trio.to_thread.run_sync()`.
 2. The test helper is named
   `run_fork_in_non_trio_thread()` — the
   `non_trio` qualifier is load-bearing today.
 This doc catalogs *why* those constraints exist, which of
 them isolated-mode would fix, and what the
 audit-and-cleanup path looks like once msgspec #563 is
 resolved.
 The three reasons the constraints exist
 ---------------------------------------
 ### 1. GIL-starvation class → fixed by PEP 684 isolated mode
 The class-A hang documented in
 `subint_sigint_starvation_issue.md` is entirely about
 legacy-config subints **sharing the main GIL**. Once
 msgspec #563 lands and tractor flips
 `tractor.spawn._subint` to
 `concurrent.interpreters.create()` (isolated config), each
 subint gets its own GIL. Abandoned subint threads can't
 contend for main's GIL → can't starve the main trio loop
 → signal-wakeup-pipe drains normally → no SIGINT-drop.
 This class of hazard **dissolves entirely**. The
 non-trio-thread requirement for *this reason* disappears.
 ### 2. Destroy race / tstate-recycling → orthogonal; unclear
 The `subint_proc` dedicated-thread fix (commit `26fb8206`)
 addressed a different issue: `_interpreters.destroy(interp_id)`
 was blocking on a trio-cache worker that had run an
 earlier `interp.exec()` for that subint. Working
 hypothesis at the time was "the cached thread retains the
 subint's tstate".
 But tstate-handling is **not specific to GIL mode** —
 `_PyXI_Enter` / `_PyXI_Exit` (the C-level machinery both
 configs use to enter/leave a subint from a thread) should
 restore the caller's tstate regardless of GIL config. So
 isolated mode **doesn't obviously fix this**. It might be:
 - A py3.13 bug fixed in later versions — we saw the race
  first on 3.13 and never re-tested on 3.14 after moving
  to dedicated threads.
 - A genuine CPython quirk around cached threads that
  exec'd into a subint, persisting across GIL modes.
 - Something else we misdiagnosed — the empirical fix
  (dedicated thread) worked but the analysis may have
  been incomplete.
 Only way to know: once we're on isolated mode, empirically
 retry `trio.to_thread.run_sync(interp.exec, ...)` and see
 if `destroy()` still blocks. If it does, keep the
 dedicated thread; if not, one constraint relaxed.
 ### 3. Fork-from-main-interp-tstate (the constraint in this module's helper names)
 The fork-from-main-interp-tstate invariant — CPython's
 `PyOS_AfterFork_Child` →
 `_PyInterpreterState_DeleteExceptMain` gate documented in
 `subint_fork_blocked_by_cpython_post_fork_issue.md` — is
 about the calling thread's **current** tstate at the
 moment `os.fork()` runs. If trio's cache threads never
 enter subints at all, their tstate is plain main-interp,
 and fork from them would be fine.
 The reason the smoke test +
 `run_fork_in_non_trio_thread` test helper
 currently use a dedicated `threading.Thread` is narrow:
 **we don't want to risk a trio cache thread that has
 previously been used as a subint driver being the one that
 picks up the fork job**. If cached tstate doesn't get
 cleared (back to reason #2), the fork's child-side
 post-init would see the wrong interp and abort.
 In an isolated-mode world where msgspec works:
 - `subint_proc` would use the public
  `concurrent.interpreters.create()` + `Interpreter.exec()`
  / `Interpreter.close()` — which *should* handle tstate
  cleanly (they're the "blessed" API).
 - If so, trio's cache threads are safe to fork from
  regardless of whether they've previously driven subints.
 - → the `non_trio` qualifier in
  `run_fork_in_non_trio_thread` becomes
  *overcautious* rather than load-bearing, and the
  dedicated-thread primitives in `_subint_forkserver.py`
  can likely be replaced with straight
  `trio.to_thread.run_sync()` wrappers.
 TL;DR
 -----
 | constraint | fixed by isolated mode? |
 |---|---|
 | GIL-starvation (class A) | **yes** |
 | destroy race on cached worker | unclear — empirical test on py3.14 + isolated API required |
 | fork-from-main-tstate requirement on worker | **probably yes, conditional on the destroy-race question above** |
 If #2 also resolves on py3.14+ with isolated mode,
 tractor could drop the `non_trio` qualifier from the fork
 helper's name and just use `trio.to_thread.run_sync(...)`
 for everything. But **we shouldn't do that preemptively**
 — the current cautious design is cheap (one dedicated
 thread per fork / per subint-exec) and correct.
 Audit plan when msgspec #563 lands
 ----------------------------------
 Assuming msgspec grows `Py_mod_multiple_interpreters`
 support:
 1. **Flip `tractor.spawn._subint` to isolated mode.** Drop
   the `_interpreters.create('legacy')` call in favor of
   the public API (`concurrent.interpreters.create()` +
   `Interpreter.exec()` / `Interpreter.close()`). Run the
   three `ai/conc-anal/subint_*_issue.md` reproducers —
   class-A (`test_stale_entry_is_deleted` etc.) should
   pass without the `skipon_spawn_backend('subint')` marks
   (revisit the marker inventory).
 2. **Empirical destroy-race retest.** In `subint_proc`,
   swap the dedicated `threading.Thread` back to
   `trio.to_thread.run_sync(Interpreter.exec, ...,
   abandon_on_cancel=False)` and run the full subint test
   suite. If `Interpreter.close()` (or the backing
   destroy) blocks the same way as the legacy version
   did, revert and keep the dedicated thread.
 3. **If #2 is clean**, audit `_subint_forkserver.py`:
   - Rename `run_fork_in_non_trio_thread` → drop the
     `_non_trio_` qualifier (e.g. `run_fork_in_thread`) or
     inline the two-line `trio.to_thread.run_sync` call at
     the call sites and drop the helper entirely.
   - Consider whether `fork_from_worker_thread` +
     `run_subint_in_worker_thread` still warrant being
     separate module-level primitives or whether they
     collapse into a compound
     `trio.to_thread.run_sync`-driven pattern inside the
     (future) `subint_forkserver_proc` backend.
 4. **Doc fallout.** `subint_sigint_starvation_issue.md`
   and `subint_cancel_delivery_hang_issue.md` both cite
   the legacy-GIL-sharing architecture as the root cause.
   Close them with commit-refs to the isolated-mode
   migration. This doc itself should get a closing
   post-mortem section noting which of #1/#2/#3 actually
   resolved vs persisted.
 References
 ----------
 - `tractor.spawn._subint_forkserver` — the in-tree module
  whose constraints this doc catalogs.
 - `ai/conc-anal/subint_sigint_starvation_issue.md` — the
  GIL-starvation class.
 - `ai/conc-anal/subint_cancel_delivery_hang_issue.md` —
  sibling Ctrl-C-able hang class.
 - `ai/conc-anal/subint_fork_blocked_by_cpython_post_fork_issue.md`
  — why fork-from-subint is blocked (this drives the
  forkserver-via-non-subint-thread workaround).
 - `ai/conc-anal/subint_fork_from_main_thread_smoketest.py`
  — empirical validation for the workaround.
 - [PEP 684 — per-interpreter GIL](https://peps.python.org/pep-0684/)
 - [PEP 734 — `concurrent.interpreters` public API](https://peps.python.org/pep-0734/)
 - [jcrist/msgspec#563 — PEP 684 support tracker](https://github.com/jcrist/msgspec/issues/563)
 - tractor issue #379 — subint backend tracking.
--- a/ai/conc-anal/subint_sigint_starvation_issue.md
+++ b/ai/conc-anal/subint_sigint_starvation_issue.md
@ -1,350 +0,0 @@
 # `subint` backend: abandoned-subint thread can wedge main trio event loop (Ctrl-C unresponsive)
 Follow-up to the Phase B subint spawn-backend PR (see
 `tractor.spawn._subint`, issue #379). The hard-kill escape
 hatch we landed (`_HARD_KILL_TIMEOUT`, bounded shields,
 `daemon=True` driver-thread abandonment) handles *most*
 stuck-subint scenarios cleanly, but there's one class of
 hang that can't be fully escaped from within tractor: a
 still-running abandoned sub-interpreter can starve the
 **parent's** trio event loop to the point where **SIGINT is
 effectively dropped at the kernel ↔ Python boundary** —
 making the pytest process un-Ctrl-C-able.
 ## Symptom
 Running `test_stale_entry_is_deleted[subint]` under
 `--spawn-backend=subint`:
 1. Test spawns a subactor (`transport_fails_actor`) which
   kills its own IPC server and then awaits
   `trio.sleep_forever()`.
 2. Parent tries `Portal.cancel_actor()` → channel
   disconnected → fast return.
 3. Nursery teardown triggers our `subint_proc` cancel path.
   Portal-cancel fails (dead channel),
   `_HARD_KILL_TIMEOUT` fires, driver thread is abandoned
   (`daemon=True`), `_interpreters.destroy(interp_id)`
   raises `InterpreterError` (because the subint is still
   running).
 4. Test appears to hang indefinitely at the *outer*
   `async with tractor.open_nursery() as an:` exit.
 5. `Ctrl-C` at the terminal does nothing. The pytest
   process is un-interruptable.
 ## Evidence
 ### `strace` on the hung pytest process
 ```
 --- SIGINT {si_signo=SIGINT, si_code=SI_KERNEL} ---
 write(37, "\2", 1) = -1 EAGAIN (Resource temporarily unavailable)
 rt_sigreturn({mask=[WINCH]}) = 140585542325792
 ```
 Translated:
 - Kernel delivers `SIGINT` to pytest.
 - CPython's C-level signal handler fires and tries to
  write the signal number byte (`0x02` = SIGINT) to fd 37
  — the **Python signal-wakeup fd** (set via
  `signal.set_wakeup_fd()`, which trio uses to wake its
  event loop on signals).
 - Write returns `EAGAIN` — **the pipe is full**. Nothing
  is draining it.
 - `rt_sigreturn` with the signal masked off — signal is
  "handled" from the kernel's perspective but the actual
  Python-level handler (and therefore trio's
  `KeyboardInterrupt` delivery) never runs.
 ### Stack dump (via `tractor.devx.dump_on_hang`)
 At 20s into the hang, only the **main thread** is visible:
 ```
 Thread 0x...7fdca0191780 [python] (most recent call first):
  File ".../trio/_core/_io_epoll.py", line 245 in get_events
  File ".../trio/_core/_run.py", line 2415 in run
  File ".../tests/discovery/test_registrar.py", line 575 in test_stale_entry_is_deleted
  ...
 ```
 No driver thread shows up. The abandoned-legacy-subint
 thread still exists from the OS's POV (it's still running
 inside `_interpreters.exec()` driving the subint's
 `trio.run()` on `trio.sleep_forever()`) but the **main
 interp's faulthandler can't see threads currently executing
 inside a sub-interpreter's tstate**. Concretely: the thread
 is alive, holding state we can't introspect from here.
 ## Root cause analysis
 The most consistent explanation for both observations:
 1. **Legacy-config subinterpreters share the main GIL.**
   PEP 734's public `concurrent.interpreters.create()`
   defaults to `'isolated'` (per-interp GIL), but tractor
   uses `_interpreters.create('legacy')` as a workaround
   for C extensions that don't yet support PEP 684
   (notably `msgspec`, see
   [jcrist/msgspec#563](https://github.com/jcrist/msgspec/issues/563)).
   Legacy-mode subints share process-global state
   including the GIL.
 2. **Our abandoned subint thread never exits.** After our
   hard-kill timeout, `driver_thread.join()` is abandoned
   via `abandon_on_cancel=True` and the thread is
   `daemon=True` so proc-exit won't block on it — but the
   thread *itself* is still alive inside
   `_interpreters.exec()`, driving a `trio.run()` that
   will never return (the subint actor is in
   `trio.sleep_forever()`).
 3. **`_interpreters.destroy()` cannot force-stop a running
   subint.** It raises `InterpreterError` on any
   still-running subinterpreter; there is no public
   CPython API to force-destroy one.
 4. **Shared-GIL + non-terminating subint thread → main
   trio loop starvation.** Under enough load (the subint's
   trio event loop iterating in the background, IPC-layer
   tasks still in the subint, etc.) the main trio event
   loop can fail to iterate frequently enough to drain its
   wakeup pipe. Once that pipe fills, `SIGINT` writes from
   the C signal handler return `EAGAIN` and signals are
   silently dropped — exactly what `strace` shows.
 The shielded
 `await actor_nursery._join_procs.wait()` at the top of
 `subint_proc` (inherited unchanged from the `trio_proc`
 pattern) is structurally involved too: if main trio *does*
 get a schedule slice, it'd find the `subint_proc` task
 parked on `_join_procs` under shield — which traps whatever
 `Cancelled` arrives. But that's a second-order effect; the
 signal-pipe-full condition is the primary "Ctrl-C doesn't
 work" cause.
 ## Why we can't fix this from inside tractor
 - **No force-destroy API.** CPython provides neither a
  `_interpreters.force_destroy()` nor a thread-
  cancellation primitive (`pthread_cancel` is actively
  discouraged and unavailable on Windows). A subint stuck
  in pure-Python loops (or worse, C code that doesn't poll
  for signals) is structurally unreachable from outside.
 - **Shared GIL is the root scheduling issue.** As long as
  we're forced into legacy-mode subints for `msgspec`
  compatibility, the abandoned-thread scenario is
  fundamentally a process-global GIL-starvation window.
 - **`signal.set_wakeup_fd()` is process-global.** Even if
  we wanted to put our own drainer on the wakeup pipe,
  only one party owns it at a time.
 ## Current workaround
 - **Fixture-side SIGINT loop on the `daemon` subproc** (in
  this test's `daemon: subprocess.Popen` fixture in
  `tests/conftest.py`). The daemon dying closes its end of
  the registry IPC, which unblocks a pending recv in main
  trio's IPC-server task, which lets the event loop
  iterate, which drains the wakeup pipe, which finally
  delivers the test-harness SIGINT.
 - **Module-level skip on py3.13**
  (`pytest.importorskip('concurrent.interpreters')`) — the
  private `_interpreters` C module exists on 3.13 but the
  multi-trio-task interaction hangs silently there
  independently of this issue.
 ## Path forward
 1. **Primary**: upstream `msgspec` PEP 684 adoption
   ([jcrist/msgspec#563](https://github.com/jcrist/msgspec/issues/563)).
   Unlocks `concurrent.interpreters.create()` isolated
   mode → per-interp GIL → abandoned subint threads no
   longer starve the parent's main trio loop. At that
   point we can flip `_subint.py` back to the public API
   (`create()` / `Interpreter.exec()` / `Interpreter.close()`)
   and drop the private `_interpreters` path.
 2. **Secondary**: watch CPython for a public
   force-destroy primitive. If something like
   `Interpreter.close(force=True)` lands, we can use it as
   a hard-kill final stage and actually tear down
   abandoned subints.
 3. **Harness-level**: document the fixture-side SIGINT
   loop pattern as the "known workaround" for subint-
   backend tests that can leave background state holding
   the main event loop hostage.
 ## References
 - PEP 734 (`concurrent.interpreters`):
  <https://peps.python.org/pep-0734/>
 - PEP 684 (per-interpreter GIL):
  <https://peps.python.org/pep-0684/>
 - `msgspec` PEP 684 tracker:
  <https://github.com/jcrist/msgspec/issues/563>
 - CPython `_interpretersmodule.c` source:
  <https://github.com/python/cpython/blob/main/Modules/_interpretersmodule.c>
 - `tractor.spawn._subint` module docstring (in-tree
  explanation of the legacy-mode choice and its
  tradeoffs).
 ## Reproducer
 ```
 ./py314/bin/python -m pytest \
  tests/discovery/test_registrar.py::test_stale_entry_is_deleted \
  --spawn-backend=subint \
  --tb=short --no-header -v
 ```
 Hangs indefinitely without the fixture-side SIGINT loop;
 with the loop, the test completes (albeit with the
 abandoned-thread warning in logs).
 ## Additional known-hanging tests (same class)
 All three tests below exhibit the same
 signal-wakeup-fd-starvation fingerprint (`write() → EAGAIN`
 on the wakeup pipe after enough SIGINT attempts) and
 share the same structural cause — abandoned legacy-subint
 driver threads contending with the main interpreter for
 the shared GIL until the main trio loop can no longer
 drain its wakeup pipe fast enough to deliver signals.
 They're listed separately because each exposes the class
 under a different load pattern worth documenting.
 ### `tests/discovery/test_registrar.py::test_stale_entry_is_deleted[subint]`
 Original exemplar — see the **Symptom** and **Evidence**
 sections above. One abandoned subint
 (`transport_fails_actor`, stuck in `trio.sleep_forever()`
 after self-cancelling its IPC server) is sufficient to
 tip main into starvation for as long as the harness's
 `daemon` fixture subproc keeps its half of the registry
 IPC alive.
 ### `tests/test_cancellation.py::test_cancel_while_childs_child_in_sync_sleep[subint-False]`
 Cancel a grandchild that's in sync Python sleep from 2
 nurseries up. The test's own docstring declares the
 dependency: "its parent should issue a 'zombie reaper' to
 hard kill it after sufficient timeout" — which for
 `trio`/`mp_*` is an OS-level `SIGKILL` of the grandchild
 subproc. **Under `subint` there's no equivalent** (no
 public CPython API to force-destroy a running
 sub-interpreter), so the grandchild's sync-sleeping
 `trio.run()` persists inside its abandoned driver thread
 indefinitely. The nested actor-tree (parent → child →
 grandchild, all subints) means a single cancel triggers
 multiple concurrent hard-kill abandonments, each leaving
 a live driver thread.
 This test often only manifests the starvation under
 **full-suite runs** rather than solo execution —
 earlier-in-session subint tests also leave abandoned
 driver threads behind, and the combined population is
 what actually tips main trio into starvation. Solo runs
 may stay Ctrl-C-able with fewer abandoned threads in the
 mix.
 ### `tests/test_cancellation.py::test_multierror_fast_nursery[subint-25-0.5]`
 Nursery-error-path throughput stress-test parametrized
 for **25 concurrent subactors**. When the multierror
 fires and the nursery cancels, every subactor goes
 through our `subint_proc` teardown. The bounded
 hard-kills run in parallel (all `subint_proc` tasks are
 sibling trio tasks), so the timeout budget is ~3s total
 rather than 3s × 25. After that, **25 abandoned
 `daemon=True` driver threads are simultaneously alive** —
 an extreme pressure multiplier on the same mechanism.
 The `strace` fingerprint is striking under this load: six
 or more **successful** `write(16, "\2", 1) = 1` calls
 (main trio getting brief GIL slices, each long enough to
 drain exactly one wakeup-pipe byte) before finally
 saturating with `EAGAIN`:
 ```
 --- SIGINT {si_signo=SIGINT, si_code=SI_KERNEL} ---
 write(16, "\2", 1)                      = 1
 rt_sigreturn({mask=[WINCH]})            = 140141623162400
 --- SIGINT {si_signo=SIGINT, si_code=SI_KERNEL} ---
 write(16, "\2", 1)                      = 1
 rt_sigreturn({mask=[WINCH]})            = 140141623162400
 --- SIGINT {si_signo=SIGINT, si_code=SI_KERNEL} ---
 write(16, "\2", 1)                      = 1
 rt_sigreturn({mask=[WINCH]})            = 140141623162400
 --- SIGINT {si_signo=SIGINT, si_code=SI_KERNEL} ---
 write(16, "\2", 1)                      = 1
 rt_sigreturn({mask=[WINCH]})            = 140141623162400
 --- SIGINT {si_signo=SIGINT, si_code=SI_KERNEL} ---
 write(16, "\2", 1)                      = 1
 rt_sigreturn({mask=[WINCH]})            = 140141623162400
 --- SIGINT {si_signo=SIGINT, si_code=SI_KERNEL} ---
 write(16, "\2", 1)                      = 1
 rt_sigreturn({mask=[WINCH]})            = 140141623162400
 --- SIGINT {si_signo=SIGINT, si_code=SI_KERNEL} ---
 write(16, "\2", 1)                      = -1 EAGAIN (Resource temporarily unavailable)
 rt_sigreturn({mask=[WINCH]})            = 140141623162400
 ```
 Those successful writes indicate CPython's
 `sys.getswitchinterval()`-based GIL round-robin *is*
 giving main brief slices — just never long enough to run
 the Python-level signal handler through to the point
 where trio converts the delivered SIGINT into a
 `Cancelled` on the appropriate scope. Once the
 accumulated write rate outpaces main's drain rate, the
 pipe saturates and subsequent signals are silently
 dropped.
 The `pstree` below (pid `530060` = hung `pytest`) shows
 the subint-driver thread population at the moment of
 capture. Even with fewer than the full 25 shown (pstree
 truncates thread names to `subint-driver[<interp_id>` —
 interpreters `3` and `4` visible across 16 thread
 entries), the GIL-contender count is more than enough to
 explain the starvation:
 ```
 >>> pstree -snapt 530060
 systemd,1 --switched-root --system --deserialize=40
  └─login,1545 --
      └─bash,1872
          └─sway,2012
              └─alacritty,70471 -e xonsh
                  └─xonsh,70487 .../bin/xonsh
                      └─uv,70955 run xonsh
                          └─xonsh,70959 .../py314/bin/xonsh
                              └─python,530060 .../py314/bin/pytest -v tests/test_cancellation.py --spawn-backend=subint
                                  ├─{subint-driver[3},531857
                                  ├─{subint-driver[3},531860
                                  ├─{subint-driver[3},531862
                                  ├─{subint-driver[3},531866
                                  ├─{subint-driver[3},531877
                                  ├─{subint-driver[3},531882
                                  ├─{subint-driver[3},531884
                                  ├─{subint-driver[3},531945
                                  ├─{subint-driver[3},531950
                                  ├─{subint-driver[3},531952
                                  ├─{subint-driver[4},531956
                                  ├─{subint-driver[4},531959
                                  ├─{subint-driver[4},531961
                                  ├─{subint-driver[4},531965
                                  ├─{subint-driver[4},531968
                                  └─{subint-driver[4},531979
 ```
 (`pstree` uses `{...}` to denote threads rather than
 processes — these are all the **driver OS-threads** our
 `subint_proc` creates with name
 `f'subint-driver[{interp_id}]'`. Every one of them is
 still alive, executing `_interpreters.exec()` inside a
 sub-interpreter our hard-kill has abandoned. At 16+
 abandoned driver threads competing for the main GIL, the
 main-interpreter trio loop gets starved and signal
 delivery stalls.)
--- a/ai/prompt-io/claude/20260406T172848Z_02b2ef1_prompt_io.md
+++ b/ai/prompt-io/claude/20260406T172848Z_02b2ef1_prompt_io.md
@ -1,54 +0,0 @@
 ---
 model: claude-opus-4-6
 service: claude
 session: (ad-hoc, not tracked via conf.toml)
 timestamp: 2026-04-06T17:28:48Z
 git_ref: 02b2ef1
 scope: tests
 substantive: true
 raw_file: 20260406T172848Z_02b2ef1_prompt_io.raw.md
 ---
 ## Prompt
 User asked to extend `tests/test_resource_cache.py` with a test
 that reproduces the edge case fixed in commit `02b2ef18` (per-key
 locking+user tracking in `maybe_open_context()`). The bug was
 originally triggered in piker's `brokerd.kraken` backend where the
 same `acm_func` was called with different kwargs, and the old
 global `_Cache.users` counter caused:
 - teardown skipped for one `ctx_key` because another key's
  users kept the global count > 0
 - re-entry hitting `assert not resources.get(ctx_key)` during the
  teardown window
 User requested a test that would fail under the old code and pass
 with the fix.
 ## Response summary
 Designed and implemented `test_per_ctx_key_resource_lifecycle`
 which verifies per-`ctx_key` resource isolation by:
 1. Holding resource `'a'` open in a bg task
 2. Opening+closing resource `'b'` (same `acm_func`, different
   kwargs) while `'a'` is still alive
 3. Re-opening `'b'` and asserting cache MISS — proving `'b'` was
   torn down independently despite `'a'` keeping its own user
   count > 0
 With the old global counter, phase 3 would produce a stale cache
 HIT (leaked resource) or crash on the assert.
 Also added a trivial `acm_with_resource(resource_id)` ACM helper
 at module level.
 ## Files changed
 - `tests/test_resource_cache.py` — add `acm_with_resource` ACM +
  `test_per_ctx_key_resource_lifecycle` test fn
 ## Human edits
 None — committed as generated (pending user review).
--- a/ai/prompt-io/claude/20260406T193125Z_85f9c5d_prompt_io.md
+++ b/ai/prompt-io/claude/20260406T193125Z_85f9c5d_prompt_io.md
@ -1,57 +0,0 @@
 ---
 model: claude-opus-4-6
 service: claude
 session: (ad-hoc, not tracked via conf.toml)
 timestamp: 2026-04-06T19:31:25Z
 git_ref: 85f9c5d
 scope: tests
 substantive: true
 raw_file: 20260406T193125Z_85f9c5d_prompt_io.raw.md
 ---
 ## Prompt
 User asked to reproduce the exact `assert not resources.get(ctx_key)`
 crash originally seen in piker's `brokerd.kraken` backend via
 `open_cached_client('kraken')`. Key constraints from user:
 - In piker, kwargs were the **same** (empty) so all callers
  share one `ctx_key = (fid, ())`
 - The root issue is `_Cache.locks` being indexed by `fid`
  rather than `ctx_key`, plus the race window between
  `values.pop()` and `resources.pop()` in `_Cache.run_ctx`
  (the acm `__aexit__` has checkpoints in between)
 - Mark the test `@pytest.mark.xfail` so it can be committed
  against the unpatched code and later adjusted once the fix
  from `9e49eddd` is applied
 User pointed to piker's `open_cached_client('kraken')` usage
 as the real-world pattern to model.
 ## Response summary
 Added `test_moc_reentry_during_teardown` which deterministically
 reproduces the teardown race:
 1. A `cached_client()` acm (no params, like
   `kraken.api.get_client()`) signals via `in_aexit` event
   when its `__aexit__` starts, then sleeps to hold the
   window open
 2. `use_and_exit` task enters+exits the cached resource
 3. `reenter_during_teardown` waits on `in_aexit` then
   re-enters — landing exactly in the window where
   `values` is popped but `resources` still exists
 4. The old `assert not resources.get(ctx_key)` fires
 5. `@pytest.mark.xfail(raises=AssertionError)` catches it
 Test confirmed: `XFAIL` with the exact traceback at
 `_mngrs.py:327`.
 ## Files changed
 - `tests/test_resource_cache.py` — add
  `test_moc_reentry_during_teardown` xfail test fn
 ## Human edits
 None — committed as generated (pending user review).
--- a/ai/prompt-io/claude/20260406T222648Z_f086222_prompt_io.md
+++ b/ai/prompt-io/claude/20260406T222648Z_f086222_prompt_io.md
@ -1,52 +0,0 @@
 ---
 model: claude-opus-4-6
 service: claude
 session: (ad-hoc, not tracked via conf.toml)
 timestamp: 2026-04-06T22:26:48Z
 git_ref: f086222
 scope: code
 substantive: true
 raw_file: 20260406T222648Z_f086222_prompt_io.raw.md
 ---
 ## Prompt
 User applied the per-key locking patch (`f086222d`),
 removed the `@pytest.mark.xfail` from
 `test_moc_reentry_during_teardown`, and found the
 test STILL fails — now with `RuntimeError` instead
 of `AssertionError`. Asked to diagnose why the fix
 doesn't actually resolve this race.
 ## Response summary
 Traced the exact scheduling sequence across three
 concurrent tasks to show that the per-`ctx_key`
 locking change doesn't close the fundamental gap:
 - `maybe_open_context`'s finally pops the lock and
  sets `no_more_users` in the **exiting caller's
  task**
 - But the actual resource cleanup (`values.pop`,
  acm `__aexit__`, `resources.pop`) runs in the
  **`run_ctx` task** inside `service_tn`
 - A re-entering task creates a fresh lock (old one
  popped), acquires immediately, and hits stale
  `resources[ctx_key]` while `run_ctx` is still in
  `__aexit__`
 Identified three possible fix strategies:
 1. Let `run_ctx` own lock release/pop after
   `resources.pop()`
 2. Add a "teardown complete" event that new callers
   wait on
 3. Move `resources.pop()` into `run_ctx`'s inner
   finally (before acm `__aexit__`)
 ## Files changed
 No code changes — analysis only.
 ## Human edits
 N/A — diagnostic output, no code generated.
--- a/ai/prompt-io/claude/20260406T222648Z_f086222_prompt_io.raw.md
+++ b/ai/prompt-io/claude/20260406T222648Z_f086222_prompt_io.raw.md
@ -1,32 +0,0 @@
 ---
 model: claude-opus-4-6
 service: claude
 timestamp: 2026-04-06T22:26:48Z
 git_ref: f086222
 diff_cmd: git diff f086222~1..f086222
 ---
 Here's the problem. The per-key locking doesn't fix this race because the **lock lifetime is decoupled from `run_ctx`'s lifetime**. Trace through what happens:
 **`maybe_open_context` finally** (lines 468-495):
 1. `users[ctx_key] -= 1` → 0
 2. `no_more_users.set()` → wakes `run_ctx` (but it doesn't run yet, just becomes ready)
 3. `_Cache.locks.pop(ctx_key)` → **lock is gone**
 4. `maybe_open_context` exits
 **`run_ctx`** now gets scheduled (lines 237-241):
 5. Inner finally: `values.pop(ctx_key)` → values gone
 6. `mng.__aexit__()` runs → `in_aexit.set()`, `await trio.sleep(10)` → **CHECKPOINT**
 **`reenter_during_teardown`** wakes:
 7. Enters `maybe_open_context` → no lock exists for `ctx_key` → creates **fresh** lock, acquires immediately
 8. `values[ctx_key]` → `KeyError`
 9. `resources.get(ctx_key)` → **still exists!** (step 6 hasn't reached `resources.pop()` in `run_ctx`'s outer finally yet)
 10. `raise RuntimeError('Caching resources ALREADY exist?!')`
 The core issue: `no_more_users.set()` (step 2) and `locks.pop()` (step 3) happen in the **exiting caller's task**, but the actual resource cleanup (`values.pop`, acm `__aexit__`, `resources.pop`) happens in the **`run_ctx` task** inside `service_tn`. There's no synchronization between "lock removed, new callers welcome" and "run_ctx has actually finished tearing down". The per-`ctx_key` lock change just made the lock more granular — it didn't close this gap.
 The fix needs to ensure a new caller can't acquire a lock for `ctx_key` until `run_ctx` has fully exited (including `resources.pop()`). Either:
 - **don't pop the lock in `maybe_open_context`** — let `run_ctx` release/pop it after `resources.pop()`
 - **wait on a "teardown complete" event** in the cache-miss path when stale `resources` exist
 - **pop `resources` before the acm `__aexit__`** (move `resources.pop()` into the inner finally alongside `values.pop()`)
--- a/ai/prompt-io/claude/20260413T192116Z_f851f28_prompt_io.md
+++ b/ai/prompt-io/claude/20260413T192116Z_f851f28_prompt_io.md
@ -1,74 +0,0 @@
 ---
 model: claude-opus-4-6
 service: claude
 session: (ad-hoc, not tracked via conf.toml)
 timestamp: 2026-04-13T19:21:16Z
 git_ref: f851f28
 scope: code
 substantive: true
 ---
 ## Prompt
 User provided a detailed implementation plan for
 a new `test_tpt_bind_addrs.py` test suite covering
 `open_root_actor(tpt_bind_addrs=...)` — the three
 runtime code paths in `_root.py:385-450`:
 1. Non-registrar, no explicit bind -> random addrs
 2. Registrar, no explicit bind -> registry_addrs
 3. Explicit bind given -> `wrap_address()` + merge
 Plan specified 6 test functions (~10 parametrized
 variants), predicted a type-mixing bug at line 446,
 and asked for an in-flight fix if confirmed.
 ## Response summary
 Created `tests/discovery/test_tpt_bind_addrs.py`
 with 9 collected test variants across 6 functions:
 - `test_registrar_root_tpt_bind_addrs` (3 variants:
  `bind-eq-reg`, `bind-subset-reg`,
  `bind-disjoint-reg`)
 - `test_non_registrar_root_tpt_bind_addrs`
  (2 variants: `bind-same-bindspace`,
  `bind-disjoint`)
 - `test_non_registrar_default_random_bind`
  (baseline, no explicit bind)
 - `test_tpt_bind_addrs_as_maddr_str`
  (multiaddr string input)
 - `test_registrar_merge_binds_union`
  (registrar + disjoint bind -> union)
 - `test_open_nursery_forwards_tpt_bind_addrs`
  (`open_nursery(**kwargs)` forwarding)
 Confirmed and fixed the predicted bug at
 `_root.py:446`: the registrar merge path mixed
 `Address` objects (`tpt_bind_addrs`) with raw tuples
 (`uw_reg_addrs`) inside `set()`, preventing
 deduplication and causing double-bind `OSError`.
 Fix: wrap `uw_reg_addrs` before the set union:
 ```python
 # before (broken)
 tpt_bind_addrs = list(set(
    tpt_bind_addrs + uw_reg_addrs
 ))
 # after (fixed)
 tpt_bind_addrs = list(set(
    tpt_bind_addrs
    + [wrap_address(a) for a in uw_reg_addrs]
 ))
 ```
 All 9 tests pass after the fix.
 ## Files changed
 - `tests/discovery/test_tpt_bind_addrs.py` (new)
 - `tractor/_root.py:446` (bug fix, 1 line)
 ## Human edits
 N/A — pending review.
--- a/ai/prompt-io/claude/20260413T205048Z_269d939c_prompt_io.md
+++ b/ai/prompt-io/claude/20260413T205048Z_269d939c_prompt_io.md
@ -1,50 +0,0 @@
 ---
 model: claude-opus-4-6
 service: claude
 session: 76154e65-d8e1-4b5f-9275-0ea45ba7e98a
 timestamp: 2026-04-13T20:50:48Z
 git_ref: 269d939c
 scope: code
 substantive: true
 raw_file: 20260413T205048Z_269d939c_prompt_io.raw.md
 ---
 ## Prompt
 Implement a `parse_endpoints()` API in
 `tractor.discovery._multiaddr` that lets downstream
 projects declare per-actor transport bind addresses
 as a service table of actor-name -> multiaddr
 strings (loaded from config, e.g. TOML `[network]`
 section). Add type aliases `EndpointsTable` and
 `ParsedEndpoints`, plus 7 unit tests covering TCP,
 UDS, tuples, mixed input, unsupported protocols, and
 edge cases.
 A detailed implementation plan was provided
 specifying insertion points, type signatures, test
 names, and verification commands.
 ## Response summary
 Add `parse_endpoints()` function with two supporting
 type aliases to `_multiaddr.py`. The function
 iterates a `dict[str, list[str|tuple]]` service
 table and delegates each entry to the existing
 `wrap_address()` helper, which already handles maddr
 strings, raw tuples, and pre-wrapped `Address`
 objects. Seven pure unit tests (no trio runtime)
 validate all input variants and edge cases. All 22
 multiaddr tests pass.
 ## Files changed
 - `tractor/discovery/_multiaddr.py` — add
  `EndpointsTable`, `ParsedEndpoints` type aliases
  and `parse_endpoints()` function after line 133
 - `tests/discovery/test_multiaddr.py` — add
  `parse_endpoints` import and 7 new test functions
 ## Human edits
 None — committed as generated.
--- a/ai/prompt-io/claude/20260413T205048Z_269d939c_prompt_io.raw.md
+++ b/ai/prompt-io/claude/20260413T205048Z_269d939c_prompt_io.raw.md
@ -1,32 +0,0 @@
 ---
 model: claude-opus-4-6
 service: claude
 timestamp: 2026-04-13T20:50:48Z
 git_ref: 269d939c
 diff_cmd: git diff 269d939c~1..269d939c
 ---
 ## Implementation output
 ### `tractor/discovery/_multiaddr.py`
 > `git diff 269d939c~1..269d939c -- tractor/discovery/_multiaddr.py`
 Type aliases `EndpointsTable` and `ParsedEndpoints`
 plus `parse_endpoints()` helper that wraps each
 entry via `wrap_address()`.
 ### `tests/discovery/test_multiaddr.py`
 > `git diff 269d939c~1..269d939c -- tests/discovery/test_multiaddr.py`
 Add import of `parse_endpoints` to existing import
 block, then 7 new test functions.
 ### Test run output
 ```
 22 passed, 1 warning in 0.05s
 ```
 All 22 tests pass (15 existing + 7 new).
--- a/ai/prompt-io/claude/20260414T163300Z_befedc49_prompt_io.md
+++ b/ai/prompt-io/claude/20260414T163300Z_befedc49_prompt_io.md
@ -1,38 +0,0 @@
 ---
 model: claude-opus-4-6
 service: claude
 session: multiaddr-support-rename-prefer
 timestamp: 2026-04-14T16:33:00Z
 git_ref: befedc49
 scope: code
 substantive: true
 raw_file: 20260414T163300Z_befedc49_prompt_io.raw.md
 ---
 ## Prompt
 Create a helper function that determines the best transport given
 actor locality (distributed vs same host). Use PID/hostname
 comparison for locality detection, apply at registry addr selection
 only (not spawn-time).
 ## Response summary
 New `prefer_addr()` + `_is_local_addr()` helpers
 in `_api.py` using `socket.getaddrinfo()` and
 `ipaddress` for PID/hostname locality detection.
 Preference: UDS > local TCP > remote TCP.
 Integrated into `query_actor()` and
 `wait_for_actor()`. Also changed
 `Registrar.find_actor()` to return full addr list
 so callers can apply preference.
 ## Files changed
 - `tractor/discovery/_discovery.py` → `_api.py`
  — renamed + added `prefer_addr()`,
  `_is_local_addr()`; updated `query_actor()` and
  `wait_for_actor()` call sites
 - `tractor/discovery/_registry.py`
  — `Registrar.find_actor()` returns
  `list[UnwrappedAddress]|None`
--- a/ai/prompt-io/claude/20260414T163300Z_befedc49_prompt_io.raw.md
+++ b/ai/prompt-io/claude/20260414T163300Z_befedc49_prompt_io.raw.md
@ -1,62 +0,0 @@
 ---
 model: claude-opus-4-6
 service: claude
 timestamp: 2026-04-14T16:33:00Z
 git_ref: befedc49
 diff_cmd: git diff befedc49~1..befedc49
 ---
 ### `tractor/discovery/_api.py`
 > `git diff befedc49~1..befedc49 -- tractor/discovery/_api.py`
 Add `_is_local_addr()` and `prefer_addr()` transport
 preference helpers.
 #### `_is_local_addr(addr: Address) -> bool`
 Determines whether an `Address` is reachable on the
 local host:
 - `UDSAddress`: always returns `True`
  (filesystem-bound, inherently local)
 - `TCPAddress`: checks if `._host` is a loopback IP
  via `ipaddress.ip_address().is_loopback`, then
  falls back to comparing against the machine's own
  interface IPs via
  `socket.getaddrinfo(socket.gethostname(), None)`
 #### `prefer_addr(addrs: list[UnwrappedAddress]) -> UnwrappedAddress`
 Selects the "best" transport address from a
 multihomed actor's address list. Wraps each
 candidate via `wrap_address()` to get typed
 `Address` objects, then classifies into three tiers:
 1. **UDS** (same-host guaranteed, lowest overhead)
 2. **TCP loopback / same-host IP** (local network)
 3. **TCP remote** (only option for distributed)
 Within each tier, the last-registered (latest) entry
 is preferred. Falls back to `addrs[-1]` if no
 heuristic matches.
 ### `tractor/discovery/_registry.py`
 > `git diff befedc49~1..befedc49 -- tractor/discovery/_registry.py`
 `Registrar.find_actor()` return type broadened from
 single addr to `list[UnwrappedAddress]|None` — full
 addr list lets callers apply transport preference.
 #### Integration
 `query_actor()` and `wait_for_actor()` now call
 `prefer_addr(addrs)` instead of `addrs[-1]`.
 ### Verification
 All discovery tests pass (13/13 non-daemon).
 `test_local.py` and `test_multi_program.py` also
 pass (daemon fixture teardown failures are
 pre-existing and unrelated).
--- a/ai/prompt-io/claude/20260417T034918Z_9703210_prompt_io.md
+++ b/ai/prompt-io/claude/20260417T034918Z_9703210_prompt_io.md
@ -1,101 +0,0 @@
 ---
 model: claude-opus-4-7[1m]
 service: claude
 session: subints-spawner-design-kickoff
 timestamp: 2026-04-17T03:49:18Z
 git_ref: 9703210
 scope: docs
 substantive: true
 raw_file: 20260417T034918Z_9703210_prompt_io.raw.md
 ---
 ## Prompt
 Drive the "first big boi, from GH issue" task seeded by
 `ai/prompt-io/prompts/subints_spawner.md`: design, plan
 and implement sub-interpreter (subint) spawn-backend
 support per issue #379, including (1) modularizing
 `tractor.spawn._spawn` into per-backend submods, (2) a new
 `._subint` backend, and (3) harness parametrization via the
 existing `--spawn-backend` / `start_method` pytest fixture
 in `tractor._testing.pytest`.
 Follow-up clarifications from the user (this turn):
 1. Pin `<3.15` on this dev branch and feature-gate subint
   tests — chose option (a).
 2. Split Phase A (modularization) into its own PR first.
 3. Defer the `fork()`-via-subint hack to a follow-up.
 4. Harness flag is `pytest --spawn-backend <key>` CLI →
   `start_method` fixture (prompt file updated to match).
 ## Response summary
 Produced a three-phase plan and a concrete Phase A (pure
 modularization) file-split plan for user review; no code
 written yet — the green-light to start Phase A was given
 in this same turn conditional on logging this prompt-io
 entry first.
 Phases:
 - **A — modularize** `tractor/spawn/_spawn.py` (847 LOC):
  keep generic machinery in `_spawn.py`, extract
  `trio_proc` → `spawn/_trio.py`, `mp_proc` →
  `spawn/_mp.py`. No pin bump.
 - **B — `_subint` backend**: bump `pyproject.toml`
  `requires-python` upper to `<3.15`; add `'subint'` to
  `SpawnMethodKey`; reuse existing UDS transport; shm
  escape-hatch deferred.
 - **C — harness**: drive the valid-backend tuple in
  `tractor/_testing/pytest.py:345-349` from
  `typing.get_args(SpawnMethodKey)`; skip subint tests on
  Python < 3.14.
 Key findings surfaced to the user:
 - `pyproject.toml:12` currently pins `<3.14`; PEP 734
  `concurrent.interpreters` only ships in 3.14 — the
  load-bearing constraint.
 - `_testing/pytest.py:345-349` hardcodes valid backends
  as a string tuple (`'mp_spawn'`, `'mp_forkserver'`,
  `'trio'`) — should be `get_args(SpawnMethodKey)`.
 - `_testing/pytest.py:228` already imports
  `try_set_start_method` from `tractor.spawn._spawn` —
  keeping the `_spawn.py` path as the "core" module
  avoids breaking external refs during Phase A.
 Phase A commit breakdown (each green via `/run-tests`):
 1. Drive `_testing/pytest.py` backend-tuple from
   `get_args(SpawnMethodKey)` — prep.
 2. Extract `trio_proc` → `spawn/_trio.py`; `_methods`
   registry imports it.
 3. Extract `mp_proc` → `spawn/_mp.py`; `_methods` imports
   it.
 4. Tidy `spawn/__init__.py` docstring for the new layout.
 Workflow: Phase A runs in a new worktree via `/open-wkt
 spawn_modularize` (branch `wkt/spawn_modularize`) and
 lands as its own PR before Phase B/C work resumes on
 the current `claude_bun_opus4.7_subint_spawner_backend`
 branch.
 ## Files changed
 None this turn — design/planning only. The user updated
 `ai/prompt-io/prompts/subints_spawner.md` themselves to
 complete the truncated sentence on line 32 (harness-flag
 reference).
 Upcoming (Phase A, next turn):
 - `tractor/spawn/_spawn.py` — trimmed to "core"
 - `tractor/spawn/_trio.py` — new; holds `trio_proc()`
 - `tractor/spawn/_mp.py` — new; holds `mp_proc()`
 - `tractor/spawn/__init__.py` — docstring refresh
 - `tractor/_testing/pytest.py` — backend tuple from
  `get_args(SpawnMethodKey)`
 ## Human edits
 None — committed as generated. User completed the
 truncated sentence on line 32 of
 `ai/prompt-io/prompts/subints_spawner.md` themselves
 *before* this prompt-io log was written; that edit was
 the user's authoring, not an edit to AI output.
--- a/ai/prompt-io/claude/20260417T034918Z_9703210_prompt_io.raw.md
+++ b/ai/prompt-io/claude/20260417T034918Z_9703210_prompt_io.raw.md
@ -1,117 +0,0 @@
 ---
 model: claude-opus-4-7[1m]
 service: claude
 timestamp: 2026-04-17T03:49:18Z
 git_ref: 9703210
 diff_cmd: git diff HEAD~1..HEAD
 ---
 No code was produced in this turn — this is a design/planning
 exchange for subint spawner support per issue #379. Captured
 verbatim below are the two substantive AI responses, which
 together fix the phased plan (A: modularize, B: subint, C:
 harness) and the concrete Phase A file split before any
 implementation begins.
 ## Turn 1 — initial triage + gating questions
 > Read: issue #379, current `spawn/_spawn.py` (847 lines),
 > and pyproject pins.
 >
 > ## Task summary
 > Design/plan/implement subint backend per #379, plus
 > modularize `spawn/_spawn.py` into per-backend submods, plus
 > harness flag for test parametrization.
 >
 > ## Key finding
 > `pyproject.toml:12` pins `requires-python = ">=3.12, <3.14"`
 > — but `concurrent.interpreters` / PEP 734 **only lands in
 > 3.14**. This is the load-bearing design question before
 > coding.
 >
 > ## Proposed phasing (incremental commits)
 > - **Phase A — modularize** `_spawn.py`: extract `trio_proc`
 >   → `spawn/_trio.py`, `mp_proc` → `spawn/_mp.py`, keep
 >   generic helpers (`hard_kill`/`soft_kill`/`new_proc`
 >   dispatcher/`SpawnMethodKey`) in a slim core. Tests green
 >   at each step.
 > - **Phase B — `_subint` backend**: extend `SpawnMethodKey`
 >   with `'subint'`; reuse UDS transport by default; shm is
 >   escape-hatch follow-up.
 > - **Phase C — harness**: add backend-parametrize flag, skip
 >   subint when Python < 3.14.
 >
 > ## Gating questions before I start
 > 1. **Python pin**: bump `<3.15` and feature-gate subint
 >    tests (a), require 3.14 outright (b), or backport shim
 >    (c)? I'd recommend (a).
 > 2. **PR strategy**: one branch, commits per phase (current
 >    `claude_bun_opus4.7_subint_spawner_backend`)? Or split
 >    Phase A as its own PR first?
 > 3. **`fork()`-via-subint hack** from the issue's "Our own
 >    thoughts" section — v1 or follow-up? I'd defer to
 >    follow-up.
 > 4. **Prompt file line 32 is truncated** — what's the
 >    existing harness flag name you had in mind ("using the
 >    existing ___")?
 >
 > Which of these are already decided vs. open?
 ## User answers (for the raw record)
 1. bump `<3.15`, feature-gate — option (a).
 2. split Phase A as its own PR first.
 3. defer `fork()`-via-subint hack.
 4. harness flag is `--spawn-backend` CLI → `start_method`
    fixture, per `tractor._testing.pytest`. Prompt file
   updated accordingly.
 ## Turn 2 — concrete Phase A plan
 > Good — prompt file now ends cleanly; harness flag
 > confirmed as `--spawn-backend` CLI → `start_method`
 > fixture (via `_testing/pytest.py:193,355`). Also noted
 > `pytest.py:345-349` hardcodes valid backends as a string
 > tuple — `typing.get_args(SpawnMethodKey)` should drive
 > that list.
 >
 > ## Phase A — concrete plan (separate PR)
 >
 > **File split** in `tractor/spawn/`:
 > - `_spawn.py` stays as "core": keeps `SpawnMethodKey`,
 >   `_spawn_method`, `proc_waiter`, `try_set_start_method`,
 >   `exhaust_portal`, `cancel_on_completion`, `hard_kill`,
 >   `soft_kill`, `new_proc` dispatcher, `_methods`
 >   registry. Kept at this path so `_testing/pytest.py:228`
 >   and other external imports don't break.
 > - `_trio.py` ← `trio_proc()` (lines 448–671, ~224 lines)
 > - `_mp.py` ← `mp_proc()` (lines 672–842, ~171 lines)
 > - `_methods` dict imports the two backend fns from the
 >   new modules.
 >
 > **Commits** (each green via `/run-tests`):
 > 1. Drive `_testing/pytest.py` backend-tuple from
 >    `get_args(SpawnMethodKey)` — tiny prep.
 > 2. Extract `trio_proc` → `spawn/_trio.py`; `_methods`
 >    imports it.
 > 3. Extract `mp_proc` → `spawn/_mp.py`; `_methods`
 >    imports it.
 > 4. Tidy `spawn/__init__.py` docstring to describe new
 >    layout.
 >
 > No pyproject pin bump in Phase A (that's Phase B).
 >
 > **Workflow**: I'd run this in a new worktree via
 > `/open-wkt spawn_modularize` (branch
 > `wkt/spawn_modularize`), then open a PR before returning
 > to the current `claude_bun_opus4.7_subint_spawner_backend`
 > branch for Phase B/C.
 >
 > Green-light to `/open-wkt` and start Phase A?
 ## User final response
 > all this first then yes, green light.
 — log this `/prompt-io` entry first, then proceed with
 Phase A in a worktree.
--- a/ai/prompt-io/claude/20260417T035800Z_61a73ba_prompt_io.md
+++ b/ai/prompt-io/claude/20260417T035800Z_61a73ba_prompt_io.md
@ -1,86 +0,0 @@
 ---
 model: claude-opus-4-7[1m]
 service: claude
 session: subints-spawner-phase-a-impl
 timestamp: 2026-04-17T03:58:05Z
 git_ref: 61a73ba
 scope: code
 substantive: true
 raw_file: 20260417T035800Z_61a73ba_prompt_io.raw.md
 ---
 ## Prompt
 Follow-up from the design session logged in
 `20260417T034918Z_9703210_prompt_io.md`: the user
 greenlit Phase A after `/prompt-io` tracking, so
 execute the Phase A "modularize `spawn._spawn`" plan
 end-to-end in a new worktree — four logical steps
 (pytest harness prep, `trio_proc` extraction,
 `mp_proc` extraction, docstring refresh), each green
 via `/run-tests`.
 User directives during execution:
 - **Option 3** for the test cadence ("continue A.2–A.4
  first, then run the full suite once at the end of
  Phase A").
 - **One commit** for the whole phase ("can't we just
  commit the whole patch in one commit?") instead of
  the 3/4-commit split I initially proposed.
 - **Don't pre-draft** commit messages — wait for the
  user to invoke `/commit-msg` (captured as feedback
  memory `feedback_no_auto_draft_commit_msgs.md`).
 ## Response summary
 Produced the cohesive Phase A modularization patch,
 landed as commit `61a73bae` (subject: "Mv
 `trio_proc`/`mp_proc` to per-backend submods"). Five
 files changed, +565 / -418 lines.
 Key pieces of the patch (generated by claude,
 reviewed by the human before commit):
 - `tractor/spawn/_trio.py` — **new**; receives
  `trio_proc()` verbatim from `_spawn.py`; imports
  cross-backend helpers back from `._spawn`.
 - `tractor/spawn/_mp.py` — **new**; receives
  `mp_proc()` verbatim; uses `from . import _spawn`
  for late-binding access to the mutable `_ctx` /
  `_spawn_method` globals (design decision made
  during impl, not the original plan).
 - `tractor/spawn/_spawn.py` — shrunk 847 → 448 LOC;
  import pruning; bottom-of-module late imports for
  `trio_proc` / `mp_proc` with a one-line comment
  explaining the circular-dep reason.
 - `tractor/spawn/__init__.py` — docstring refresh
  describing the new layout.
 - `tractor/_testing/pytest.py` — the valid-backend
  set now comes from `typing.get_args(SpawnMethodKey)`
  so future additions (`'subint'`) don't need harness
  edits.
 ## Files changed
 See `git diff 61a73ba~1..61a73ba --stat`:
 ```
 tractor/_testing/pytest.py |  12 +-
 tractor/spawn/__init__.py  |  31 +++-
 tractor/spawn/_mp.py       | 235 ++++++++++++++++++++++++
 tractor/spawn/_spawn.py    | 413 +-------------------------------
 tractor/spawn/_trio.py     | 292 ++++++++++++++++++++++++++++
 5 files changed, 565 insertions(+), 418 deletions(-)
 ```
 Validation:
 - import probe + `_methods` wiring check — OK
 - spawn-relevant test subset — 37 passed, 1 skipped
 - full suite — 350 passed, 14 skipped, 7 xfailed, 1
  xpassed
 ## Human edits
 None — committed as generated by claude (no diff
 between `.claude/git_commit_msg_LATEST.md` and the
 committed body, as far as the assistant could
 observe).
--- a/ai/prompt-io/claude/20260417T035800Z_61a73ba_prompt_io.raw.md
+++ b/ai/prompt-io/claude/20260417T035800Z_61a73ba_prompt_io.raw.md
@ -1,138 +0,0 @@
 ---
 model: claude-opus-4-7[1m]
 service: claude
 timestamp: 2026-04-17T03:58:05Z
 git_ref: 61a73ba
 diff_cmd: git diff 61a73ba~1..61a73ba
 ---
 Code generated in this turn was committed verbatim as
 `61a73bae` ("Mv `trio_proc`/`mp_proc` to per-backend
 submods"). Per diff-ref mode, per-file code is captured
 via the pointers below, each followed by a prose
 summary of what the AI generated. Non-code output
 (sanity-check results, design rationale) is included
 verbatim.
 ## Per-file generated content
 ### `tractor/spawn/_trio.py` (new, 292 lines)
 > `git diff 61a73ba~1..61a73ba -- tractor/spawn/_trio.py`
 Pure lift-and-shift of `trio_proc()` out of
 `tractor/spawn/_spawn.py` (previously lines 448–670).
 Added AGPL header + module docstring describing the
 backend; imports include local `from ._spawn import
 cancel_on_completion, hard_kill, soft_kill`, which
 is why the core file needs the bottom-of-module
 late-import pattern to avoid a cycle. All call sites,
 log-format strings, and body logic are byte-identical
 to the originals — no semantic change.
 ### `tractor/spawn/_mp.py` (new, 235 lines)
 > `git diff 61a73ba~1..61a73ba -- tractor/spawn/_mp.py`
 Pure lift-and-shift of `mp_proc()` out of
 `tractor/spawn/_spawn.py` (previously lines 672–842).
 Same AGPL header convention. Key difference from
 `_trio.py`: uses `from . import _spawn` (module
 import, not from-import) for `_ctx` and
 `_spawn_method` references — these are mutated at
 runtime by `try_set_start_method()`, so late binding
 via `_spawn._ctx` / `_spawn._spawn_method` is required
 for correctness. Also imports `cancel_on_completion`,
 `soft_kill`, `proc_waiter` from `._spawn`.
 ### `tractor/spawn/_spawn.py` (modified, 847 → 448 LOC)
 > `git diff 61a73ba~1..61a73ba -- tractor/spawn/_spawn.py`
 - removed `trio_proc()` body (moved to `_trio.py`)
 - removed `mp_proc()` body (moved to `_mp.py`)
 - pruned imports now unused in core: `sys`,
  `is_root_process`, `current_actor`,
  `is_main_process`, `_mp_main`, `ActorFailure`,
  `pretty_struct`, `_pformat`
 - added bottom-of-file late imports
  `from ._trio import trio_proc` and
  `from ._mp import mp_proc` with a one-line
  comment explaining why (circular dep)
 - `_methods` dict unchanged structurally; still binds
  `'trio' → trio_proc`, `'mp_spawn' → mp_proc`,
  `'mp_forkserver' → mp_proc`
 ### `tractor/spawn/__init__.py` (modified)
 > `git diff 61a73ba~1..61a73ba -- tractor/spawn/__init__.py`
 Docstring rewrite only — no code. New text describes
 the per-backend `Layout` with `- ._spawn` (core),
 `- ._trio` (default), `- ._mp` (two mp variants), and
 reiterates the "no eager import" NOTE.
 ### `tractor/_testing/pytest.py` (modified)
 > `git diff 61a73ba~1..61a73ba -- tractor/_testing/pytest.py`
 Two-line logical change (plus a comment):
 - `from typing import` grows `get_args`
 - `pytest_generate_tests()` replaces the hardcoded
  `('mp_spawn','mp_forkserver','trio')` tuple with
  `get_args(SpawnMethodKey)` via lazy import
  `from tractor.spawn._spawn import SpawnMethodKey`
  inside the function body (matching the project's
  existing lazy-import convention for the spawn
  module)
 ## Non-code output (verbatim)
 ### Design rationale for the file split
 From my Phase A plan turn (already captured in
 `20260417T034918Z_9703210_prompt_io.md`):
 - `_spawn.py` stays as the "core" rather than renamed
  to `_core.py` so external imports
  (`_testing/pytest.py:228` imports
  `try_set_start_method` from `tractor.spawn._spawn`)
  keep working without churn.
 - Per-backend extraction chosen over alternatives
  (e.g. splitting generic helpers further) because
  the immediate motivation is hosting a 3rd
  `_subint.py` sibling cleanly in Phase B.
 ### Sanity-check output (verbatim terminal excerpts)
 Post-extraction import probe:
 ```
 extraction OK
 _methods: {'trio': 'tractor.spawn._trio.trio_proc',
           'mp_spawn': 'tractor.spawn._mp.mp_proc',
           'mp_forkserver': 'tractor.spawn._mp.mp_proc'}
 ```
 Spawn-relevant test subset (`tests/test_local.py
 test_rpc.py test_spawning.py test_multi_program.py
 test_discovery.py`):
 ```
 37 passed, 1 skipped, 14 warnings in 55.37s
 ```
 Full suite:
 ```
 350 passed, 14 skipped, 7 xfailed, 1 xpassed,
 151 warnings in 437.73s (0:07:17)
 ```
 No regressions vs. `main`. One transient `-x`
 early-stop `ERROR` on
 `test_close_channel_explicit_remote_registrar[trio-True]`
 was flaky (passed solo, passed without `-x`), not
 caused by this refactor.
 ### Commit message
 Also AI-drafted (via `/commit-msg`) — the 40-line
 message on commit `61a73bae` itself. Not reproduced
 here; see `git log -1 61a73bae`.
--- a/ai/prompt-io/claude/20260417T124437Z_5cd6df5_prompt_io.md
+++ b/ai/prompt-io/claude/20260417T124437Z_5cd6df5_prompt_io.md
@ -1,97 +0,0 @@
 ---
 model: claude-opus-4-7[1m]
 service: claude
 session: subints-spawner-phase-b1-impl
 timestamp: 2026-04-17T12:44:37Z
 git_ref: 5cd6df5
 scope: code
 substantive: true
 raw_file: 20260417T124437Z_5cd6df5_prompt_io.raw.md
 ---
 ## Prompt
 Continuing the Phase B work from the design sessions
 logged in `20260417T034918Z_9703210_prompt_io.md` and
 the Phase A impl in `20260417T035800Z_61a73ba_...`.
 User direction at this step:
 - "ok now let's continue with the subints part of the
  plan ya?" → kick off Phase B.
 - After I proposed a 4-sub-phase breakdown (B.1
  scaffolding, B.2 min-viable spawn, B.3 lifecycle,
  B.4 test enablement), user: "ok can we continue
  with B" → approving the phasing and design
  defaults (thread-per-subint, UDS default,
  code-string bootstrap via `interp.exec()`).
 - User switched the worktree to
  `subint_spawner_backend` (instead of opening a
  fresh worktree as I'd proposed): "i already
  switched branches in the wkt, you should work off
  `subint_spawner_backend` now".
 Scope of this turn: **B.1 scaffolding only** —
 wire the `'subint'` name through the spawn-method
 registry + harness, add a feature-detected stub
 module, bump pyproject. No actual spawn flow.
 ## Response summary
 Produced the Phase B.1 scaffolding patch, landed as
 commit `5cd6df58` (subject: "Add `'subint'` spawn
 backend scaffold (#379)"). Four files changed,
 +124 / -2 lines.
 Key pieces (all generated by claude, reviewed by
 human before commit):
 - `tractor/spawn/_subint.py` — **new**; feature-
  detects `concurrent.interpreters`; `subint_proc()`
  stub raises `RuntimeError` on py<3.14 or
  `NotImplementedError` with issue-#379 URL on
  py≥3.14. Signature mirrors `trio_proc`/`mp_proc`
  so B.2 can drop the impl in without touching
  `_methods`.
 - `tractor/spawn/_spawn.py` — adds `'subint'` to
  `SpawnMethodKey`, grows a `case 'subint'` arm in
  `try_set_start_method()` with feature-gate, re-
  imports `sys` for the gate-error msg, adds late
  `from ._subint import subint_proc` import and
  `_methods` entry.
 - `tractor/_testing/pytest.py` — converts the
  gate-error into `pytest.UsageError` via a
  `try/except` around `try_set_start_method()` so
  `--spawn-backend=subint` on py<3.14 prints a
  clean banner instead of a traceback.
 - `pyproject.toml` — pin `requires-python` `<3.14`
  → `<3.15`, add `3.14` trove classifier.
 ## Files changed
 See `git diff 5cd6df5~1..5cd6df5 --stat`:
 ```
 pyproject.toml             |   3 +-
 tractor/_testing/pytest.py |   8 +++-
 tractor/spawn/_spawn.py    |  15 ++++++
 tractor/spawn/_subint.py   | 100 +++++++++++++++++++++++++
 4 files changed, 124 insertions(+), 2 deletions(-)
 ```
 Validation on py3.13:
 - registry/gate probe — OK; `_has_subints = False`,
  `subint_proc()` raises `RuntimeError` as expected.
 - `try_set_start_method('subint')` — raises cleanly.
 - `pytest --spawn-backend=subint` — exits with
  `pytest.UsageError` banner.
 - Spawn-relevant test subset — 69 passed, 1 skipped.
 - Full suite on py3.13 deferred (scaffolding is
  no-op on <3.14).
 - Full-suite on py3.14 pending user setting up the
  venv.
 ## Human edits
 None — committed as generated by claude. `uv.lock`
 was intentionally left unstaged by the user (pin-
 bump regenerated cp314 wheel entries but they were
 deferred to a follow-up).
--- a/ai/prompt-io/claude/20260417T124437Z_5cd6df5_prompt_io.raw.md
+++ b/ai/prompt-io/claude/20260417T124437Z_5cd6df5_prompt_io.raw.md
@ -1,168 +0,0 @@
 ---
 model: claude-opus-4-7[1m]
 service: claude
 timestamp: 2026-04-17T12:44:37Z
 git_ref: 5cd6df5
 diff_cmd: git diff 5cd6df5~1..5cd6df5
 ---
 Code generated in this turn was committed verbatim as
 `5cd6df58` ("Add `'subint'` spawn backend scaffold
 (#379)"). Per diff-ref mode, per-file code is captured
 via the pointers below, each followed by a prose
 summary. Non-code output (sanity-check results,
 design rationale) is included verbatim.
 ## Per-file generated content
 ### `tractor/spawn/_subint.py` (new, 100 lines)
 > `git diff 5cd6df5~1..5cd6df5 -- tractor/spawn/_subint.py`
 New scaffolding module for the PEP 734 subinterpreter
 backend. Contents:
 - AGPL header + module docstring (describes backend
  intent, 3.14+ availability gate, and explicit
  "SCAFFOLDING STUB" status pointing at issue #379).
 - Top-level `try/except ImportError` wrapping
  `from concurrent import interpreters as
  _interpreters` → sets module-global
  `_has_subints: bool`. This lets the registry stay
  introspectable on py<3.14 while spawn-time still
  fails cleanly.
 - `subint_proc()` coroutine with signature matching
  `trio_proc`/`mp_proc` exactly (same param names,
  defaults, and `TaskStatus[Portal]` typing) —
  intentional so Phase B.2 can drop the impl in
  without touching `_methods` or changing call-site
  binding.
 - Body raises `RuntimeError` on py<3.14 (with
  `sys.version` printed) or `NotImplementedError`
  with issue-#379 URL on py≥3.14.
 ### `tractor/spawn/_spawn.py` (modified, +15 LOC)
 > `git diff 5cd6df5~1..5cd6df5 -- tractor/spawn/_spawn.py`
 - `import sys` re-added (pruned during Phase A, now
  needed again for the py-version string in the
  `'subint'` gate-error).
 - `SpawnMethodKey = Literal[...]` grows `'subint'` as
  the 4th member, with inline comment `# py3.14+ via
  `concurrent.interpreters` (PEP 734)`.
 - `try_set_start_method()` match-block grows a new
  `case 'subint':` arm that imports
  `from ._subint import _has_subints` lazily and
  raises `RuntimeError` with a multi-line gate msg
  if unavailable.
 - Bottom-of-module late-import section grows
  `from ._subint import subint_proc` alongside the
  existing `_trio` / `_mp` imports.
 - `_methods` dict grows `'subint': subint_proc`.
 ### `tractor/_testing/pytest.py` (modified, +8 LOC)
 > `git diff 5cd6df5~1..5cd6df5 -- tractor/_testing/pytest.py`
 `pytest_configure()` wraps the
 `try_set_start_method(backend)` call in a
 `try/except RuntimeError` that re-raises as
 `pytest.UsageError(str(err))`. Rationale: the gate
 error on py<3.14 is legitimately a configuration
 problem, not a test failure, so pytest's UsageError
 path (exit code 4) gives a clean single-line banner
 instead of a traceback.
 ### `pyproject.toml` (modified, +2 / -1)
 > `git diff 5cd6df5~1..5cd6df5 -- pyproject.toml`
 - `requires-python` pin relaxed `>=3.12, <3.14` →
  `>=3.12, <3.15` to admit 3.14 as a valid target.
 - Added `"Programming Language :: Python :: 3.14"`
  to the trove classifiers.
 ## Non-code output (verbatim)
 ### Design choices captured in the prior turn
 (Previously confirmed by the user on April 17 in this
 session — relevant excerpts captured here for
 provenance since they drove the shape of the impl):
 1. **Python pin**: `<3.15` + feature-gate the subint
   backend at spawn time — user answered option (a)
   from the design triage.
 2. **Deferred `fork()`-via-subint hack** from issue
   #379's "Our own thoughts" section.
 3. **Phase B phasing**: user approved the B.1 / B.2 /
   B.3 / B.4 breakdown — this commit is strictly B.1
   (scaffolding only, no spawn-flow impl).
 4. **Option (B) worktree strategy**: new worktree
   branched from `wkt/spawn_modularize`. *(Amended by
   user at runtime: user switched the existing
   `spawn_modularize` worktree to the
   `subint_spawner_backend` branch instead.)*
 ### Sanity-check output (verbatim terminal excerpts)
 Registry / feature-gate verification on py3.13:
 ```
 SpawnMethodKey values: ('trio', 'mp_spawn',
                       'mp_forkserver', 'subint')
 _methods keys: ['trio', 'mp_spawn',
                'mp_forkserver', 'subint']
 _has_subints: False (py version: (3, 13) )
 [expected] RuntimeError: The 'subint' spawn backend
 requires Python 3.14+ (stdlib
 `concurrent.interpreters`, PEP 734).
 ```
 `try_set_start_method('subint')` gate on py3.13:
 ```
 [expected] RuntimeError: Spawn method 'subint'
 requires Python 3.14+ (stdlib
 `concurrent.interpreters`, PEP 734).
 ```
 Pytest `--spawn-backend=subint` on py3.13 (the new
 UsageError wrapper kicking in):
 ```
 ERROR: Spawn method 'subint' requires Python 3.14+
 (stdlib `concurrent.interpreters`, PEP 734).
 Current runtime: 3.13.11 (main, Dec  5 2025,
 16:06:33) [GCC 15.2.0]
 ```
 Collection probe: `404 tests collected in 0.18s`
 (no import errors from the new module).
 Spawn-relevant test subset (`tests/test_local.py
 test_rpc.py test_spawning.py test_multi_program.py
 tests/discovery/`):
 ```
 69 passed, 1 skipped, 10 warnings in 61.38s
 ```
 Full suite was **not** run on py3.13 for this commit
 — the scaffolding is no-op on <3.14 and full-suite
 validation under py3.14 is pending that venv being
 set up by the user.
 ### Commit message
 Also AI-drafted (via `/commit-msg`, with the prose
 rewrapped through
 `/home/goodboy/.claude/skills/pr-msg/scripts/rewrap.py --width 67`)
 — the 33-line message on commit `5cd6df58` itself.
 Not reproduced here; see `git log -1 5cd6df58`.
 ### Known follow-ups flagged to user
 - **`uv.lock` deferred**: pin-bump regenerated cp314
  wheel entries in `uv.lock`, but the user chose to
  not stage `uv.lock` for this commit. Warned
  explicitly.
 - **Phase B.2 needs py3.14 venv** — running the
  actual subint impl requires it; user said they'd
  set it up separately.
--- a/ai/prompt-io/claude/20260418T042526Z_26fb820_prompt_io.md
+++ b/ai/prompt-io/claude/20260418T042526Z_26fb820_prompt_io.md
@ -1,117 +0,0 @@
 ---
 model: claude-opus-4-7[1m]
 service: claude
 session: subints-phase-b2-destroy-race-fix
 timestamp: 2026-04-18T04:25:26Z
 git_ref: 26fb820
 scope: code
 substantive: true
 raw_file: 20260418T042526Z_26fb820_prompt_io.raw.md
 ---
 ## Prompt
 Follow-up to the Phase B.2 subint impl (built atop
 the B.1 scaffold, `5cd6df58`) after the user
 observed intermittent mid-suite hangs when running
 the tractor test suite under `--spawn-backend=subint`
 on py3.14. The specific sequence of prompts over
 several turns:
 1. User pointed at the `test_context_stream_semantics.py`
   suite as the first thing to make run clean under
   `--spawn-backend=subint`.
 2. After a series of `timeout`-terminated runs that
   gave no diagnostic info, user nudged me to stop
   relying on `timeout` and get actual runtime
   diagnostics ("the suite hangs indefinitely, so i
   don't think this `timeout 30` is helping you at
   all.."). Switched to
   `faulthandler.dump_traceback_later(...)` and a
   resource-tracker fixture to rule out leaks.
 3. Captured a stack pinning the hang on
   `_interpreters.destroy(interp_id)` in the subint
   teardown finally block.
 4. Proposed dedicated-OS-thread fix. User greenlit.
 5. Implemented + verified on-worktree; user needed
   to be pointed at the *worktree*'s `./py313` venv
   because bare `pytest` was picking up the main
   repo's venv (running un-patched `_subint.py`) and
   still hanging.
 Running theme over the whole exchange: this patch
 only closes the *destroy race*. The user and I also
 traced through the deeper cancellation story — SIGINT
 can't reach subints, legacy-mode shares the GIL,
 portal-cancel dies when the IPC channel is already
 broken — and agreed the next step is a bounded
 hard-kill in `subint_proc`'s teardown plus a
 dedicated cancellation test suite. Those land as
 separate commits.
 ## Response summary
 Produced the `tractor/spawn/_subint.py` patch landed
 as commit `26fb8206` ("Fix subint destroy race via
 dedicated OS thread"). One file, +110/-84 LOC.
 Mechanism: swap
 `trio.to_thread.run_sync(_interpreters.exec, ...)`
 for a plain
 `threading.Thread(target=..., daemon=False)`. The
 trio thread cache recycles workers — so the OS
 thread that ran `_interpreters.exec()` remained
 alive in the cache holding a stale subint tstate,
 blocking `_interpreters.destroy()` in the `finally`
 indefinitely. A dedicated one-shot thread exits
 naturally after the sync target returns, releasing
 its tstate and unblocking destroy.
 Coordination across the trio↔thread boundary:
 - `trio.lowlevel.current_trio_token()` captured at
  `subint_proc` entry
 - driver thread signals `subint_exited.set()` back
  to parent trio via `trio.from_thread.run_sync(...,
  trio_token=token)` (synchronous from the thread's
  POV; the call returns after trio has run `.set()`)
 - `trio.RunFinishedError` swallowed in that path for
  the process-teardown case where parent trio already
  exited
 - teardown `finally` off-loads the sync
  `driver_thread.join()` via `to_thread.run_sync` (a
  cache thread carries no subint tstate — safe)
 ## Files changed
 See `git diff 26fb820~1..26fb820 --stat`:
 ```
 tractor/spawn/_subint.py | 194 +++++++++++++++++++------------
 1 file changed, 110 insertions(+), 84 deletions(-)
 ```
 Validation:
 - `test_parent_cancels[chk_ctx_result_before_exit=True-
  cancel_method=ctx-child_returns_early=False]`
  (the specific test that was hanging for the user)
  — passed in 1.06s.
 - Full `tests/test_context_stream_semantics.py` under
  subint — 61 passed in 100.35s (clean-cache re-run:
  100.82s).
 - Trio backend regression subset — 69 passed / 1
  skipped / 89.19s — no regressions from this change.
 ## Non-code output (in raw log)
 Beyond the `_subint.py` patch, the raw log also
 records the cancellation-semantics research that
 spanned this conversation but did not ship as code
 in *this* commit. Preserving it inline under "Non-
 code output" because it directly informs the
 Phase B.3 hard-kill impl that will follow (and any
 upstream CPython bug reports we end up filing).
 ## Human edits
 None — committed as generated. The commit message
 itself was also AI-drafted via `/commit-msg` and
 rewrapped via the project's `rewrap.py --width 67`
 tooling; user landed it without edits.
--- a/ai/prompt-io/claude/20260418T042526Z_26fb820_prompt_io.raw.md
+++ b/ai/prompt-io/claude/20260418T042526Z_26fb820_prompt_io.raw.md
@ -1,220 +0,0 @@
 ---
 model: claude-opus-4-7[1m]
 service: claude
 timestamp: 2026-04-18T04:25:26Z
 git_ref: 26fb820
 diff_cmd: git diff 26fb820~1..26fb820
 ---
 Code generated in this turn was committed as `26fb8206`
 ("Fix subint destroy race via dedicated OS thread").
 Per diff-ref mode, per-file code is captured via the
 pointer below; the rest is verbatim non-code output
 — test-session diagnostics, CPython-internals
 reasoning, and the design synthesis that's worth
 keeping alongside the code since it informs the
 still-pending Phase B.3 hard-kill work.
 ## Per-file generated content
 ### `tractor/spawn/_subint.py` (modified, +110/-84 LOC)
 > `git diff 26fb820~1..26fb820 -- tractor/spawn/_subint.py`
 Rewrites the subint driver-thread strategy:
 - replaces `trio.to_thread.run_sync(_interpreters.exec, ...)`
  with a plain `threading.Thread(target=_subint_target,
  daemon=False)` so the OS thread truly exits after
  `_interpreters.exec()` returns
 - captures a `trio.lowlevel.current_trio_token()` at
  `subint_proc` entry; the driver thread signals
  completion back via `trio.from_thread.run_sync(
  subint_exited.set, trio_token=...)`
 - swallows `trio.RunFinishedError` in the signal path
  for the case where the parent trio loop has already
  exited (process teardown)
 - in the teardown `finally` off-loads the sync
  `driver_thread.join()` call to `trio.to_thread.run_sync`
  (a cache thread w/ no subint tstate — so no cache
  conflict) to wait for the driver thread to fully
  exit before calling `_interpreters.destroy()`
 ## Non-code output (verbatim) — the CPython-internals research
 ### What went wrong before this commit
 Under `--spawn-backend=subint` on py3.14, most single
 tests passed but longer runs hung intermittently. The
 position of the hang moved between runs (test #22 on
 one run, test #53 on another) suggesting a timing-
 dependent race rather than a deterministic bug.
 `faulthandler.dump_traceback_later()` eventually
 caught a stack with the main thread blocked in
 `_interpreters.destroy(interp_id)` at `_subint.py:293`.
 Only 2 threads were alive:
 - main thread waiting in `_interpreters.destroy()`
 - one idle trio thread-cache worker in
  `trio._core._thread_cache._work`
 No subint was still running (`_interpreters.list_all()`
 showed only the main interp). A resource-tracker
 pytest fixture confirmed threads/subints did NOT
 accumulate across tests — this was not a leak but a
 specific "destroy blocks on cached thread w/ stale
 tstate" race.
 ### Why the race exists
 `trio.to_thread.run_sync` uses a thread *cache* to
 avoid OS-thread creation overhead. When the sync
 callable returns, the OS thread is NOT terminated —
 it's parked in `_thread_cache._work` waiting for the
 next job. CPython's subinterpreter implementation
 attaches a **tstate** (thread-state object) to each
 OS thread that ever entered a subint via
 `_interpreters.exec()`. That tstate is released
 lazily — either when the thread picks up a new job
 (which re-attaches a new tstate, evicting the old
 one) or when the thread truly exits.
 `_interpreters.destroy(interp_id)` waits for *all*
 tstates associated w/ that subint to be released
 before it can proceed. If the cached worker is idle
 holding the stale tstate, destroy blocks indefinitely.
 Whether the race manifests depends on timing — if
 the cached thread happens to pick up another job
 quickly, destroy unblocks; if it sits idle, we hang.
 ### Why a dedicated `threading.Thread` fixes it
 A plain `threading.Thread(target=_subint_target,
 daemon=False)` runs its target once and exits. When
 the target returns, OS-thread teardown (`_bootstrap_inner`
 → `_bootstrap`) fires and CPython releases the
 tstate for that thread. `_interpreters.destroy()`
 then has no blocker.
 ### Diagnostic tactics that actually helped
 1. `faulthandler.dump_traceback_later(n, repeat=False,
   file=open(path, 'w'))` for captured stack dumps on
   hang. Critically, pipe to a `file=` not stderr —
   pytest captures stderr weirdly and the dump is
   easy to miss.
 2. A resource-tracker autouse fixture printing
   per-test `threading.active_count()` +
   `len(_interpreters.list_all())` deltas → ruled out
   leak-accumulation theories quickly.
 3. Running the hanging test *solo* vs in-suite —
   when solo passes but in-suite hangs, you know
   it's a cross-test state-transfer bug rather than
   a test-internal bug.
 ### Design synthesis — SIGINT + subints + SC
 The user and I walked through the cancellation
 semantics of PEP 684/734 subinterpreters in detail.
 Key findings we want to preserve:
 **Signal delivery in subints (stdlib limitation).**
 CPython's signal machinery only delivers signals
 (SIGINT included) to the *main thread of the main
 interpreter*. Subints cannot install signal handlers
 that will ever fire. This is an intentional design
 choice in PEP 684 and not expected to change. For
 tractor's subint actors, this means:
 - Ctrl-C never reaches a subint directly.
 - `trio.run()` running on a worker thread (as we do
  for subints) already skips SIGINT handler install
  because `signal.signal()` raises on non-main
  threads.
 - The only cancellation surface into a subint is
  our IPC `Portal.cancel_actor()`.
 **Legacy-mode subints share the main GIL** (which
 our impl uses since `msgspec` lacks PEP 684 support
 per `jcrist/msgspec#563`). This means a stuck subint
 thread can starve the parent's trio loop during
 cancellation — the parent can't even *start* its
 teardown handling until the subint yields the GIL.
 **Failure modes identified for Phase B.3 audit:**
 1. Portal cancel lands cleanly → subint unwinds →
   thread exits → destroy succeeds. (Happy path.)
 2. IPC channel is already broken when we try to
   send cancel (e.g., `test_ipc_channel_break_*`)
   → cancel raises `BrokenResourceError` → subint
   keeps running unaware → parent hangs waiting for
   `subint_exited`. This is what breaks
   `test_advanced_faults.py` under subint.
 3. Subint is stuck in non-checkpointing Python code
   → portal-cancel msg queued but never processed.
 4. Subint is in a shielded cancel scope when cancel
   arrives → delay until shield exits.
 **Current teardown has a shield-bug too:**
 `trio.CancelScope(shield=True)` wrapping the `finally`
 block absorbs Ctrl-C, so even when the user tries
 to break out they can't. This is the reason
 `test_ipc_channel_break_during_stream[break_parent-...
 no_msgstream_aclose]` locks up unkillable.
 **B.3 hard-kill fix plan (next commit):**
 1. Bound `driver_thread.join()` with
   `trio.move_on_after(HARD_KILL_TIMEOUT)`.
 2. If it times out, log a warning naming the
   `interp_id` and abandon the driver thread. A
   thread's `daemon` flag can't be changed after
   `start()`, so create it as `daemon=True` upfront
   and accept the tradeoff of proc-exit not waiting
   for a stuck subint.
 3. Best-effort `_interpreters.destroy()`; catch the
   `InterpreterError` if the subint is still running.
 4. Document that the leak is real and the only
   escape hatch we have without upstream cooperation.
 **Test plan for Phase B.3:**
 New `tests/test_subint_cancellation.py` covering:
 - SIGINT at spawn
 - SIGINT mid-portal-RPC
 - SIGINT during shielded section in subint
 - Dead-channel cancel (minimized mirror of
  `test_ipc_channel_break_during_stream`)
 - Non-checkpointing subint (tight `while True` in
  user code)
 - Per-test `pytest-timeout`-style bounds so the
  tests visibly fail instead of wedging the runner
 ### Sanity-check output (verbatim terminal excerpts)
 Post-fix single-test validation:
 ```
 1 passed, 1 warning in 1.06s
 ```
 (same test that was hanging pre-fix:
 `test_parent_cancels[...cancel_method=ctx-...False]`)
 Full `tests/test_context_stream_semantics.py`
 under subint:
 ```
 61 passed, 1 warning in 100.35s (0:01:40)
 ```
 and a clean-cache re-run:
 ```
 61 passed, 1 warning in 100.82s (0:01:40)
 ```
 No regressions on trio backend (same subset):
 ```
 69 passed, 1 skipped, 3 warnings in 89.19s
 ```
 ### Commit msg
 Also AI-drafted via `/commit-msg` + `rewrap.py
 --width 67`. See `git log -1 26fb820`.
--- a/ai/prompt-io/claude/20260420T192739Z_5e8cd8b2_prompt_io.md
+++ b/ai/prompt-io/claude/20260420T192739Z_5e8cd8b2_prompt_io.md
@ -1,111 +0,0 @@
 ---
 model: claude-opus-4-7[1m]
 service: claude
 session: subint-phase-b-hang-classification
 timestamp: 2026-04-20T19:27:39Z
 git_ref: HEAD (pre-commit; on branch `subint_spawner_backend`)
 scope: docs
 substantive: true
 raw_file: 20260420T192739Z_5e8cd8b2_prompt_io.raw.md
 ---
 ## Prompt
 Collab between user (`goodboy`) and `claude` to
 disambiguate two distinct hang modes hit during
 Phase B subint-spawn-backend bringup (issue #379).
 The user ran the failing suites, captured `strace`
 evidence on hung pytest pids, and set the framing:
 > "These cannot be the same bug. Different fix
 > paths. Write them up separately or we'll keep
 > conflating them."
 Follow-on asks:
 - Cross-link each doc from its reproducer test so
  a future triager lands on the analysis without
  needing to grep `ai/conc-anal/`.
 - On `test_stale_entry_is_deleted`: arm
  `dump_on_hang(seconds=20, path=...)` so a
  regression captures a stack dump (pytest stderr
  capture otherwise eats `faulthandler` output).
 - Keep `test_stale_entry_is_deleted` un-`skip`ped
  so the dump file remains inspectable.
 Defer the actual fix for the class-2 hang
 (Ctrl-C-able, tractor-side bug) to a later commit
 per the incremental-commits preference.
 ## Response summary
 Wrote two new `ai/conc-anal/` classification docs
 and cross-linked them from the reproducer tests:
 1. **`subint_sigint_starvation_issue.md`** —
   class 1: abandoned-legacy-subint thread +
   shared GIL starves main trio loop →
   signal-wakeup-fd pipe fills → SIGINT silently
   dropped (`write() = EAGAIN`). Pytest process
   un-Ctrl-C-able. Structurally a CPython limit;
   blocked on `msgspec` PEP 684 support
   (jcrist/msgspec#563). Reproducer:
   `test_stale_entry_is_deleted[subint]`.
 2. **`subint_cancel_delivery_hang_issue.md`** —
   class 2: parent-side trio task parks on an
   orphaned IPC channel after subint teardown;
   no clean EOF delivered to waiting receiver.
   Ctrl-C-able (main trio loop iterating fine).
   OUR bug to fix. Candidate fix: explicit
   parent-side channel abort in `subint_proc`'s
   hard-kill teardown. Reproducer:
   `test_subint_non_checkpointing_child`.
 Test-side cross-links:
 - `tests/discovery/test_registrar.py`:
  `test_stale_entry_is_deleted` → `trio.run(main)`
  wrapped in `dump_on_hang(seconds=20,
  path=<per-method-tmp>)`; long inline comment
  summarizes `strace` evidence + root-cause chain
  and points at both docs.
 - `tests/test_subint_cancellation.py`:
  `test_subint_non_checkpointing_child` docstring
  extended with "KNOWN ISSUE (Ctrl-C-able hang)"
  section pointing at the class-2 doc + noting
  the class-1 doc is NOT what this test hits.
 ## Files changed
 - `ai/conc-anal/subint_sigint_starvation_issue.md`
  — new, 205 LOC
 - `ai/conc-anal/subint_cancel_delivery_hang_issue.md`
  — new, 161 LOC
 - `tests/discovery/test_registrar.py` — +52/-1
  (arm `dump_on_hang`, inline-comment cross-link)
 - `tests/test_subint_cancellation.py` — +26
  (docstring "KNOWN ISSUE" block)
 ## Human edits
 Substantive collab — prose was jointly iterated:
 - User framed the two-doc split, set the
  classification criteria (Ctrl-C-able vs not),
  and provided the `strace` evidence.
 - User decided to keep `test_stale_entry_is_deleted`
  un-`skip`ped (my initial suggestion was
  `pytest.mark.skipif(spawn_backend=='subint')`).
 - User chose the candidate fix ordering for
  class 2 and marked "explicit parent-side channel
  abort" as the surgical preferred fix.
 - User picked the file naming convention
  (`subint_<hang-shape>_issue.md`) over my initial
  `hang_class_{1,2}.md`.
 - Assistant drafted the prose, aggregated prior-
  session root-cause findings from Phase B.2/B.3
  bringup, and wrote the test-side cross-linking
  comments.
 No further mechanical edits expected before
 commit; user may still rewrap via
 `scripts/rewrap.py` if preferred.
--- a/ai/prompt-io/claude/20260420T192739Z_5e8cd8b2_prompt_io.raw.md
+++ b/ai/prompt-io/claude/20260420T192739Z_5e8cd8b2_prompt_io.raw.md
@ -1,198 +0,0 @@
 ---
 model: claude-opus-4-7[1m]
 service: claude
 timestamp: 2026-04-20T19:27:39Z
 git_ref: HEAD (pre-commit; will land on branch `subint_spawner_backend`)
 diff_cmd: git diff HEAD~1..HEAD
 ---
 Collab between `goodboy` (user) and `claude` (this
 assistant) spanning multiple test-run iterations on
 branch `subint_spawner_backend`. The user ran the
 failing suites, captured `strace` evidence on the
 hung pytest pids, and set the direction ("these are
 two different hangs — write them up separately so
 we don't re-confuse ourselves later"). The assistant
 aggregated prior-session findings (Phase B.2/B.3
 bringup) into two classification docs + test-side
 cross-links. All prose was jointly iterated; the
 user had final say on framing and decided which
 candidate fix directions to list.
 ## Per-file generated content
 ### `ai/conc-anal/subint_sigint_starvation_issue.md` (new, 205 LOC)
 > `git diff HEAD~1..HEAD -- ai/conc-anal/subint_sigint_starvation_issue.md`
 Writes up the "abandoned-legacy-subint thread wedges
 the parent trio loop" class. Key sections:
 - **Symptom** — `test_stale_entry_is_deleted[subint]`
  hangs indefinitely AND is un-Ctrl-C-able.
 - **Evidence** — annotated `strace` excerpt showing
  SIGINT delivered to pytest, C-level signal handler
  tries to write to the signal-wakeup-fd pipe, gets
  `write() = -1 EAGAIN (Resource temporarily
  unavailable)`. Pipe is full because main trio loop
  isn't iterating often enough to drain it.
 - **Root-cause chain** — our hard-kill abandons the
  `daemon=True` driver OS thread after
  `_HARD_KILL_TIMEOUT`; the subint *inside* that
  thread is still running `trio.run()`;
  `_interpreters.destroy()` cannot force-stop a
  running subint (raises `InterpreterError`); legacy
  subints share the main GIL → abandoned subint
  starves main trio loop → wakeup-fd fills → SIGINT
  silently dropped.
 - **Why it's structurally a CPython limit** — no
  public force-destroy primitive for a running
  subint; the only escape is per-interpreter GIL
  isolation, gated on msgspec PEP 684 adoption
  (jcrist/msgspec#563).
 - **Current escape hatch** — harness-side SIGINT
  loop in the `daemon` fixture teardown that kills
  the bg registrar subproc, eventually unblocking
  a parent-side recv enough for the main loop to
  drain the wakeup pipe.
 ### `ai/conc-anal/subint_cancel_delivery_hang_issue.md` (new, 161 LOC)
 > `git diff HEAD~1..HEAD -- ai/conc-anal/subint_cancel_delivery_hang_issue.md`
 Writes up the *sibling* hang class — same subint
 backend, distinct root cause:
 - **TL;DR** — Ctrl-C-able, so NOT the SIGINT-
  starvation class; main trio loop iterates fine;
  ours to fix.
 - **Symptom** — `test_subint_non_checkpointing_child`
  hangs past the expected `_HARD_KILL_TIMEOUT`
  budget even after the subint is torn down.
 - **Diagnosis** — a parent-side trio task (likely
  a `chan.recv()` in `process_messages`) parks on
  an orphaned IPC channel; channel was torn down
  without emitting a clean EOF /
  `BrokenResourceError` to the waiting receiver.
 - **Candidate fix directions** — listed in rough
  order of preference:
  1. Explicit parent-side channel abort in
     `subint_proc`'s hard-kill teardown (surgical;
     most likely).
  2. Audit `process_messages` to add a timeout or
     cancel-scope protection that catches the
     orphaned-recv state.
  3. Wrap subint IPC channel construction in a
     sentinel that can force-close from the parent
     side regardless of subint liveness.
 ### `tests/discovery/test_registrar.py` (modified, +52/-1 LOC)
 > `git diff HEAD~1..HEAD -- tests/discovery/test_registrar.py`
 Wraps the `trio.run(main)` call at the bottom of
 `test_stale_entry_is_deleted` in
 `dump_on_hang(seconds=20, path=<per-method-tmp>)`.
 Adds a long inline comment that:
 - Enumerates variant-by-variant status
  (`[trio]`/`[mp_*]` = clean; `[subint]` = hangs
  + un-Ctrl-C-able)
 - Summarizes the `strace` evidence and root-cause
  chain inline (so a future reader hitting this
  test doesn't need to cross-ref the doc to
  understand the hang shape)
 - Points at
  `ai/conc-anal/subint_sigint_starvation_issue.md`
  for full analysis
 - Cross-links to the *sibling*
  `subint_cancel_delivery_hang_issue.md` so
  readers can tell the two classes apart
 - Explains why it's kept un-`skip`ped: the dump
  file is useful if the hang ever returns after
  a refactor. pytest stderr capture would
  otherwise eat `faulthandler` output, hence the
  file path.
 ### `tests/test_subint_cancellation.py` (modified, +26 LOC)
 > `git diff HEAD~1..HEAD -- tests/test_subint_cancellation.py`
 Extends the docstring of
 `test_subint_non_checkpointing_child` with a
 "KNOWN ISSUE (Ctrl-C-able hang)" block:
 - Describes the current hang: parent-side orphaned
  IPC recv after hard-kill; distinct from the
  SIGINT-starvation sibling class.
 - Cites `strace` distinguishing signal: wakeup-fd
  `write() = 1` (not `EAGAIN`) — i.e. main loop
  iterating.
 - Points at
  `ai/conc-anal/subint_cancel_delivery_hang_issue.md`
  for full analysis + candidate fix directions.
 - Clarifies that the *other* sibling doc
  (SIGINT-starvation) is NOT what this test hits.
 ## Non-code output
 ### Classification reasoning (why two docs, not one)
 The user and I converged on the two-doc split after
 running the suites and noticing two *qualitatively
 different* hang symptoms:
 1. `test_stale_entry_is_deleted[subint]` — pytest
   process un-Ctrl-C-able. Ctrl-C at the terminal
   does nothing. Must kill-9 from another shell.
 2. `test_subint_non_checkpointing_child` — pytest
   process Ctrl-C-able. One Ctrl-C at the prompt
   unblocks cleanly and the test reports a hang
   via pytest-timeout.
 From the user: "These cannot be the same bug.
 Different fix paths. Write them up separately or
 we'll keep conflating them."
 `strace` on the `[subint]` hang gave the decisive
 signal for the first class:
 ```
 --- SIGINT {si_signo=SIGINT, si_code=SI_KERNEL} ---
 write(5, "\2", 1) = -1 EAGAIN (Resource temporarily unavailable)
 ```
 fd 5 is Python's signal-wakeup-fd pipe. `EAGAIN`
 on a `write()` of 1 byte to a pipe means the pipe
 buffer is full → reader side (main Python thread
 inside `trio.run()`) isn't consuming. That's the
 GIL-hostage signature.
 The second class's `strace` showed `write(5, "\2",
 1) = 1` — clean drain — so the main trio loop was
 iterating and the hang had to be on the application
 side of things, not the kernel-↔-Python signal
 boundary.
 ### Why the candidate fix for class 2 is "explicit parent-side channel abort"
 The second hang class has the trio loop alive. A
 parked `chan.recv()` that will never get bytes is
 fundamentally a tractor-side resource-lifetime bug
 — the IPC channel was torn down (subint destroyed)
 but no one explicitly raised
 `BrokenResourceError` at the parent-side receiver.
 The `subint_proc` hard-kill path is the natural
 place to add that notification, because it already
 knows the subint is unreachable at that point.
 Alternative fix paths (blanket timeouts on
 `process_messages`, sentinel-wrapped channels) are
 less surgical and risk masking unrelated bugs —
 hence the preference ordering in the doc.
 ### Why we're not just patching the code now
 The user explicitly deferred the fix to a later
 commit: "Document both classes now, land the fix
 for class 2 separately so the diff reviews clean."
 This matches the incremental-commits preference
 from memory.
--- a/ai/prompt-io/claude/20260422T200723Z_797f57c_prompt_io.md
+++ b/ai/prompt-io/claude/20260422T200723Z_797f57c_prompt_io.md
@ -1,155 +0,0 @@
 ---
 model: claude-opus-4-7[1m]
 service: claude
 session: subints-phase-b-hardening-and-fork-block
 timestamp: 2026-04-22T20:07:23Z
 git_ref: 797f57c
 scope: code
 substantive: true
 raw_file: 20260422T200723Z_797f57c_prompt_io.raw.md
 ---
 ## Prompt
 Session-spanning work on the Phase B `subint` spawn-backend.
 Three distinct sub-phases in one log:
 1. **Py3.13 gate tightening** — diagnose a reproducible hang
   of subint spawn flow under py3.13 (works on py3.14), trace
   to a private `_interpreters` module vintage issue, tighten
   our feature gate from "`_interpreters` present" to "public
   `concurrent.interpreters` present" (i.e. py3.14+).
 2. **Test-harness hardening** — add `pytest-timeout` dep, put
   `@pytest.mark.timeout(30, method='thread')` on the
   three known-hanging subint tests cataloged in
   `ai/conc-anal/subint_sigint_starvation_issue.md`. Separately,
   code-review the user's in-flight `skipon_spawn_backend`
   marker implementation; find four bugs; refactor to use
   `item.iter_markers()`.
 3. **`subint_fork` prototype → CPython-block finding** — draft
   a WIP `subint_fork_proc` backend using a sub-interpreter as
   a launchpad for `os.fork()` (to sidestep trio#1614). User
   tests on py3.14, hits
   `Fatal Python error: _PyInterpreterState_DeleteExceptMain:
   not main interpreter`. Walk CPython sources (local clone at
   `~/repos/cpython/`) to pinpoint the refusal
   (`Modules/posixmodule.c:728` → `Python/pystate.c:1040`).
   Revert implementation to a `NotImplementedError` stub in a
   new `_subint_fork.py` submodule, document the finding in a
   third `conc-anal/` doc with an upstream-report draft for
   the CPython issue tracker. Finally, discuss user's proposed
   workaround architecture (main-interp worker-thread
   forkserver) and draft a standalone smoke-test script for
   feasibility validation.
 ## Response summary
 All three sub-phases landed concrete artifacts:
 **Sub-phase 1** — `_subint.py` + `_spawn.py` gates + error
 messages updated to require py3.14+ via the public
 `concurrent.interpreters` module presence check. Module
 docstring revised to explain the empirical reason
 (py3.13's private `_interpreters` vintage wedges under
 multi-trio-task usage even though minimal standalone
 reproducers work fine there). Test-module
 `pytest.importorskip` likewise switched.
 **Sub-phase 2** — `pytest-timeout>=2.3` added to `testing`
 dep group. `@pytest.mark.timeout(30, method='thread')`
 applied on:
 - `tests/discovery/test_registrar.py::test_stale_entry_is_deleted`
 - `tests/test_cancellation.py::test_cancel_while_childs_child_in_sync_sleep`
 - `tests/test_cancellation.py::test_multierror_fast_nursery`
 - `tests/test_subint_cancellation.py::test_subint_non_checkpointing_child`
 `method='thread'` documented inline as load-bearing — the
 GIL-starvation path that drops `SIGINT` would equally drop
 `SIGALRM`, so only a watchdog-thread timeout can reliably
 escape.
 `skipon_spawn_backend` plugin refactored into a single
 `iter_markers`-driven loop in `pytest_collection_modifyitems`
 (~30 LOC replacing ~30 LOC of nested conditionals). Four
 bugs dissolved: wrong `.get()` key, module-level `pytestmark`
 suppressing per-test marks, unhandled `pytestmark = [list]`
 form, `pytest.Makr` typo. Marker help text updated to
 document the variadic backend-list + `reason=` kwarg
 surface.
 **Sub-phase 3** — Prototype drafted (then reverted):
 - `tractor/spawn/_subint_fork.py` — new dedicated submodule
  housing the `subint_fork_proc` stub. Module docstring +
  fn docstring explain the attempt, the CPython-level
  block, and the reason for keeping the stub in-tree
  (documentation of the attempt + starting point if CPython
  ever lifts the restriction).
 - `tractor/spawn/_spawn.py` — `'subint_fork'` registered as a
  `SpawnMethodKey` literal + in `_methods`, so
  `--spawn-backend=subint_fork` routes to a clean
  `NotImplementedError` pointing at the analysis doc rather
  than an "invalid backend" error.
 - `ai/conc-anal/subint_fork_blocked_by_cpython_post_fork_issue.md` —
  third sibling conc-anal doc. Full annotated CPython
  source walkthrough from user-visible
  `Fatal Python error` → `Modules/posixmodule.c:728
  PyOS_AfterFork_Child()` → `Python/pystate.c:1040
  _PyInterpreterState_DeleteExceptMain()` gate. Includes a
  copy-paste-ready upstream-report draft for the CPython
  issue tracker with a two-tier ask (ideally "make it work",
  minimally "cleaner error than `Fatal Python error`
  aborting the child").
 - `ai/conc-anal/subint_fork_from_main_thread_smoketest.py` —
  standalone zero-tractor-import CPython-level smoke test
  for the user's proposed workaround architecture
  (forkserver on a main-interp worker thread). Four
  argparse-driven scenarios: `control_subint_thread_fork`
  (reproduces the known-broken case as a test-harness
  sanity), `main_thread_fork` (baseline), `worker_thread_fork`
  (architectural assertion), `full_architecture`
  (end-to-end trio-in-subint in forked child). User will
  run on py3.14 next.
 ## Files changed
 See `git log 26fb820..HEAD --stat` for the canonical list.
 New files this session:
 - `tractor/spawn/_subint_fork.py`
 - `ai/conc-anal/subint_fork_blocked_by_cpython_post_fork_issue.md`
 - `ai/conc-anal/subint_fork_from_main_thread_smoketest.py`
 Modified (diff pointers in raw log):
 - `tractor/spawn/_subint.py` (py3.14 gate)
 - `tractor/spawn/_spawn.py` (`subint_fork` registration)
 - `tractor/_testing/pytest.py` (`skipon_spawn_backend` refactor)
 - `pyproject.toml` (`pytest-timeout` dep)
 - `tests/discovery/test_registrar.py`,
  `tests/test_cancellation.py`,
  `tests/test_subint_cancellation.py` (timeout marks,
  cross-refs to conc-anal docs)
 ## Human edits
 Several back-and-forth iterations with user-driven
 adjustments during the session:
 - User corrected my initial mis-classification of
  `test_cancel_while_childs_child_in_sync_sleep[subint-False]`
  as Ctrl-C-able — second strace showed `EAGAIN`, putting
  it squarely in class A (GIL-starvation). Re-analysis
  preserved in the raw log.
 - User independently fixed the `.get(reason)` → `.get('reason', reason)`
  bug in the marker plugin before my review; preserved their
  fix.
 - User suggested moving the `subint_fork_proc` stub from
  the bottom of `_subint.py` into its own
  `_subint_fork.py` submodule — applied.
 - User asked to keep the forkserver-architecture
  discussion as background for the smoke-test rather than
  committing to a tractor-side refactor until the smoke
  test validates the CPython-level assumptions.
 Commit messages in this range (b025c982 … 797f57c) were
 drafted via `/commit-msg` + `rewrap.py --width 67`; user
 landed them with the usual review.
--- a/ai/prompt-io/claude/20260422T200723Z_797f57c_prompt_io.raw.md
+++ b/ai/prompt-io/claude/20260422T200723Z_797f57c_prompt_io.raw.md
@ -1,343 +0,0 @@
 ---
 model: claude-opus-4-7[1m]
 service: claude
 timestamp: 2026-04-22T20:07:23Z
 git_ref: 797f57c
 diff_cmd: git log 26fb820..HEAD  # all session commits since the destroy-race fix log
 ---
 Session-spanning conversation covering the Phase B hardening
 of the `subint` spawn-backend and an investigation into a
 proposed `subint_fork` follow-up which turned out to be
 blocked at the CPython level. This log is a narrative capture
 of the substantive turns (not every message) and references
 the concrete code + docs the session produced. Per diff-ref
 mode the actual code diffs are pointed at via `git log` on
 each ref rather than duplicated inline.
 ## Narrative of the substantive turns
 ### Py3.13 hang / gate tightening
 Diagnosed a reproducible hang of the `subint` backend under
 py3.13 (test_spawning tests wedge after root-actor bringup).
 Root cause: py3.13's vintage of the private `_interpreters` C
 module has a latent thread/subint-interaction issue where
 `_interpreters.exec()` silently fails to progress under
 tractor's multi-trio usage pattern — even though a minimal
 standalone `threading.Thread` + `_interpreters.exec()`
 reproducer works fine on the same Python. Empirically
 py3.14 fixes it.
 Fix (from this session): tighten the `_has_subints` gate in
 `tractor.spawn._subint` from "private module importable" to
 "public `concurrent.interpreters` present" — which is 3.14+
 only. This leaves `subint_proc()` unchanged in behavior (we
 still call the *private* `_interpreters.create('legacy')`
 etc. under the hood) but refuses to engage on 3.13.
 Also tightened the matching gate in
 `tractor.spawn._spawn.try_set_start_method('subint')` and
 rev'd the corresponding error messages from "3.13+" to
 "3.14+" with a sentence explaining why. Test-module
 `pytest.importorskip` switched from `_interpreters` →
 `concurrent.interpreters` to match.
 ### `pytest-timeout` dep + `skipon_spawn_backend` marker plumbing
 Added `pytest-timeout>=2.3` to the `testing` dep group with
 an inline comment pointing at the `ai/conc-anal/*.md` docs.
 Applied `@pytest.mark.timeout(30, method='thread')` (the
 `method='thread'` is load-bearing — `signal`-method
 `SIGALRM` suffers the same GIL-starvation path that drops
 `SIGINT` in the class-A hang pattern) to the three known-
 hanging subint tests cataloged in
 `subint_sigint_starvation_issue.md`.
 Separately code-reviewed the user's newly-staged
 `skipon_spawn_backend` pytest marker implementation in
 `tractor/_testing/pytest.py`. Found four bugs:
 1. `modmark.kwargs.get(reason)` called `.get()` with the
   *variable* `reason` as the dict key instead of the string
   `'reason'` — user-supplied `reason=` was never picked up.
   (User had already fixed this locally via `.get('reason',
   reason)` by the time my review happened — preserved that
   fix.)
 2. The module-level `pytestmark` branch suppressed per-test
   marker handling (per-test marks were only handled in the
   `else:` arm rather than via independent iteration).
 3. `mod_pytestmark.mark` assumed a single
   `MarkDecorator` — broke on the valid-pytest `pytestmark =
   [mark, mark]` list form.
 4. Typo: `pytest.Makr` → `pytest.Mark`.
 Refactored the hook to use `item.iter_markers(name=...)`
 which walks function + class + module scopes uniformly and
 handles both `pytestmark` forms natively. ~30 LOC replaced
 the original ~30 LOC of nested conditionals; all four bugs
 dissolved. Also updated the marker help string to reflect
 the variadic `*start_methods` + `reason=` surface.
 ### `subint_fork_proc` prototype attempt
 User's hypothesis: the known trio+`fork()` issues
 (python-trio/trio#1614) could be sidestepped by using a
 sub-interpreter purely as a launchpad — `os.fork()` from a
 subint that has never imported trio → child is in a
 trio-free context. In the child `execv()` back into
 `python -m tractor._child` and the downstream handshake
 matches `trio_proc()` identically.
 Drafted the prototype at `tractor/spawn/_subint.py`'s bottom
 (originally — later moved to its own submod, see below):
 launchpad-subint creation, bootstrap code-string with
 `os.fork()` + `execv()`, driver-thread orchestration,
 parent-side `ipc_server.wait_for_peer()` dance. Registered
 `'subint_fork'` as a new `SpawnMethodKey` literal, added
 `case 'subint' | 'subint_fork':` feature-gate arm in
 `try_set_start_method()`, added entry in `_methods` dict.
 ### CPython-level block discovered
 User tested on py3.14 and saw:
 ```
 Fatal Python error: _PyInterpreterState_DeleteExceptMain: not main interpreter
 Python runtime state: initialized
 Current thread 0x00007f6b71a456c0 [subint-fork-lau] (most recent call first):
  File "<script>", line 2 in <module>
 <script>:2: DeprecationWarning: This process (pid=802985) is multi-threaded, use of fork() may lead to deadlocks in the child.
 ```
 Walked CPython sources (local clone at `~/repos/cpython/`):
 - **`Modules/posixmodule.c:728` `PyOS_AfterFork_Child()`** —
  post-fork child-side cleanup. Calls
  `_PyInterpreterState_DeleteExceptMain(runtime)` with
  `goto fatal_error` on non-zero status. Has the
  `// Ideally we could guarantee tstate is running main.`
  self-acknowledging-fragile comment directly above.
 - **`Python/pystate.c:1040`
  `_PyInterpreterState_DeleteExceptMain()`** — the
  refusal. Hard `PyStatus_ERR("not main interpreter")` gate
  when `tstate->interp != interpreters->main`. Docstring
  formally declares the precondition ("If there is a
  current interpreter state, it *must* be the main
  interpreter"). `XXX` comments acknowledge further latent
  issues within.
 Definitive answer to "Open Question 1" of the prototype
 docstring: **no, CPython does not support `os.fork()` from
 a non-main sub-interpreter**. Not because the fork syscall
 is blocked (it isn't — the parent returns a valid pid),
 but because the child cannot survive CPython's post-fork
 initialization. This is an enforced invariant, not an
 incidental limitation.
 ### Revert: move to stub submod + doc the finding
 Per user request:
 1. Reverted the working `subint_fork_proc` body to a
   `NotImplementedError` stub, MOVED to its own submod
   `tractor/spawn/_subint_fork.py` (keeps `_subint.py`
   focused on the working `subint_proc` backend).
 2. Updated `_spawn.py` to import the stub from the new
   submod path; kept `'subint_fork'` in `SpawnMethodKey` +
   `_methods` so `--spawn-backend=subint_fork` routes to a
   clean `NotImplementedError` with pointer to the analysis
   doc rather than an "invalid backend" error.
 3. Wrote
   `ai/conc-anal/subint_fork_blocked_by_cpython_post_fork_issue.md`
   with the full annotated CPython walkthrough + an
   upstream-report draft for the CPython issue tracker.
   Draft has a two-tier ask: ideally "make it work"
   (pre-fork tstate-swap hook or `DeleteExceptFor(interp)`
   variant), minimally "give us a clean `RuntimeError` in
   the parent instead of a `Fatal Python error` aborting
   the child silently".
 ### Design discussion — main-interp-thread forkserver workaround
 User proposed: set up a "subint forking server" that fork()s
 on behalf of subint callers. Core insight: the CPython gate
 is on `tstate->interp`, not thread identity, so **any thread
 whose tstate is main-interp** can fork cleanly. A worker
 thread attached to main-interp (never entering a subint)
 satisfies the precondition.
 Structurally this is `mp.forkserver` (which tractor already
 has as `mp_forkserver`) but **in-process**: instead of a
 separate Python subproc as the fork server, we'd put the
 forkserver on a thread in the tractor parent process. Pros:
 faster spawn (no IPC marshalling to external server + no
 separate Python startup), inherits already-imported modules
 for free. Cons: less crash isolation (forkserver failure
 takes the whole process).
 Required tractor-side refactor: move the root actor's
 `trio.run()` off main-interp-main-thread (so main-thread can
 run the forkserver loop). Nontrivial; approximately the same
 magnitude as "Phase C".
 The design would also not fully resolve the class-A
 GIL-starvation issue because child actors' trio still runs
 inside subints (legacy config, msgspec PEP 684 pending).
 Would mitigate SIGINT-starvation specifically if signal
 handling moves to the forkserver thread.
 Recommended pre-commitment: a standalone CPython-only smoke
 test validating the four assumptions the arch rests on,
 before any tractor-side work.
 ### Smoke-test script drafted
 Wrote `ai/conc-anal/subint_fork_from_main_thread_smoketest.py`:
 argparse-driven, four scenarios (`control_subint_thread_fork`
 reproducing the known-broken case, `main_thread_fork`
 baseline, `worker_thread_fork` the architectural assertion,
 `full_architecture` end-to-end with trio in a subint in the
 forked child). No `tractor` imports; pure CPython + `_interpreters`
 + `trio`. Bails cleanly on py<3.14. Pass/fail banners per
 scenario.
 User will validate on their py3.14 env next.
 ## Per-code-artifact provenance
 ### `tractor/spawn/_subint_fork.py` (new submod)
 > `git show 797f57c -- tractor/spawn/_subint_fork.py`
 NotImplementedError stub for the subint-fork backend. Module
 docstring + fn docstring explain the attempt, the CPython
 block, and why the stub is kept in-tree. No runtime behavior
 beyond raising with a pointer at the conc-anal doc.
 ### `tractor/spawn/_spawn.py` (modified)
 > `git log 26fb820..HEAD -- tractor/spawn/_spawn.py`
 - Added `'subint_fork'` to `SpawnMethodKey` literal with a
  block comment explaining the CPython-level block.
 - Generalized the `case 'subint':` arm to `case 'subint' |
  'subint_fork':` since both use the same py3.14+ gate.
 - Registered `subint_fork_proc` in `_methods` with a
  pointer-comment at the analysis doc.
 ### `tractor/spawn/_subint.py` (modified across session)
 > `git log 26fb820..HEAD -- tractor/spawn/_subint.py`
 - Tightened `_has_subints` gate: dual-requires public
  `concurrent.interpreters` + private `_interpreters`
  (tests for py3.14-or-newer on the public-API presence,
  then uses the private one for legacy-config subints
  because `msgspec` still blocks the public isolated mode
  per jcrist/msgspec#563).
 - Updated module docstring, `subint_proc()` docstring, and
  gate-error messages to reflect the 3.14+ requirement and
  the reason (py3.13 wedges under multi-trio usage even
  though the private module exists there).
 ### `tractor/_testing/pytest.py` (modified)
 > `git log 26fb820..HEAD -- tractor/_testing/pytest.py`
 - New `skipon_spawn_backend(*start_methods, reason=...)`
  pytest marker expanded into `pytest.mark.skip(reason=...)`
  at collection time via
  `pytest_collection_modifyitems()`.
 - Implementation uses `item.iter_markers(name=...)` which
  walks function + class + module scopes uniformly and
  handles both `pytestmark = <single Mark>` and
  `pytestmark = [mark, ...]` forms natively. ~30-LOC
  single-loop refactor replacing a prior nested
  conditional that had four bugs (see "Review" narrative
  above).
 - Added `pytest.Config` / `pytest.Function` /
  `pytest.FixtureRequest` type annotations on fixture
  signatures while touching the file.
 ### `pyproject.toml` (modified)
 > `git log 26fb820..HEAD -- pyproject.toml`
 Added `pytest-timeout>=2.3` to `testing` dep group with
 comment pointing at the `ai/conc-anal/` docs.
 ### `tests/discovery/test_registrar.py`, `tests/test_subint_cancellation.py`, `tests/test_cancellation.py` (modified)
 > `git log 26fb820..HEAD -- tests/`
 Applied `@pytest.mark.timeout(30, method='thread')` on
 known-hanging subint tests. Extended comments to cross-
 reference the `ai/conc-anal/*.md` docs. `method='thread'`
 is documented inline as load-bearing (`signal`-method
 SIGALRM suffers the same GIL-starvation path that drops
 SIGINT).
 ### `ai/conc-anal/subint_fork_blocked_by_cpython_post_fork_issue.md` (new)
 > `git show 797f57c -- ai/conc-anal/subint_fork_blocked_by_cpython_post_fork_issue.md`
 Third sibling doc under `conc-anal/`. Structure: TL;DR,
 context ("what we tried"), symptom (the user's exact
 `Fatal Python error` output), CPython source walkthrough
 with excerpted snippets from `posixmodule.c` +
 `pystate.c`, chain summary, definitive answer to Open
 Question 1, `## Upstream-report draft (for CPython issue
 tracker)` section with a two-tier ask, references.
 ### `ai/conc-anal/subint_fork_from_main_thread_smoketest.py` (new, THIS turn)
 Zero-tractor-import smoke test for the proposed workaround
 architecture. Four argparse-driven scenarios covering the
 control case + baseline + arch-critical case + end-to-end.
 Pass/fail banners per scenario; clean `--help` output;
 py3.13 early-exit.
 ## Non-code output (verbatim)
 ### The `strace` signature that kicked off the CPython walkthrough
 ```
 --- SIGINT {si_signo=SIGINT, si_code=SI_KERNEL} ---
 write(16, "\2", 1)                      = -1 EAGAIN (Resource temporarily unavailable)
 rt_sigreturn({mask=[WINCH]})            = 139801964688928
 ```
 ### Key user quotes framing the direction
 > ok actually we get this [fatal error] ... see if you can
 > take a look at what's going on, in particular wrt to
 > cpython's sources. pretty sure there's a local copy at
 > ~/repos/cpython/
 (Drove the CPython walkthrough that produced the
 definitive refusal chain.)
 > is there any reason we can't just sidestep this "must fork
 > from main thread in main subint" issue by simply ensuring
 > a "subint forking server" is always setup prior to
 > invoking trio in a non-main-thread subint ...
 (Drove the main-interp-thread-forkserver architectural
 discussion + smoke-test script design.)
 ### CPython source tags for quick jump-back
 ```
 Modules/posixmodule.c:728   PyOS_AfterFork_Child()
 Modules/posixmodule.c:753   // Ideally we could guarantee tstate is running main.
 Modules/posixmodule.c:778   status = _PyInterpreterState_DeleteExceptMain(runtime);
 Python/pystate.c:1040       _PyInterpreterState_DeleteExceptMain()
 Python/pystate.c:1044-1047  tstate->interp != main → PyStatus_ERR("not main interpreter")
 ```
--- a/ai/prompt-io/claude/README.md
+++ b/ai/prompt-io/claude/README.md
@ -1,27 +0,0 @@
 # AI Prompt I/O Log — claude
 This directory tracks prompt inputs and model
 outputs for AI-assisted development using
 `claude` (Claude Code).
 ## Policy
 Prompt logging follows the
 [NLNet generative AI policy][nlnet-ai].
 All substantive AI contributions are logged
 with:
 - Model name and version
 - Timestamps
 - The prompts that produced the output
 - Unedited model output (`.raw.md` files)
 [nlnet-ai]: https://nlnet.nl/foundation/policies/generativeAI/
 ## Usage
 Entries are created by the `/prompt-io` skill
 or automatically via `/commit-msg` integration.
 Human contributors remain accountable for all
 code decisions. AI-generated content is never
 presented as human-authored work.
--- a/ai/prompt-io/prompts/multiaddr_declare_eps.md
+++ b/ai/prompt-io/prompts/multiaddr_declare_eps.md
@ -1,76 +0,0 @@
 ok now i want you to take a look at the most recent commit adding
 a `tpt_bind_addrs` to `open_root_actor()` and extend the existing
 tests/discovery/test_multiaddr* and friends to use this new param in
 at least one suite with parametrizations over,
 - `registry_addrs == tpt_bind_addrs`, as in both inputs are the same.
 - `set(registry_addrs) >= set(tpt_bind_addrs)`, as in the registry
  addrs include the bind set.
 - `registry_addrs != tpt_bind_addrs`, where the reg set is disjoint from
  the bind set in all possible combos you can imagine.
 All of the ^above cases should further be parametrized over,
 - the root being the registrar,
 - a non-registrar root using our bg `daemon` fixture.
 once we have a fairly thorough test suite and have flushed out all
 bugs and edge cases we want to design a wrapping API which allows
 declaring full trees of actors' tpt endpoints using multiaddrs such
 that a `dict[str, list[str]]` of actor-name -> multiaddr can be used
 to configure a tree of actors-as-services given such an input
 "endpoints-table" can be matched with the number of appropriately
 named subactor spawns in a `tractor` user-app.
 Here is a small example from piker,
 - in piker's root conf.toml we define a `[network]` section which can
  define various actor-service-daemon names set to a maddr
  (multiaddress str).
 - each actor whether part of the `pikerd` tree (as a sub) or spawned
  in other non-registrar rooted trees (such as `piker chart`) should
  be configurable in terms of its `tractor` tpt bind addresses via
  a simple service lookup table,
  ```toml
  [network]
  pikerd = [
    '/ip4/127.0.0.1/tcp/6116',  # std localhost daemon-actor tree
    '/uds/run/user/1000/piker/pikerd@6116.sock',  # same but serving UDS
  ]
  chart = [
    '/ip4/127.0.0.1/tcp/3333',  # std localhost daemon-actor tree
    '/uds/run/user/1000/piker/chart@3333.sock',
  ]
  ```
 We should take whatever common API is needed to support this and
 distill it into a
 ```python
 tractor.discovery.parse_endpoints(
 ) -> dict[
  str,
  list[Address]
  |dict[str, list[Address]]
  # ^recursive case, see below
 ]:
 ```
 style API which can,
 - be re-used easily across dependent projects.
 - correctly raise tpt-backend support errors when a maddr specifying
  an unsupported proto is passed.
 - be used to handle "tunnelled" maddrs per
  https://github.com/multiformats/py-multiaddr/#tunneling such that
  for any such tunneled maddr-`str`-entry we deliver a data-structure
  which can easily be passed to nested `@acm`s which consecutively
  setup nested net bindspaces for binding the endpoint addrs using
  a combo of our `.ipc.*` machinery and, say for example something like
  https://github.com/svinota/pyroute2, more precisely say for
  managing tunnelled wireguard eps within network-namespaces,
  * https://docs.pyroute2.org/
  * https://docs.pyroute2.org/netns.html
 remember to include use of all default `.claude/skills` throughout
 this work!
--- a/ai/prompt-io/prompts/subints_spawner.md
+++ b/ai/prompt-io/prompts/subints_spawner.md
@ -1,34 +0,0 @@
 This is your first big boi, "from GH issue" design, plan and
 implement task.
 We need to try and add sub-interpreter (aka subint) support per the
 issue,
 https://github.com/goodboy/tractor/issues/379
 Part of this work should include,
 - modularizing and thus better organizing the `.spawn.*` subpkg by
  breaking up various backends currently in `spawn._spawn` into
  separate submods where it makes sense.
 - add a new `._subint` backend which tries to keep as much of the
  inter-process-isolation machinery in use as possible but with plans
  to optimize for localhost only benefits as offered by python's
  subints where possible.
  * utilizing localhost-only tpts like UDS, shm-buffers for
    performant IPC between subactors but also leveraging the benefits from
    the traditional OS subprocs mem/storage-domain isolation, linux
    namespaces where possible and as available/permitted by whatever
    is happening under the hood with how cpython implements subints.
  * default configuration should encourage state isolation as with
    subprocs, but explicit public escape hatches to enable rigorously
    managed shm channels for high performance apps.
 - all tests should be (able to be) parameterized to use the new
  `subints` backend and enabled by flag in the harness using the
  existing `pytest --spawn-backend <spawn-backend>` support offered in
  the `open_root_actor()` and `.testing._pytest` harness override
  fixture.
--- a/docs/README.rst
+++ b/docs/README.rst
@ -420,17 +420,20 @@ Check out our experimental system for `guest`_-mode controlled
    async def aio_echo_server(
-        chan: tractor.to_asyncio.LinkedTaskChannel,
+        to_trio: trio.MemorySendChannel,
        from_trio: asyncio.Queue,
    ) -> None:
        # a first message must be sent **from** this ``asyncio``
        # task or the ``trio`` side will never unblock from
        # ``tractor.to_asyncio.open_channel_from():``
-        chan.started_nowait('start')
+        to_trio.send_nowait('start')
        # XXX: this uses an ``from_trio: asyncio.Queue`` currently but we
        # should probably offer something better.
        while True:
            # echo the msg back
-            chan.send_nowait(await chan.get())
+            to_trio.send_nowait(await from_trio.get())
            await asyncio.sleep(0)
@ -442,7 +445,7 @@ Check out our experimental system for `guest`_-mode controlled
        # message.
        async with tractor.to_asyncio.open_channel_from(
            aio_echo_server,
-        ) as (chan, first):
+        ) as (first, chan):
            assert first == 'start'
            await ctx.started(first)
@ -501,10 +504,8 @@ Yes, we spawn a python process, run ``asyncio``, start ``trio`` on the
 ``asyncio`` loop, then send commands to the ``trio`` scheduled tasks to
 tell ``asyncio`` tasks what to do XD
-The ``asyncio``-side task receives a single
+We need help refining the `asyncio`-side channel API to be more
-``chan: LinkedTaskChannel`` handle providing a ``trio``-like
+`trio`-like. Feel free to sling your opinion in `#273`_!
 API: ``.started_nowait()``, ``.send_nowait()``, ``.get()``
 and more. Feel free to sling your opinion in `#273`_!
 .. _#273: https://github.com/goodboy/tractor/issues/273
@ -640,15 +641,13 @@ Help us push toward the future of distributed `Python`.
 - Typed capability-based (dialog) protocols ( see `#196
  <https://github.com/goodboy/tractor/issues/196>`_ with draft work
  started in `#311 <https://github.com/goodboy/tractor/pull/311>`_)
- **macOS is now officially supported** and tested in CI
+- We **recently disabled CI-testing on windows** and need help getting
-  alongside Linux!
+  it running again! (see `#327
- We **recently disabled CI-testing on windows** and need
+  <https://github.com/goodboy/tractor/pull/327>`_). **We do have windows
-  help getting it running again! (see `#327
+  support** (and have for quite a while) but since no active hacker
-  <https://github.com/goodboy/tractor/pull/327>`_). **We do
+  exists in the user-base to help test on that OS, for now we're not
-  have windows support** (and have for quite a while) but
+  actively maintaining testing due to the added hassle and general
-  since no active hacker exists in the user-base to help
+  latency..
  test on that OS, for now we're not actively maintaining
  testing due to the added hassle and general latency..
 Feel like saying hi?
--- a/examples/advanced_faults/ipc_failure_during_stream.py
+++ b/examples/advanced_faults/ipc_failure_during_stream.py
@ -17,7 +17,6 @@ from tractor import (
    MsgStream,
    _testing,
    trionics,
    TransportClosed,
 )
 import trio
 import pytest
@ -209,16 +208,12 @@ async def main(
                        # TODO: is this needed or no?
                        raise
-                    except (
+                    except trio.ClosedResourceError:
                        trio.ClosedResourceError,
                        TransportClosed,
                    ) as _tpt_err:
                        # NOTE: don't send if we already broke the
                        # connection to avoid raising a closed-error
                        # such that we drop through to the ctl-c
                        # mashing by user.
-                        with trio.CancelScope(shield=True):
+                        await trio.sleep(0.01)
                            await trio.sleep(0.01)
                    # timeout: int = 1
                    # with trio.move_on_after(timeout) as cs:
@ -252,7 +247,6 @@ async def main(
                    await stream.send(i)
                    pytest.fail('stream not closed?')
                except (
                    TransportClosed,
                    trio.ClosedResourceError,
                    trio.EndOfChannel,
                ) as send_err:
--- a/examples/debugging/asyncio_bp.py
+++ b/examples/debugging/asyncio_bp.py
@ -18,14 +18,15 @@ async def aio_sleep_forever():
 async def bp_then_error(
-    chan: to_asyncio.LinkedTaskChannel,
+    to_trio: trio.MemorySendChannel,
    from_trio: asyncio.Queue,
    raise_after_bp: bool = True,
 ) -> None:
    # sync with `trio`-side (caller) task
-    chan.started_nowait('start')
+    to_trio.send_nowait('start')
    # NOTE: what happens here inside the hook needs some refinement..
    # => seems like it's still `.debug._set_trace()` but
@ -59,7 +60,7 @@ async def trio_ctx(
        to_asyncio.open_channel_from(
            bp_then_error,
            # raise_after_bp=not bp_before_started,
-        ) as (chan, first),
+        ) as (first, chan),
        trio.open_nursery() as tn,
    ):
--- a/examples/debugging/fast_error_in_root_after_spawn.py
+++ b/examples/debugging/fast_error_in_root_after_spawn.py
@ -20,7 +20,7 @@ async def sleep(
 async def open_ctx(
-    n: tractor.runtime._supervise.ActorNursery
+    n: tractor._supervise.ActorNursery
 ):
    # spawn both actors
--- a/examples/debugging/shield_hang_in_sub.py
+++ b/examples/debugging/shield_hang_in_sub.py
@ -3,7 +3,6 @@ Verify we can dump a `stackscope` tree on a hang.
 '''
 import os
 import platform
 import signal
 import trio
@ -32,26 +31,13 @@ async def main(
    from_test: bool = False,
 ) -> None:
    if platform.system() != 'Darwin':
        tpt = 'uds'
    else:
        # XXX, precisely we can't use pytest's tmp-path generation
        # for tests.. apparently because:
        #
        # > The OSError: AF_UNIX path too long in macOS Python occurs
        # > because the path to the Unix domain socket exceeds the
        # > operating system's maximum path length limit (around 104
        # > characters).
        #
        # WHICH IS just, wtf hilarious XD
        tpt = 'tcp'
    async with (
        tractor.open_nursery(
            debug_mode=True,
            enable_stack_on_sig=True,
            # maybe_enable_greenback=False,
            loglevel='devx',
-            enable_transports=[tpt],
+            enable_transports=['uds'],
        ) as an,
    ):
        ptl: tractor.Portal  = await an.start_actor(
--- a/examples/debugging/subactor_bp_in_ctx.py
+++ b/examples/debugging/subactor_bp_in_ctx.py
@ -1,5 +1,3 @@
 import platform
 import tractor
 import trio
@ -36,22 +34,9 @@ async def just_bp(
 async def main():
    if platform.system() != 'Darwin':
        tpt = 'uds'
    else:
        # XXX, precisely we can't use pytest's tmp-path generation
        # for tests.. apparently because:
        #
        # > The OSError: AF_UNIX path too long in macOS Python occurs
        # > because the path to the Unix domain socket exceeds the
        # > operating system's maximum path length limit (around 104
        # > characters).
        #
        # WHICH IS just, wtf hilarious XD
        tpt = 'tcp'
    async with tractor.open_nursery(
        debug_mode=True,
-        enable_transports=[tpt],
+        enable_transports=['uds'],
        loglevel='devx',
    ) as n:
        p = await n.start_actor(
--- a/examples/full_fledged_streaming_service.py
+++ b/examples/full_fledged_streaming_service.py
@ -90,7 +90,7 @@ async def main() -> list[int]:
    # yes, a nursery which spawns `trio`-"actors" B)
    an: ActorNursery
    async with tractor.open_nursery(
-        loglevel='error',
+        loglevel='cancel',
        # debug_mode=True,
    ) as an:
@ -118,10 +118,8 @@ async def main() -> list[int]:
        cancelled: bool = await portal.cancel_actor()
        assert cancelled
-        print(
+        print(f"STREAM TIME = {time.time() - start}")
-            f"STREAM TIME = {time.time() - start}\n"
+        print(f"STREAM + SPAWN TIME = {time.time() - pre_start}")
            f"STREAM + SPAWN TIME = {time.time() - pre_start}\n"
        )
        assert result_stream == list(range(seed))
        return result_stream
--- a/examples/infected_asyncio_echo_server.py
+++ b/examples/infected_asyncio_echo_server.py
@ -11,17 +11,21 @@ import tractor
 async def aio_echo_server(
-    chan: tractor.to_asyncio.LinkedTaskChannel,
+    to_trio: trio.MemorySendChannel,
    from_trio: asyncio.Queue,
 ) -> None:
    # a first message must be sent **from** this ``asyncio``
    # task or the ``trio`` side will never unblock from
    # ``tractor.to_asyncio.open_channel_from():``
-    chan.started_nowait('start')
+    to_trio.send_nowait('start')
    # XXX: this uses an ``from_trio: asyncio.Queue`` currently but we
    # should probably offer something better.
    while True:
        # echo the msg back
-        chan.send_nowait(await chan.get())
+        to_trio.send_nowait(await from_trio.get())
        await asyncio.sleep(0)
@ -33,7 +37,7 @@ async def trio_to_aio_echo_server(
    # message.
    async with tractor.to_asyncio.open_channel_from(
        aio_echo_server,
-    ) as (chan, first):
+    ) as (first, chan):
        assert first == 'start'
        await ctx.started(first)
--- a/examples/integration/mpi4py/init.py
+++ b/examples/integration/mpi4py/init.py
--- a/examples/integration/mpi4py/_child.py
+++ b/examples/integration/mpi4py/_child.py
@ -1,5 +0,0 @@
 import os
 async def child_fn() -> str:
    """
    Return a short status string embedding the pid of the process
    which runs this function.
    """
    return f"child OK  pid={os.getpid()}"
--- a/examples/integration/mpi4py/inherit_parent_main.py
+++ b/examples/integration/mpi4py/inherit_parent_main.py
@ -1,50 +0,0 @@
 """
 Integration test: spawning tractor actors from an MPI process.
 When a parent is launched via ``mpirun``, Open MPI sets ``OMPI_*`` env
 vars that bind ``MPI_Init`` to the ``orted`` daemon.  Tractor children
 inherit those env vars, so if ``inherit_parent_main=True`` (the default)
 the child re-executes ``__main__``, re-imports ``mpi4py``, and
 ``MPI_Init_thread`` fails because the child was never spawned by
 ``orted``::
    getting local rank failed
      --> Returned value No permission (-17) instead of ORTE_SUCCESS
 Passing ``inherit_parent_main=False`` and placing RPC functions in a
 separate importable module (``_child``) avoids the re-import entirely.
 Usage::
    mpirun --allow-run-as-root -np 1 python -m \
        examples.integration.mpi4py.inherit_parent_main
 """
 from mpi4py import MPI
 import os
 import trio
 import tractor
 from ._child import child_fn
 async def main() -> None:
    """
    Spawn a single `tractor` child actor from this (MPI-initialized)
    parent process and run `child_fn` inside it.
    Prints the parent's MPI rank and pid, starts the 'mpi-child'
    actor with `inherit_parent_main=False`, prints whatever
    `child_fn` returned and finally cancels the child actor.
    """
    # report which MPI rank (and OS process) is acting as the parent
    rank = MPI.COMM_WORLD.Get_rank()
    print(f"[parent] rank={rank}  pid={os.getpid()}", flush=True)
    async with tractor.open_nursery(start_method='trio') as an:
        portal = await an.start_actor(
            'mpi-child',
            # expose only the module which defines `child_fn` for RPC
            enable_modules=[child_fn.__module__],
            # Without this the child replays __main__, which
            # re-imports mpi4py and crashes on MPI_Init.
            inherit_parent_main=False,
        )
        # invoke `child_fn` in the child and wait on its result
        result = await portal.run(child_fn)
        print(f"[parent] got: {result}", flush=True)
        # explicitly tear down the child before leaving the nursery
        await portal.cancel_actor()
 if __name__ == "__main__":
    trio.run(main)
--- a/examples/service_discovery.py
+++ b/examples/service_discovery.py
@ -10,7 +10,7 @@ async def main(service_name):
        await an.start_actor(service_name)
        async with tractor.get_registry() as portal:
-            print(f"Registrar is listening on {portal.channel}")
+            print(f"Arbiter is listening on {portal.channel}")
        async with tractor.wait_for_actor(service_name) as sockaddr:
            print(f"my_service is found at {sockaddr}")
--- a/flake.lock
+++ b/flake.lock
@ -1,27 +0,0 @@
 {
  "nodes": {
    "nixpkgs": {
      "locked": {
        "lastModified": 1769018530,
        "narHash": "sha256-MJ27Cy2NtBEV5tsK+YraYr2g851f3Fl1LpNHDzDX15c=",
        "owner": "nixos",
        "repo": "nixpkgs",
        "rev": "88d3861acdd3d2f0e361767018218e51810df8a1",
        "type": "github"
      },
      "original": {
        "owner": "nixos",
        "ref": "nixos-unstable",
        "repo": "nixpkgs",
        "type": "github"
      }
    },
    "root": {
      "inputs": {
        "nixpkgs": "nixpkgs"
      }
    }
  },
  "root": "root",
  "version": 7
 }
--- a/flake.nix
+++ b/flake.nix
@ -1,70 +0,0 @@
 # An "impure" template thx to `pyproject.nix`,
 # https://pyproject-nix.github.io/pyproject.nix/templates.html#impure
 # https://github.com/pyproject-nix/pyproject.nix/blob/master/templates/impure/flake.nix
 {
  description = "An impure overlay (w dev-shell) using `uv`";
  inputs = {
    nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable";
  };
  outputs =
    { nixpkgs, ... }:
    let
      inherit (nixpkgs) lib;
      forAllSystems = lib.genAttrs lib.systems.flakeExposed;
    in
    {
      devShells = forAllSystems (
        system:
        let
          pkgs = nixpkgs.legacyPackages.${system};
          # XXX NOTE XXX, for now we overlay specific pkgs via
          # a major-version-pinned-`cpython`
          cpython = "python313";
          venv_dir = "py313";
          pypkgs = pkgs."${cpython}Packages";
        in
        {
          default = pkgs.mkShell {
            packages = [
              # XXX, ensure sh completions activate!
              pkgs.bashInteractive
              pkgs.bash-completion
              # XXX, on nix(os), use pkgs version to avoid
              # build/sys-sh-integration issues
              pkgs.ruff
              pkgs.uv
              pkgs.${cpython}# ?TODO^ how to set from `cpython` above?
            ];
            shellHook = ''
              # unmask to debug **this** dev-shell-hook
              # set -e
              # link-in c++ stdlib for various AOT-ext-pkgs (numpy, etc.)
              LD_LIBRARY_PATH="${pkgs.stdenv.cc.cc.lib}/lib:$LD_LIBRARY_PATH"
              export LD_LIBRARY_PATH
              # RUNTIME-SETTINGS
              # ------ uv ------
              # - always use the ./py313/ venv-subdir
              # - sync env with all extras
              export UV_PROJECT_ENVIRONMENT=${venv_dir}
              uv sync --dev --all-extras
              # ------ TIPS ------
              # NOTE, to launch the py-venv installed `xonsh` (like @goodboy)
              # run the `nix develop` cmd with,
              # >> nix develop -c uv run xonsh
            '';
          };
        }
      );
    };
 }
--- a/pyproject.toml
+++ b/pyproject.toml
@ -9,7 +9,7 @@ name = "tractor"
 version = "0.1.0a6dev0"
 description = 'structured concurrent `trio`-"actors"'
 authors = [{ name = "Tyler Goodlet", email = "goodboy_foss@protonmail.com" }]
-requires-python = ">=3.13, <3.15"
+requires-python = ">= 3.11"
 readme = "docs/README.rst"
 license = "AGPL-3.0-or-later"
 keywords = [
@ -24,14 +24,11 @@ keywords = [
 classifiers = [
  "Development Status :: 3 - Alpha",
  "Operating System :: POSIX :: Linux",
  "Operating System :: MacOS",
  "Framework :: Trio",
  "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
  "Programming Language :: Python :: Implementation :: CPython",
  "Programming Language :: Python :: 3 :: Only",
-  "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.11",
  "Programming Language :: Python :: 3.13",
  "Programming Language :: Python :: 3.14",
  "Topic :: System :: Distributed Computing",
 ]
 dependencies = [
@ -44,99 +41,49 @@ dependencies = [
  "tricycle>=0.4.1,<0.5",
  "wrapt>=1.16.0,<2",
  "colorlog>=6.8.2,<7",
  # built-in multi-actor `pdb` REPL
-  "pdbp>=1.8.2,<2", # windows only (from `pdbp`)
+  "pdbp>=1.6,<2", # windows only (from `pdbp`)
  # typed IPC msging
-  "msgspec>=0.20.0",
+  "msgspec>=0.19.0",
  "cffi>=1.17.1",
  "bidict>=0.23.1",
  "multiaddr>=0.2.0",
  "platformdirs>=4.4.0",
 ]
 # ------ project ------
 [dependency-groups]
 dev = [
  {include-group = 'devx'},
  {include-group = 'testing'},
  {include-group = 'repl'},
 ]
 devx = [
  # `tractor.devx` tooling
  "stackscope>=0.2.2,<0.3",
  # ^ requires this?
  "typing-extensions>=4.14.1",
  # {include-group = 'sync_pause'},  # XXX, no 3.14 yet!
 ]
 sync_pause = [
  "greenback>=1.2.1,<2",  # TODO? 3.14 greenlet on nix?
 ]
 testing = [
  # test suite
  # TODO: maybe some of these layout choices?
  # https://docs.pytest.org/en/8.0.x/explanation/goodpractices.html#choosing-a-test-layout-import-rules
  "pytest>=8.3.5",
  "pexpect>=4.9.0,<5",
-  # per-test wall-clock bound (used via
+  # `tractor.devx` tooling
-  # `@pytest.mark.timeout(..., method='thread')` on the
+  "greenback>=1.2.1,<2",
-  # known-hanging `subint`-backend audit tests; see
+  "stackscope>=0.2.2,<0.3",
-  # `ai/conc-anal/subint_*_issue.md`).
+  # ^ requires this?
-  "pytest-timeout>=2.3",
+  "typing-extensions>=4.14.1",
-]
+
 repl = [
  "pyperclip>=1.9.0",
  "prompt-toolkit>=3.0.50",
-  "xonsh>=0.22.8",
+  "xonsh>=0.19.2",
  "psutil>=7.0.0",
 ]
 lint = [
  "ruff>=0.9.6"
 ]
 # XXX, used for linux-only hi perf eventfd+shm channels
 # now mostly moved over to `hotbaud`.
 eventfd = [
  "cffi>=1.17.1",
 ]
 subints = [
  "msgspec>=0.21.0",
 ]
 # TODO, add these with sane versions; were originally in
 # `requirements-docs.txt`..
 # docs = [
 #   "sphinx>="
 #   "sphinx_book_theme>="
 # ]
 # ------ dependency-groups ------
-[tool.uv.dependency-groups]
+# ------ dependency-groups ------
 # for subints, we require 3.14+ due to 2 issues,
 # - hanging behaviour for various multi-task teardown cases (see
 #   "Availability" section in the `tractor.spawn._subints` doc string).
 # - `msgspec` support which is outstanding per PEP 684 upstream tracker:
 #   https://github.com/jcrist/msgspec/issues/563
 #
 # https://docs.astral.sh/uv/concepts/projects/dependencies/#group-requires-python
 subints = {requires-python = ">=3.14"}
 eventfd = {requires-python = ">=3.13, <3.14"}
 sync_pause = {requires-python = ">=3.13, <3.14"}
 [tool.uv.sources]
 # XXX NOTE, only for @goodboy's hacking on `pprint(sort_dicts=False)`
 # for the `pp` alias..
-# ------ gh upstream ------
+# pdbp = { path = "../pdbp", editable = true }
 # xonsh = { git = 'https://github.com/anki-code/xonsh.git', branch = 'prompt_next_suggestion' }
 # ^ https://github.com/xonsh/xonsh/pull/6048
 # xonsh = { git = 'https://github.com/xonsh/xonsh.git', branch = 'main' }
 xonsh = { path = "../xonsh", editable = true }
 # [tool.uv.sources.pdbp]
 # XXX, in case we need to tmp patch again.
 # git = "https://github.com/goodboy/pdbp.git"
 # branch ="repair_stack_trace_frame_indexing"
 # path = "../pdbp"
 # editable = true
 # ------ tool.uv.sources ------
 # TODO, distributed (multi-host) extensions
@ -198,8 +145,6 @@ all_bullets = true
 [tool.pytest.ini_options]
 minversion = '6.0'
 timeout = 200  # per-test hard limit
 # https://docs.pytest.org/en/stable/reference/reference.html#configuration-options
 testpaths = [
  'tests'
 ]
@ -210,40 +155,10 @@ addopts = [
  '--import-mode=importlib',
  # don't show frickin captured logs AGAIN in the report..
  '--show-capture=no',
  # sys-level capture. REQUIRED for fork-based spawn
  # backends (e.g. `subint_forkserver`): default
  # `--capture=fd` redirects fd 1,2 to temp files, and fork
  # children inherit those fds — opaque deadlocks happen in
  # the pytest-capture-machinery ↔ fork-child stdio
  # interaction. `--capture=sys` only redirects Python-level
  # `sys.stdout`/`sys.stderr`, leaving fd 1,2 alone.
  #
  # Trade-off (vs. `--capture=fd`):
  # - LOST: per-test attribution of subactor *raw-fd* output
  #   (C-ext writes, `os.write(2, ...)`, subproc stdout). Not
  #   zero — those go to the terminal, captured by CI's
  #   terminal-level capture, just not per-test-scoped in the
  #   pytest failure report.
  # - KEPT: Python-level `print()` + `logging` capture per-
  #   test (tractor's logger uses `sys.stderr`, so tractor
  #   log output IS still attributed per-test).
  # - KEPT: user `pytest -s` for debugging (unaffected).
  #
  # Full post-mortem in
  # `ai/conc-anal/subint_forkserver_test_cancellation_leak_issue.md`.
  '--capture=sys',
  # disable `xonsh` plugin
  # https://docs.pytest.org/en/stable/how-to/plugins.html#disabling-plugins-from-autoloading
  # https://docs.pytest.org/en/stable/how-to/plugins.html#deactivating-unregistering-a-plugin-by-name
  '-p no:xonsh'
 ]
 log_cli = false
 # TODO: maybe some of these layout choices?
 # https://docs.pytest.org/en/8.0.x/explanation/goodpractices.html#choosing-a-test-layout-import-rules
 # pythonpath = "src"
 # https://docs.pytest.org/en/stable/reference/reference.html#confval-console_output_style
 console_output_style = 'progress'
 # ------ tool.pytest ------
--- a/pytest.ini
+++ b/pytest.ini
@ -0,0 +1,8 @@
 # vim: ft=ini
 # pytest.ini for tractor
 [pytest]
 # don't show frickin captured logs AGAIN in the report..
 addopts = --show-capture='no'
 log_cli = false
 ; minversion = 6.0
--- a/ruff.toml
+++ b/ruff.toml
@ -35,8 +35,8 @@ exclude = [
 line-length = 88
 indent-width = 4
-# assume latest minor cpython
+# Assume Python 3.11
-target-version = "py313"
+target-version = "py311"
 [lint]
 # Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`)  codes by default.
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -9,11 +9,8 @@ import os
 import signal
 import platform
 import time
 from pathlib import Path
 from typing import Literal
 import pytest
 import tractor
 from tractor._testing import (
    examples_dir as examples_dir,
    tractor_test as tractor_test,
@ -25,8 +22,6 @@ pytest_plugins: list[str] = [
    'tractor._testing.pytest',
 ]
 _ci_env: bool = os.environ.get('CI', False)
 _non_linux: bool = platform.system() != 'Linux'
 # Sending signal.SIGINT on subprocess fails on windows. Use CTRL_* alternatives
 if platform.system() == 'Windows':
@ -39,8 +34,9 @@ else:
    _INT_SIGNAL = signal.SIGINT
    _INT_RETURN_CODE = 1 if sys.version_info < (3, 8) else -signal.SIGINT.value
    _PROC_SPAWN_WAIT = (
-        2 if _ci_env
+        0.6
-        else 1
+        if sys.version_info < (3, 7)
        else 0.4
    )
@ -48,80 +44,6 @@ no_windows = pytest.mark.skipif(
    platform.system() == "Windows",
    reason="Test is unsupported on windows",
 )
 no_macos = pytest.mark.skipif(
    platform.system() == "Darwin",
    reason="Test is unsupported on MacOS",
 )
 def get_cpu_state(
    icpu: int = 0,
    setting: Literal[
        'scaling_governor',
        '*_pstate_max_freq',
        'scaling_max_freq',
        # 'scaling_cur_freq',
    ] = '*_pstate_max_freq',
 ) -> tuple[
    Path,
    str|int,
 ]|None:
    '''
    Attempt to read the cpufreq `setting` of CPU number `icpu`
    (the first CPU by default) from under the file-sys,
    /sys/devices/system/cpu/cpu{icpu}/cpufreq/{setting}
    `setting` is applied as a glob pattern against that dir and
    only the first match is read.
    Returns a `(path, content)` pair where `content` is the
    whitespace-stripped text read from the matched file, or `None`
    when the pattern matches nothing or the file can not be found.
    Useful to determine latency headroom for various perf affected
    test suites.
    '''
    try:
        # resolve the sysfs entry for cpu `icpu`; `setting` may
        # contain glob wildcards (e.g. the `*_pstate_max_freq`
        # default), an empty match list raises `IndexError` below.
        setting_path: Path = list(
            Path(f'/sys/devices/system/cpu/cpu{icpu}/cpufreq/')
            .glob(f'{setting}')
        )[0]  # <- XXX must be single match!
        with open(
            setting_path,
            'r',
        ) as f:
            # NOTE, the content is delivered as raw (stripped) text;
            # any numeric conversion is left to the caller.
            return (
                setting_path,
                f.read().strip(),
            )
    except (FileNotFoundError, IndexError):
        # no such cpufreq entry => no info to report.
        return None
 def cpu_scaling_factor() -> float:
    '''
    Return a latency-headroom multiplier (>= 1.0) reflecting how
    much to inflate time-limits when CPU-freq scaling is active on
    linux.
    When no usable scaling info is available (non-linux, missing
    sysfs entries, non-numeric or zero frequency values), returns
    1.0 (i.e. no headroom adjustment needed).
    The result is clamped so that it never drops below 1.0; a
    caller multiplying a time-limit by it can thus only ever
    lengthen that limit, never shorten it.
    '''
    if _non_linux:
        return 1.
    mx = get_cpu_state()
    cur = get_cpu_state(setting='scaling_max_freq')
    if mx is None or cur is None:
        return 1.
    _mx_pth, max_freq = mx
    _cur_pth, cur_freq = cur
    try:
        # ratio of the `scaling_max_freq` value to the value read
        # for the default `get_cpu_state()` setting.
        cpu_scaled: float = int(cur_freq) / int(max_freq)
    except (ValueError, ZeroDivisionError):
        # unparsable or zero content => treat as "no info".
        return 1.
    if cpu_scaled <= 0:
        # avoid a zero-division (or a negative factor) below.
        return 1.
    if cpu_scaled != 1.:
        # XXX clamp to the documented lower bound: any ratio > 0.5
        # would otherwise yield a multiplier < 1.0 and thus
        # *shrink* time-limits instead of inflating them.
        return max(
            1.,
            1. / (
                cpu_scaled * 2  # <- bc likely "dual threaded"
            ),
        )
    return 1.
 def pytest_addoption(
@ -139,54 +61,16 @@ def pytest_addoption(
@pytest.fixture(scope='session', autouse=True)
-def loglevel(
+def loglevel(request):
    request: pytest.FixtureRequest,
 ) -> str:
    import tractor
    orig = tractor.log._default_loglevel
    level = tractor.log._default_loglevel = request.config.option.loglevel
-    log = tractor.log.get_console_log(
+    tractor.log.get_console_log(level)
        level=level,
        name='tractor',  # <- enable root logger
    )
    log.info(
        f'Test-harness set runtime loglevel: {level!r}\n'
    )
    yield level
    tractor.log._default_loglevel = orig
-@pytest.fixture(scope='function')
+_ci_env: bool = os.environ.get('CI', False)
 def test_log(
    request: pytest.FixtureRequest,
    loglevel: str,
 ) -> tractor.log.StackLevelAdapter:
    '''
    Deliver a per test-module-fn logger instance for reporting from
    within actual test bodies/fixtures.
    For example this can be handy to report certain error cases from
    exception handlers using `test_log.exception()`.
    The logger is named after the module which defines the
    requesting test fn and is passed through
    `tractor.log.get_console_log()` at the `loglevel` fixture's
    level; the result of that call is what gets yielded.
    '''
    # name of the module which defines the requesting test fn
    modname: str = request.function.__module__
    log = tractor.log.get_logger(
        name=modname,  # <- logger named per test module
        # pkg_name='tests',
    )
    _log = tractor.log.get_console_log(
        level=loglevel,
        logger=log,
        name=modname,
        # pkg_name='tests',
    )
    # leave a trace of which logger/level this test was handed
    _log.debug(
        f'In-test-logging requested\n'
        f'test_log.name: {log.name!r}\n'
        f'level: {loglevel!r}\n'
    )
    yield _log
@pytest.fixture(scope='session')
@ -201,39 +85,15 @@ def ci_env() -> bool:
 def sig_prog(
    proc: subprocess.Popen,
    sig: int,
-    canc_timeout: float = 0.2,
+    canc_timeout: float = 0.1,
    tries: int = 3,
 ) -> int:
-    '''
+    "Kill the actor-process with ``sig``."
-    Kill the actor-process with `sig`.
+    proc.send_signal(sig)
-
+    time.sleep(canc_timeout)
-    Prefer to kill with the provided signal and
+    if not proc.poll():
    failing a `canc_timeout`, send a `SIGKILL`-like
    to ensure termination.
    '''
    for i in range(tries):
        proc.send_signal(sig)
        if proc.poll() is None:
            print(
                f'WARNING, proc still alive after,\n'
                f'canc_timeout={canc_timeout!r}\n'
                f'sig={sig!r}\n'
                f'\n'
                f'{proc.args!r}\n'
            )
            time.sleep(canc_timeout)
    else:
        # TODO: why sometimes does SIGINT not work on teardown?
        # seems to happen only when trace logging enabled?
-        if proc.poll() is None:
+        proc.send_signal(_KILL_SIGNAL)
            print(
                f'XXX WARNING KILLING PROG WITH SIGINT XXX\n'
                f'canc_timeout={canc_timeout!r}\n'
                f'{proc.args!r}\n'
            )
            proc.send_signal(_KILL_SIGNAL)
    ret: int = proc.wait()
    assert ret
@ -246,8 +106,6 @@ def daemon(
    testdir: pytest.Pytester,
    reg_addr: tuple[str, int],
    tpt_proto: str,
    ci_env: bool,
    test_log: tractor.log.StackLevelAdapter,
 ) -> subprocess.Popen:
    '''
@ -263,12 +121,10 @@ def daemon(
        "import tractor; "
        "tractor.run_daemon([], "
        "registry_addrs={reg_addrs}, "
        "enable_transports={enable_tpts}, "
        "debug_mode={debug_mode}, "
        "loglevel={ll})"
    ).format(
        reg_addrs=str([reg_addr]),
        enable_tpts=str([tpt_proto]),
        ll="'{}'".format(loglevel) if loglevel else None,
        debug_mode=debug_mode,
    )
@ -287,25 +143,13 @@ def daemon(
        **kwargs,
    )
    # TODO! we should poll for the registry socket-bind to take place
    # and only once that's done yield to the requester!
    # -[ ] TCP: use the `._root.open_root_actor()`::`ping_tpt_socket()`
    #      closure!
    # -[ ] UDS: can we do something similar for 'pinging' the
    #     file-socket?
    #
    global _PROC_SPAWN_WAIT
    # UDS sockets are **really** fast to bind()/listen()/connect()
    # so it's often required that we delay a bit more starting
    # the first actor-tree..
    if tpt_proto == 'uds':
-        _PROC_SPAWN_WAIT += 1.6
+        global _PROC_SPAWN_WAIT
        _PROC_SPAWN_WAIT = 0.6
    if _non_linux and ci_env:
        _PROC_SPAWN_WAIT += 1
    # XXX, allow time for the sub-py-proc to boot up.
    # !TODO, see ping-polling ideas above!
    time.sleep(_PROC_SPAWN_WAIT)
    assert not proc.returncode
@ -315,30 +159,18 @@ def daemon(
    # XXX! yeah.. just be reaaal careful with this bc sometimes it
    # can lock up on the `_io.BufferedReader` and hang..
    stderr: str = proc.stderr.read().decode()
-    stdout: str = proc.stdout.read().decode()
+    if stderr:
    if (
        stderr
        or
        stdout
    ):
        print(
-            f'Daemon actor tree produced output:\n'
+            f'Daemon actor tree produced STDERR:\n'
            f'{proc.args}\n'
            f'\n'
-            f'stderr: {stderr!r}\n'
+            f'{stderr}\n'
            f'stdout: {stdout!r}\n'
        )
-
+    if proc.returncode != -2:
-    if (rc := proc.returncode) != -2:
+        raise RuntimeError(
-        msg: str = (
+            'Daemon actor tree failed !?\n'
-            f'Daemon actor tree was not cancelled !?\n'
+            f'{proc.args}\n'
            f'proc.args: {proc.args!r}\n'
            f'proc.returncode: {rc!r}\n'
        )
        if rc < 0:
            raise RuntimeError(msg)
        test_log.error(msg)
 # @pytest.fixture(autouse=True)
--- a/tests/devx/conftest.py
+++ b/tests/devx/conftest.py
@ -3,8 +3,6 @@
 '''
 from __future__ import annotations
 import platform
 import signal
 import time
 from typing import (
    Callable,
@ -34,23 +32,9 @@ if TYPE_CHECKING:
    from pexpect import pty_spawn
 _non_linux: bool = platform.system() != 'Linux'
 def pytest_configure(config: pytest.Config) -> None:
    '''
    Register the custom `ctlcs_bish` marker with `pytest` by adding
    it to the `markers` ini-value.
    '''
    # register custom marks to avoid warnings see,
    # https://docs.pytest.org/en/stable/how-to/writing_plugins.html#registering-custom-markers
    config.addinivalue_line(
        'markers',
        'ctlcs_bish: test will (likely) not behave under SIGINT..'
    )
 # a fn that sub-instantiates a `pexpect.spawn()`
 # and returns it.
-type PexpectSpawner = Callable[
+type PexpectSpawner = Callable[[str], pty_spawn.spawn]
    [str],
    pty_spawn.spawn,
 ]
@pytest.fixture
@ -80,65 +64,26 @@ def spawn(
        '''
        import os
        # disable colored tbs
        os.environ['PYTHON_COLORS'] = '0'
        # disable all ANSI color output
        # os.environ['NO_COLOR'] = '1'
    spawned: PexpectSpawner|None = None
    def _spawn(
        cmd: str,
        expect_timeout: float = 4,
        **mkcmd_kwargs,
    ) -> pty_spawn.spawn:
        nonlocal spawned
        unset_colors()
-        spawned = testdir.spawn(
+        return testdir.spawn(
            cmd=mk_cmd(
                cmd,
                **mkcmd_kwargs,
            ),
-            expect_timeout=(timeout:=(
+            expect_timeout=3,
                expect_timeout + 6
                if _non_linux and _ci_env
                else expect_timeout
            )),
            # preexec_fn=unset_colors,
            # ^TODO? get `pytest` core to expose underlying
            # `pexpect.spawn()` stuff?
        )
        # sanity
        assert spawned.timeout == timeout
        return spawned
    # such that test-dep can pass input script name.
-    yield _spawn  # the `PexpectSpawner`, type alias.
+    return _spawn  # the `PexpectSpawner`, type alias.
    if (
        spawned
        and
        (ptyproc := spawned.ptyproc)
    ):
        start: float = time.time()
        timeout: float = 5
        while (
            ptyproc.isalive()
            and
            (
                (_time_took := (time.time() - start))
                 <
                 timeout
            )
        ):
            ptyproc.kill(signal.SIGINT)
            time.sleep(0.01)
        if ptyproc.isalive():
            ptyproc.kill(signal.SIGKILL)
    # TODO? ensure we've cleaned up any UDS-paths?
    # breakpoint()
@pytest.fixture(
@ -146,12 +91,13 @@ def spawn(
    ids='ctl-c={}'.format,
 )
 def ctlc(
-    request: pytest.FixtureRequest,
+    request,
    ci_env: bool,
 ) -> bool:
-    use_ctlc: bool = request.param
+    use_ctlc = request.param
    node = request.node
    markers = node.own_markers
    for mark in markers:
@ -163,13 +109,7 @@ def ctlc(
                'https://github.com/goodboy/tractor/issues/320'
            )
-        if (
+        if mark.name == 'ctlcs_bish':
            mark.name == 'ctlcs_bish'
            and
            use_ctlc
            and
            all(mark.args)
        ):
            pytest.skip(
                f'Test {node} prolly uses something from the stdlib (namely `asyncio`..)\n'
                f'The test and/or underlying example script can *sometimes* run fine '
@ -274,13 +214,12 @@ def assert_before(
        err_on_false=True,
        **kwargs
    )
    return str(child.before.decode())
 def do_ctlc(
    child,
    count: int = 3,
-    delay: float|None = None,
+    delay: float = 0.1,
    patt: str|None = None,
    # expect repl UX to reprint the prompt after every
@ -292,7 +231,6 @@ def do_ctlc(
 ) -> str|None:
    before: str|None = None
    delay = delay or 0.1
    # make sure ctl-c sends don't do anything but repeat output
    for _ in range(count):
@ -303,10 +241,7 @@ def do_ctlc(
        # if you run this test manually it works just fine..
        if expect_prompt:
            time.sleep(delay)
-            child.expect(
+            child.expect(PROMPT)
                PROMPT,
                timeout=(child.timeout * 2) if _ci_env else child.timeout,
            )
            before = str(child.before.decode())
            time.sleep(delay)
--- a/tests/devx/test_debugger.py
+++ b/tests/devx/test_debugger.py
@ -37,9 +37,6 @@ from .conftest import (
    in_prompt_msg,
    assert_before,
 )
 from ..conftest import (
    _ci_env,
 )
 if TYPE_CHECKING:
    from ..conftest import PexpectSpawner
@ -54,14 +51,13 @@ if TYPE_CHECKING:
 # - recurrent root errors
 _non_linux: bool = platform.system() != 'Linux'
 if platform.system() == 'Windows':
    pytest.skip(
        'Debugger tests have no windows support (yet)',
        allow_module_level=True,
    )
 # TODO: was trying to do this xfail style but some weird bug i see in CI
 # that's happening at collect time.. pretty soon gonna dump actions i'm
 # thinkin...
@ -197,11 +193,6 @@ def test_root_actor_bp_forever(
    child.expect(EOF)
 # skip on non-Linux CI
@pytest.mark.ctlcs_bish(
    _non_linux,
    _ci_env,
 )
@pytest.mark.parametrize(
    'do_next',
    (True, False),
@ -267,11 +258,6 @@ def test_subactor_error(
    child.expect(EOF)
 # skip on non-Linux CI
@pytest.mark.ctlcs_bish(
    _non_linux,
    _ci_env,
 )
 def test_subactor_breakpoint(
    spawn,
    ctlc: bool,
@ -494,24 +480,8 @@ def test_multi_daemon_subactors(
    stream.
    '''
    non_linux = _non_linux
    if non_linux and ctlc:
        pytest.skip(
            'Ctl-c + MacOS is too unreliable/racy for this test..\n'
        )
        # !TODO, if someone with more patience than i wants to muck
        # with the timings on this please feel free to see all the
        # `non_linux` branching logic i added on my first attempt
        # below!
        #
        # my conclusion was that if i were to run the script
        # manually, and thus as slowly as a human would, the test
        # would and should pass as described in this test fn, however
        # after fighting with it for >= 1hr. i decided more than
        # likely the more extensive `linux` testing should cover most
        # regressions.
    child = spawn('multi_daemon_subactors')
    child.expect(PROMPT)
    # there can be a race for which subactor will acquire
@ -541,19 +511,8 @@ def test_multi_daemon_subactors(
    else:
        raise ValueError('Neither log msg was found !?')
    non_linux_delay: float = 0.3
    if ctlc:
-        do_ctlc(
+        do_ctlc(child)
            child,
            delay=(
                non_linux_delay
                if non_linux
                else None
            ),
        )
        if non_linux:
            time.sleep(1)
    # NOTE: previously since we did not have clobber prevention
    # in the root actor this final resume could result in the debugger
@ -584,66 +543,33 @@ def test_multi_daemon_subactors(
    # assert "in use by child ('bp_forever'," in before
    if ctlc:
-        do_ctlc(
+        do_ctlc(child)
            child,
            delay=(
                non_linux_delay
                if non_linux
                else None
            ),
        )
        if non_linux:
            time.sleep(1)
    # expect another breakpoint actor entry
    child.sendline('c')
    child.expect(PROMPT)
    try:
-        before: str = assert_before(
+        assert_before(
            child,
            bp_forev_parts,
        )
    except AssertionError:
-        before: str = assert_before(
+        assert_before(
            child,
            name_error_parts,
        )
    else:
        if ctlc:
-            before: str = do_ctlc(
+            do_ctlc(child)
                child,
                delay=(
                    non_linux_delay
                    if non_linux
                    else None
                ),
            )
            if non_linux:
                time.sleep(1)
        # should crash with the 2nd name error (simulates
        # a retry) and then the root eventually (boxed) errors
        # after 1 or more further bp actor entries.
        child.sendline('c')
-        try:
+        child.expect(PROMPT)
            child.expect(
                PROMPT,
                timeout=3,
            )
        except EOF:
            before: str = child.before.decode()
            print(
                f'\n'
                f'??? NEVER RXED `pdb` PROMPT ???\n'
                f'\n'
                f'{before}\n'
            )
            raise
        assert_before(
            child,
            name_error_parts,
@ -763,8 +689,7 @@ def test_multi_subactors_root_errors(
@has_nested_actors
 def test_multi_nested_subactors_error_through_nurseries(
-    ci_env: bool,
+    spawn,
    spawn: PexpectSpawner,
    # TODO: address debugger issue for nested tree:
    # https://github.com/goodboy/tractor/issues/320
@ -785,29 +710,9 @@ def test_multi_nested_subactors_error_through_nurseries(
    # timed_out_early: bool = False
-    for (
+    for send_char in itertools.cycle(['c', 'q']):
        i,
        send_char,
    ) in enumerate(itertools.cycle(['c', 'q'])):
        timeout: float = -1
        if (
            _non_linux
            and
            ci_env
        ):
            timeout: float = 6
        # XXX linux but the first crash sequence
        # can take longer to arrive at a prompt.
        elif i == 0:
            timeout = 5
        try:
-            child.expect(
+            child.expect(PROMPT)
                PROMPT,
                timeout=timeout,
            )
            child.sendline(send_char)
            time.sleep(0.01)
@ -984,11 +889,6 @@ def test_different_debug_mode_per_actor(
    )
 # skip on non-Linux CI
@pytest.mark.ctlcs_bish(
    _non_linux,
    _ci_env,
 )
 def test_post_mortem_api(
    spawn,
    ctlc: bool,
@ -1233,21 +1133,12 @@ def test_ctxep_pauses_n_maybe_ipc_breaks(
            # closed so verify we see error reporting as well as
            # a failed crash-REPL request msg and can CTL-c our way
            # out.
            # ?TODO, match depending on `tpt_proto(s)`?
            # - [ ] how can we pass it into the script tho?
            tpt: str = 'UDS'
            if _non_linux:
                tpt: str = 'TCP'
            assert_before(
                child,
                ['peer IPC channel closed abruptly?',
                 'another task closed this fd',
                 'Debug lock request was CANCELLED?',
-                 f"'Msgpack{tpt}Stream' was already closed locally?",
+                 "TransportClosed: 'MsgpackUDSStream' was already closed locally ?",]
                 f"TransportClosed: 'Msgpack{tpt}Stream' was already closed 'by peer'?",
                ]
                # XXX races on whether these show/hit?
                 # 'Failed to REPl via `_pause()` You called `tractor.pause()` from an already cancelled scope!',
--- a/tests/devx/test_pause_from_non_trio.py
+++ b/tests/devx/test_pause_from_non_trio.py
@ -63,9 +63,6 @@ def test_pause_from_sync(
    `examples/debugging/sync_bp.py`
    '''
    # XXX required for `breakpoint()` overload and
    # thus`tractor.devx.pause_from_sync()`.
    pytest.importorskip('greenback')
    child = spawn('sync_bp')
    # first `sync_pause()` after nurseries open
@ -263,9 +260,6 @@ def test_sync_pause_from_aio_task(
    `examples/debugging/asyncio_bp.py`
    '''
    # XXX required for `breakpoint()` overload and
    # thus`tractor.devx.pause_from_sync()`.
    pytest.importorskip('greenback')
    child = spawn('asyncio_bp')
    # RACE on whether trio/asyncio task bps first
--- a/tests/devx/test_tooling.py
+++ b/tests/devx/test_tooling.py
@ -31,9 +31,6 @@ from .conftest import (
    PROMPT,
    _pause_msg,
 )
 from ..conftest import (
    no_macos,
 )
 import pytest
 from pexpect.exceptions import (
@ -45,7 +42,6 @@ if TYPE_CHECKING:
    from ..conftest import PexpectSpawner
@no_macos
 def test_shield_pause(
    spawn: PexpectSpawner,
 ):
@ -61,7 +57,6 @@ def test_shield_pause(
    expect(
        child,
        'Yo my child hanging..?',
        timeout=3,
    )
    assert_before(
        child,
@ -126,7 +121,7 @@ def test_shield_pause(
        child.pid,
        signal.SIGINT,
    )
-    from tractor.runtime._supervise import _shutdown_msg
+    from tractor._supervise import _shutdown_msg
    expect(
        child,
        # 'Shutting down actor runtime',
@ -156,10 +151,8 @@ def test_breakpoint_hook_restored(
    calls used.
    '''
    # XXX required for `breakpoint()` overload and
    # thus`tractor.devx.pause_from_sync()`.
    pytest.importorskip('greenback')
    child = spawn('restore_builtin_breakpoint')
    child.expect(PROMPT)
    try:
        assert_before(
--- a/tests/discovery/init.py
+++ b/tests/discovery/init.py
--- a/tests/discovery/test_multiaddr.py
+++ b/tests/discovery/test_multiaddr.py
@ -1,376 +0,0 @@
 '''
 Multiaddr construction, parsing, and round-trip tests for
 `tractor.discovery._multiaddr.mk_maddr()` and
 `tractor.discovery._multiaddr.parse_maddr()`.
 '''
 from pathlib import Path
 from types import SimpleNamespace
 import pytest
 from multiaddr import Multiaddr
 from tractor.ipc._tcp import TCPAddress
 from tractor.ipc._uds import UDSAddress
 from tractor.discovery._multiaddr import (
    mk_maddr,
    parse_maddr,
    parse_endpoints,
    _tpt_proto_to_maddr,
    _maddr_to_tpt_proto,
 )
 from tractor.discovery._addr import wrap_address
 def test_tpt_proto_to_maddr_mapping():
    '''
    Every supported transport `proto_key` in
    `_tpt_proto_to_maddr` resolves to its multiaddr protocol
    name, and no other entries exist.
    '''
    expected_names: dict[str, str] = {
        'tcp': 'tcp',
        'uds': 'unix',
    }
    for proto_key, maddr_name in expected_names.items():
        assert _tpt_proto_to_maddr[proto_key] == maddr_name
    # only the two transports above are supported
    assert len(_tpt_proto_to_maddr) == 2
 def test_mk_maddr_tcp_ipv4():
    '''
    An IPv4 `TCPAddress` given to `mk_maddr()` produces the
    `/ip4/<host>/tcp/<port>` multiaddr with matching protocol
    names and per-protocol values.
    '''
    maddr: Multiaddr = mk_maddr(
        TCPAddress('127.0.0.1', 1234)
    )
    assert isinstance(maddr, Multiaddr)
    assert str(maddr) == '/ip4/127.0.0.1/tcp/1234'
    # protocol stack order: network layer, then transport
    proto_stack = maddr.protocols()
    assert proto_stack[0].name == 'ip4'
    assert proto_stack[1].name == 'tcp'
    # NOTE, values come back as strings, including the port
    ip4_val = maddr.value_for_protocol('ip4')
    assert ip4_val == '127.0.0.1'
    tcp_val = maddr.value_for_protocol('tcp')
    assert tcp_val == '1234'
 def test_mk_maddr_tcp_ipv6():
    '''
    An IPv6 `TCPAddress` given to `mk_maddr()` produces the
    `/ip6/<host>/tcp/<port>` multiaddr.
    '''
    maddr: Multiaddr = mk_maddr(
        TCPAddress('::1', 5678)
    )
    assert str(maddr) == '/ip6/::1/tcp/5678'
    # protocol stack order: network layer, then transport
    proto_stack = maddr.protocols()
    assert proto_stack[0].name == 'ip6'
    assert proto_stack[1].name == 'tcp'
 def test_mk_maddr_uds():
    '''
    A `UDSAddress` given to `mk_maddr()` produces a
    `/unix/<path>` multiaddr carrying the full socket path.
    '''
    # NOTE, use an absolute `filedir` to match real runtime
    # UDS paths; `mk_maddr()` strips the leading `/` to avoid
    # the double-slash `/unix//run/..` that py-multiaddr
    # rejects as "empty protocol path".
    sock_dir = '/tmp/tractor_test'
    sock_name = 'test_sock.sock'
    maddr: Multiaddr = mk_maddr(
        UDSAddress(
            filedir=sock_dir,
            filename=sock_name,
        )
    )
    assert isinstance(maddr, Multiaddr)
    rendered: str = str(maddr)
    assert rendered.startswith('/unix/')
    # the leading `/` must be gone, so no double-slash
    assert '/unix/tmp/tractor_test/' in rendered
    # full socket path, relative to the fs root
    full_sockpath: Path = Path(sock_dir) / sock_name
    expected_tail: str = str(full_sockpath).lstrip('/')
    unix_val: str = maddr.value_for_protocol('unix')
    assert unix_val.endswith(expected_tail)
 def test_mk_maddr_unsupported_proto_key():
    '''
    An address-like object whose `proto_key` is not supported
    makes `mk_maddr()` raise `ValueError`.
    '''
    # a minimal stand-in exposing only the `.proto_key` attr
    bogus_addr = SimpleNamespace(proto_key='quic')
    expect_err = pytest.raises(
        ValueError,
        match='Unsupported proto_key',
    )
    with expect_err:
        mk_maddr(bogus_addr)
@pytest.mark.parametrize(
    'addr',
    [
        pytest.param(
            TCPAddress('127.0.0.1', 9999),
            id='tcp-ipv4',
        ),
        pytest.param(
            UDSAddress(
                filedir='/tmp/tractor_rt',
                filename='roundtrip.sock',
            ),
            id='uds',
        ),
    ],
 )
 def test_mk_maddr_roundtrip(addr):
    '''
    `mk_maddr()` output is valid multiaddr syntax that the
    library can re-parse back into an equivalent `Multiaddr`.
    '''
    maddr: Multiaddr = mk_maddr(addr)
    reparsed = Multiaddr(str(maddr))
    assert reparsed == maddr
    assert str(reparsed) == str(maddr)
 # ------ parse_maddr() tests ------
 def test_maddr_to_tpt_proto_mapping():
    '''
    `_maddr_to_tpt_proto` maps each multiaddr protocol name
    back to its transport `proto_key`.
    '''
    expected: dict[str, str] = {
        'tcp': 'tcp',
        'unix': 'uds',
    }
    assert _maddr_to_tpt_proto == expected
 def test_parse_maddr_tcp_ipv4():
    '''
    Parsing an IPv4 TCP multiaddr string with `parse_maddr()`
    produces a `TCPAddress` holding the same host and port.
    '''
    maddr_str = '/ip4/127.0.0.1/tcp/1234'
    expected_unwrapped = ('127.0.0.1', 1234)
    addr = parse_maddr(maddr_str)
    assert isinstance(addr, TCPAddress)
    assert addr.unwrap() == expected_unwrapped
 def test_parse_maddr_tcp_ipv6():
    '''
    Parsing an IPv6 TCP multiaddr string with `parse_maddr()`
    produces a `TCPAddress` holding the same host and port.
    '''
    maddr_str = '/ip6/::1/tcp/5678'
    expected_unwrapped = ('::1', 5678)
    addr = parse_maddr(maddr_str)
    assert isinstance(addr, TCPAddress)
    assert addr.unwrap() == expected_unwrapped
 def test_parse_maddr_uds():
    '''
    Parsing a `/unix/...` multiaddr string with `parse_maddr()`
    produces a `UDSAddress` with the expected dir and filename;
    the dir comes back as an absolute path.
    '''
    addr = parse_maddr('/unix/tmp/tractor_test/test.sock')
    assert isinstance(addr, UDSAddress)
    sock_dir, sock_name = addr.unwrap()
    assert sock_name == 'test.sock'
    # the leading `/` is restored on the parsed dir
    assert str(sock_dir) == '/tmp/tractor_test'
 def test_parse_maddr_unsupported():
    '''
    An unsupported protocol combination (here UDP) makes
    `parse_maddr()` raise `ValueError`.
    '''
    udp_maddr = '/ip4/127.0.0.1/udp/1234'
    expect_err = pytest.raises(
        ValueError,
        match='Unsupported multiaddr protocol combo',
    )
    with expect_err:
        parse_maddr(udp_maddr)
@pytest.mark.parametrize(
    'addr',
    [
        pytest.param(
            TCPAddress('127.0.0.1', 9999),
            id='tcp-ipv4',
        ),
        pytest.param(
            UDSAddress(
                filedir='/tmp/tractor_rt',
                filename='roundtrip.sock',
            ),
            id='uds',
        ),
    ],
 )
 def test_parse_maddr_roundtrip(addr):
    '''
    Full round-trip: `addr -> mk_maddr -> str -> parse_maddr`
    produce an `Address` whose `.unwrap()` matches the original.
    '''
    maddr: Multiaddr = mk_maddr(addr)
    maddr_str: str = str(maddr)
    parsed = parse_maddr(maddr_str)
    assert type(parsed) is type(addr)
    assert parsed.unwrap() == addr.unwrap()
 def test_wrap_address_maddr_str():
    '''
    `wrap_address()` accepts a multiaddr-format string and
    returns the matching `Address` type.
    '''
    maddr_str = '/ip4/127.0.0.1/tcp/9999'
    expected_unwrapped = ('127.0.0.1', 9999)
    addr = wrap_address(maddr_str)
    assert isinstance(addr, TCPAddress)
    assert addr.unwrap() == expected_unwrapped
 # ------ parse_endpoints() tests ------
 def test_parse_endpoints_tcp_only():
    '''
    With a single TCP maddr per actor, `parse_endpoints()`
    produces one correctly populated `TCPAddress` per entry.
    '''
    table = {
        'registry': ['/ip4/127.0.0.1/tcp/1616'],
        'data_feed': ['/ip4/0.0.0.0/tcp/5555'],
    }
    # actor-name -> expected `(host, port)` of its first addr
    expected: dict[str, tuple[str, int]] = {
        'registry': ('127.0.0.1', 1616),
        'data_feed': ('0.0.0.0', 5555),
    }
    result = parse_endpoints(table)
    assert set(result.keys()) == set(expected)
    for actor_name, host_port in expected.items():
        first_addr = result[actor_name][0]
        assert isinstance(first_addr, TCPAddress)
        assert first_addr.unwrap() == host_port
 def test_parse_endpoints_mixed_tpts():
    '''
    With both a TCP and a UDS maddr for the same actor,
    `parse_endpoints()` produces the matching mixed `Address`
    list, in input order.
    '''
    tcp_maddr = '/ip4/127.0.0.1/tcp/4040'
    uds_maddr = '/unix/tmp/tractor/broker.sock'
    result = parse_endpoints(
        {'broker': [tcp_maddr, uds_maddr]}
    )
    addrs = result['broker']
    assert len(addrs) == 2
    tcp_addr = addrs[0]
    assert isinstance(tcp_addr, TCPAddress)
    assert tcp_addr.unwrap() == ('127.0.0.1', 4040)
    uds_addr = addrs[1]
    assert isinstance(uds_addr, UDSAddress)
    sock_dir, sock_name = uds_addr.unwrap()
    assert sock_name == 'broker.sock'
    assert str(sock_dir) == '/tmp/tractor'
 def test_parse_endpoints_unwrapped_tuples():
    '''
    `parse_endpoints()` accepts raw `(host, port)` tuples and
    wraps them as `TCPAddress`.
    '''
    host_port = ('127.0.0.1', 6666)
    result = parse_endpoints({'ems': [host_port]})
    wrapped = result['ems'][0]
    assert isinstance(wrapped, TCPAddress)
    assert wrapped.unwrap() == ('127.0.0.1', 6666)
 def test_parse_endpoints_mixed_str_and_tuple():
    '''
    `parse_endpoints()` accepts a mix of maddr strings and raw
    tuples in the same actor entry list.
    '''
    entries = [
        '/ip4/127.0.0.1/tcp/7777',
        ('127.0.0.1', 8888),
    ]
    result = parse_endpoints({'quoter': entries})
    addrs = result['quoter']
    assert len(addrs) == 2
    # both input forms end up as `TCPAddress`, in input order
    expected_unwrapped = [
        ('127.0.0.1', 7777),
        ('127.0.0.1', 8888),
    ]
    for idx, host_port in enumerate(expected_unwrapped):
        assert isinstance(addrs[idx], TCPAddress)
        assert addrs[idx].unwrap() == host_port
 def test_parse_endpoints_unsupported_proto():
    '''
    A maddr string using an unsupported protocol such as
    `/udp/` makes `parse_endpoints()` raise `ValueError`.
    '''
    bad_table = {
        'bad_actor': ['/ip4/127.0.0.1/udp/9999'],
    }
    expect_err = pytest.raises(
        ValueError,
        match='Unsupported multiaddr protocol combo',
    )
    with expect_err:
        parse_endpoints(bad_table)
 def test_parse_endpoints_empty_table():
    '''
    An empty endpoints table parses to an empty dict.
    '''
    parsed = parse_endpoints({})
    assert parsed == {}
 def test_parse_endpoints_empty_actor_list():
    '''
    An actor mapped to an empty list keeps its key in the
    `parse_endpoints()` output, with an empty list value.
    '''
    table = {'x': []}
    parsed = parse_endpoints(table)
    assert parsed == {'x': []}
--- a/tests/discovery/test_tpt_bind_addrs.py
+++ b/tests/discovery/test_tpt_bind_addrs.py
@ -1,345 +0,0 @@
 '''
 `open_root_actor(tpt_bind_addrs=...)` test suite.
 Verify all three runtime code paths for explicit IPC-server
 bind-address selection in `_root.py`:
 1. Non-registrar, no explicit bind -> random addrs from registry proto
 2. Registrar, no explicit bind -> binds to registry_addrs
 3. Explicit bind given -> wraps via `wrap_address()` and uses them
 '''
 import pytest
 import trio
 import tractor
 from tractor.discovery._addr import (
    wrap_address,
 )
 from tractor.discovery._multiaddr import mk_maddr
 from tractor._testing.addr import get_rando_addr
 # ------------------------------------------------------------------
 # helpers
 # ------------------------------------------------------------------
 def _bound_bindspaces(
    actor: tractor.Actor,
 ) -> set[str]:
    '''
    Gather the `.bindspace` of every address in
    `actor.accept_addrs`, each wrapped via `wrap_address()`,
    into a set.
    '''
    bindspaces: set[str] = set()
    for accept_addr in actor.accept_addrs:
        wrapped = wrap_address(accept_addr)
        bindspaces.add(wrapped.bindspace)
    return bindspaces
 def _bound_wrapped(
    actor: tractor.Actor,
 ) -> list:
    '''
    Wrap every entry of `actor.accept_addrs` with
    `wrap_address()` and return them as a list, preserving
    order.
    '''
    return list(
        map(wrap_address, actor.accept_addrs)
    )
 # ------------------------------------------------------------------
 # 1) Registrar + explicit tpt_bind_addrs
 # ------------------------------------------------------------------
@pytest.mark.parametrize(
    'addr_combo',
    [
        'bind-eq-reg',
        'bind-subset-reg',
        'bind-disjoint-reg',
    ],
    ids=lambda v: v,
 )
 def test_registrar_root_tpt_bind_addrs(
    reg_addr: tuple,
    tpt_proto: str,
    debug_mode: bool,
    addr_combo: str,
 ):
    '''
    Registrar root-actor with explicit `tpt_bind_addrs`:
    bound set must include all registry + all bind addr bindspaces
    (merge behavior).
    '''
    reg_wrapped = wrap_address(reg_addr)
    if addr_combo == 'bind-eq-reg':
        bind_addrs = [reg_addr]
        # extra secondary reg addr for subset test
        extra_reg = []
    elif addr_combo == 'bind-subset-reg':
        second_reg = get_rando_addr(tpt_proto)
        bind_addrs = [reg_addr]
        extra_reg = [second_reg]
    elif addr_combo == 'bind-disjoint-reg':
        # port=0 on same host -> completely different addr
        rando = wrap_address(reg_addr).get_random(
            bindspace=reg_wrapped.bindspace,
        )
        bind_addrs = [rando.unwrap()]
        extra_reg = []
    all_reg = [reg_addr] + extra_reg
    async def _main():
        async with tractor.open_root_actor(
            registry_addrs=all_reg,
            tpt_bind_addrs=bind_addrs,
            debug_mode=debug_mode,
        ):
            actor = tractor.current_actor()
            assert actor.is_registrar
            bound = actor.accept_addrs
            bound_bs = _bound_bindspaces(actor)
            # all registry bindspaces must appear in bound set
            for ra in all_reg:
                assert wrap_address(ra).bindspace in bound_bs
            # all bind-addr bindspaces must appear
            for ba in bind_addrs:
                assert wrap_address(ba).bindspace in bound_bs
            # registry addr must appear verbatim in bound
            # (after wrapping both sides for comparison)
            bound_w = _bound_wrapped(actor)
            assert reg_wrapped in bound_w
            if addr_combo == 'bind-disjoint-reg':
                assert len(bound) >= 2
    trio.run(_main)
@pytest.mark.parametrize(
    'addr_combo',
    [
        'bind-same-bindspace',
        'bind-disjoint',
    ],
    ids=lambda v: v,
 )
 def test_non_registrar_root_tpt_bind_addrs(
    daemon,
    reg_addr: tuple,
    tpt_proto: str,
    debug_mode: bool,
    addr_combo: str,
 ):
    '''
    Non-registrar root with explicit `tpt_bind_addrs`:
    bound set must exactly match the requested bind addrs
    (no merge with registry).
    '''
    reg_wrapped = wrap_address(reg_addr)
    if addr_combo == 'bind-same-bindspace':
        # same bindspace as reg but port=0 so we get a random port
        rando = reg_wrapped.get_random(
            bindspace=reg_wrapped.bindspace,
        )
        bind_addrs = [rando.unwrap()]
    elif addr_combo == 'bind-disjoint':
        rando = reg_wrapped.get_random(
            bindspace=reg_wrapped.bindspace,
        )
        bind_addrs = [rando.unwrap()]
    async def _main():
        async with tractor.open_root_actor(
            registry_addrs=[reg_addr],
            tpt_bind_addrs=bind_addrs,
            debug_mode=debug_mode,
        ):
            actor = tractor.current_actor()
            assert not actor.is_registrar
            bound = actor.accept_addrs
            assert len(bound) == len(bind_addrs)
            # bindspaces must match
            bound_bs = _bound_bindspaces(actor)
            for ba in bind_addrs:
                assert wrap_address(ba).bindspace in bound_bs
            # TCP port=0 should resolve to a real port
            for uw_addr in bound:
                w = wrap_address(uw_addr)
                if w.proto_key == 'tcp':
                    _host, port = uw_addr
                    assert port > 0
    trio.run(_main)
 # ------------------------------------------------------------------
 # 3) Non-registrar, default random bind (baseline)
 # ------------------------------------------------------------------
 def test_non_registrar_default_random_bind(
    daemon,
    reg_addr: tuple,
    debug_mode: bool,
 ):
    '''
    Baseline with the `daemon` fixture requested and no
    `tpt_bind_addrs` passed: the root must bind within the
    registry addr's bindspace but NOT on the registry addr
    itself.
    '''
    reg_wrapped = wrap_address(reg_addr)
    async def _main():
        root_kwargs = dict(
            registry_addrs=[reg_addr],
            debug_mode=debug_mode,
        )
        async with tractor.open_root_actor(**root_kwargs):
            actor = tractor.current_actor()
            assert not actor.is_registrar
            # same bindspace as the registry..
            assert reg_wrapped.bindspace in _bound_bindspaces(actor)
            # ..but a different addr (the runtime picks a
            # random port/path)
            assert reg_wrapped not in _bound_wrapped(actor)
    trio.run(_main)
 # ------------------------------------------------------------------
 # 4) Multiaddr string input
 # ------------------------------------------------------------------
 def test_tpt_bind_addrs_as_maddr_str(
    reg_addr: tuple,
    debug_mode: bool,
 ):
    '''
    `tpt_bind_addrs` given as multiaddr strings: the root
    actor must open as registrar, and every bound TCP addr
    must carry a non-zero port.
    '''
    reg_wrapped = wrap_address(reg_addr)
    # render a port-0 / random addr as a maddr string
    rando = reg_wrapped.get_random(
        bindspace=reg_wrapped.bindspace,
    )
    maddr_str: str = str(mk_maddr(rando))
    async def _main():
        root_kwargs = dict(
            registry_addrs=[reg_addr],
            tpt_bind_addrs=[maddr_str],
            debug_mode=debug_mode,
        )
        async with tractor.open_root_actor(**root_kwargs):
            actor = tractor.current_actor()
            assert actor.is_registrar
            for uw_addr in actor.accept_addrs:
                if wrap_address(uw_addr).proto_key != 'tcp':
                    continue
                # a TCP port of 0 must have been resolved
                _host, port = uw_addr
                assert port > 0
    trio.run(_main)
 # ------------------------------------------------------------------
 # 5) Registrar merge produces union of binds
 # ------------------------------------------------------------------
 def test_registrar_merge_binds_union(
    tpt_proto: str,
    debug_mode: bool,
 ):
    '''
    Registrar + a separately generated bind addr: the registry
    addr must always be among the bound addrs, and when the two
    addrs actually differ at least 2 addrs must be bound.
    '''
    reg_addr = get_rando_addr(tpt_proto)
    reg_wrapped = wrap_address(reg_addr)
    rando = reg_wrapped.get_random(
        bindspace=reg_wrapped.bindspace,
    )
    rando_unwrapped = rando.unwrap()
    bind_addrs = [rando_unwrapped]
    # NOTE: for UDS, `get_random()` produces the same
    # filename for the same pid+actor-state, so the
    # "disjoint" premise only holds when the addrs
    # actually differ (always true for TCP, may
    # collide for UDS).
    expect_disjoint: bool = tuple(reg_addr) != rando_unwrapped
    async def _main():
        root_kwargs = dict(
            registry_addrs=[reg_addr],
            tpt_bind_addrs=bind_addrs,
            debug_mode=debug_mode,
        )
        async with tractor.open_root_actor(**root_kwargs):
            actor = tractor.current_actor()
            assert actor.is_registrar
            bound = actor.accept_addrs
            bound_w = _bound_wrapped(actor)
            # must have at least 2 (registry + bind), but only
            # when they are really distinct addrs
            if expect_disjoint:
                assert len(bound) >= 2
            # registry addr must appear in bound set
            assert reg_wrapped in bound_w
    trio.run(_main)
 # ------------------------------------------------------------------
 # 6) open_nursery forwards tpt_bind_addrs
 # ------------------------------------------------------------------
 def test_open_nursery_forwards_tpt_bind_addrs(
    reg_addr: tuple,
    debug_mode: bool,
 ):
    '''
    `tpt_bind_addrs` passed to `open_nursery()` takes effect:
    the current actor ends up bound in every requested
    bindspace.
    '''
    reg_wrapped = wrap_address(reg_addr)
    rando = reg_wrapped.get_random(
        bindspace=reg_wrapped.bindspace,
    )
    bind_addrs = [rando.unwrap()]
    async def _main():
        nursery_kwargs = dict(
            registry_addrs=[reg_addr],
            tpt_bind_addrs=bind_addrs,
            debug_mode=debug_mode,
        )
        async with tractor.open_nursery(**nursery_kwargs):
            actor = tractor.current_actor()
            bound_bs = _bound_bindspaces(actor)
            for requested in bind_addrs:
                assert wrap_address(requested).bindspace in bound_bs
    trio.run(_main)
--- a/tests/ipc/test_each_tpt.py
+++ b/tests/ipc/test_each_tpt.py
@ -8,16 +8,17 @@ from pathlib import Path
 import pytest
 import trio
 import tractor
-from tractor import Actor
+from tractor import (
-from tractor.runtime import _state
+    Actor,
-from tractor.discovery import _addr
+    _state,
    _addr,
 )
@pytest.fixture
 def bindspace_dir_str() -> str:
-    from tractor.runtime._state import get_rt_dir
+    rt_dir: Path = tractor._state.get_rt_dir()
    rt_dir: Path = get_rt_dir()
    bs_dir: Path = rt_dir / 'doggy'
    bs_dir_str: str = str(bs_dir)
    assert not bs_dir.is_dir()
--- a/tests/ipc/test_multi_tpt.py
+++ b/tests/ipc/test_multi_tpt.py
@ -13,9 +13,9 @@ from tractor import (
    Portal,
    ipc,
    msg,
    _state,
    _addr,
 )
 from tractor.runtime import _state
 from tractor.discovery import _addr
@tractor.context
 async def chk_tpts(
@ -62,13 +62,6 @@ def test_root_passes_tpt_to_sub(
    reg_addr: tuple,
    debug_mode: bool,
 ):
    # XXX NOTE, the `reg_addr` addr won't be the same type as the
    # `tpt_proto_key` would deliver here unless you pass `--tpt-proto
    # <tpt_proto_key>` on the CLI.
    #
    # if tpt_proto_key == 'uds':
    #     breakpoint()
    async def main():
        async with tractor.open_nursery(
            enable_transports=[tpt_proto_key],
--- a/tests/msg/init.py
+++ b/tests/msg/init.py
@ -1,4 +0,0 @@
 '''
 `tractor.msg.*` sub-sys test suite.
 '''
--- a/tests/msg/conftest.py
+++ b/tests/msg/conftest.py
@ -1,4 +0,0 @@
 '''
 `tractor.msg.*` test sub-pkg conf.
 '''
--- a/tests/msg/test_pretty_struct.py
+++ b/tests/msg/test_pretty_struct.py
@ -1,240 +0,0 @@
 '''
 Unit tests for `tractor.msg.pretty_struct`
 private-field filtering in `pformat()`.
 '''
 import pytest
 from tractor.msg.pretty_struct import (
    Struct,
    pformat,
    iter_struct_ppfmt_lines,
 )
 from tractor.msg._codec import (
    MsgDec,
    mk_dec,
 )
 # ------ test struct definitions ------ #
 class PublicOnly(Struct):
    '''
    All-public fields for baseline testing: neither field
    name carries a `_` prefix.
    '''
    name: str = 'alice'
    age: int = 30
 class PrivateOnly(Struct):
    '''
    Only underscore-prefixed (private) fields; there is no
    public field at all.
    '''
    _secret: str = 'hidden'
    _internal: int = 99
 class MixedFields(Struct):
    '''
    Mix of public and private fields, declared interleaved:
    2 public (`name`, `value`) and 2 private (`_hidden`,
    `_meta`).
    '''
    name: str = 'bob'  # public
    _hidden: int = 42  # private
    value: float = 3.14  # public
    _meta: str = 'internal'  # private
 class Inner(
    Struct,
    frozen=True,
 ):
    '''
    Frozen inner struct with one public and one private
    field, for nesting tests.
    '''
    x: int = 1  # public
    _secret: str = 'nope'  # private (`_`-prefixed)
 class Outer(Struct):
    '''
    Outer struct nesting an `Inner`.
    '''
    label: str = 'outer'
    # defaults to an `Inner` built from its own field defaults
    inner: Inner = Inner()
 class EmptyStruct(Struct):
    '''
    Struct with zero fields; the body declares nothing.
    '''
    pass
 # ------ tests ------ #
@pytest.mark.parametrize(
    'struct_and_expected',
    [
        (
            PublicOnly(),
            {
                'shown': ['name', 'age'],
                'hidden': [],
            },
        ),
        (
            MixedFields(),
            {
                'shown': ['name', 'value'],
                'hidden': ['_hidden', '_meta'],
            },
        ),
        (
            PrivateOnly(),
            {
                'shown': [],
                'hidden': ['_secret', '_internal'],
            },
        ),
    ],
    ids=[
        'all-public',
        'mixed-pub-priv',
        'all-private',
    ],
 )
 def test_field_visibility_in_pformat(
    struct_and_expected: tuple[
        Struct,
        dict[str, list[str]],
    ],
 ):
    '''
    Verify `pformat()` shows public fields
    and hides `_`-prefixed private fields.
    '''
    (
        struct,
        expected,
    ) = struct_and_expected
    output: str = pformat(struct)
    for field_name in expected['shown']:
        assert field_name in output, (
            f'{field_name!r} should appear in:\n'
            f'{output}'
        )
    for field_name in expected['hidden']:
        assert field_name not in output, (
            f'{field_name!r} should NOT appear in:\n'
            f'{output}'
        )
 def test_iter_ppfmt_lines_skips_private():
    '''
    `iter_struct_ppfmt_lines()` called directly must never
    yield a tuple for a `_`-prefixed field name.
    '''
    lines: list[tuple[str, str]] = [
        *iter_struct_ppfmt_lines(
            MixedFields(),
            field_indent=2,
        )
    ]
    # `MixedFields` has exactly 2 public fields
    assert len(lines) == 2
    for _prefix, line_content in lines:
        # the field name is whatever precedes the first `:`
        name_part, _, _ = line_content.partition(':')
        field_name: str = name_part.strip()
        assert not field_name.startswith('_'), (
            f'private field leaked: {field_name!r}'
        )
 def test_nested_struct_filters_inner_private():
    '''
    Private fields of a nested struct must be filtered out
    of the outer struct's `pformat()` output too.
    '''
    output: str = pformat(Outer())
    # `label`: the outer struct's public field,
    # `x`: the inner struct's public field (recursed into)
    for public_name in ('label', 'x'):
        assert public_name in output
    # inner's private field must be hidden
    assert '_secret' not in output
 def test_empty_struct_pformat():
    '''
    A field-less struct must still `pformat()` to a
    `EmptyStruct(` .. `)` rendering while yielding zero
    field lines.
    '''
    output: str = pformat(EmptyStruct())
    assert 'EmptyStruct(' in output
    stripped: str = output.rstrip()
    assert stripped.endswith(')')
    # no fields => the line iterator yields nothing
    field_lines: list[tuple[str, str]] = [
        *iter_struct_ppfmt_lines(
            EmptyStruct(),
            field_indent=2,
        )
    ]
    assert field_lines == []
 def test_real_msgdec_pformat_hides_private():
    '''
    `pformat()` applied to a real `MsgDec` must hide its
    internal `_dec` field.
    NOTE: `MsgDec.__repr__` is custom and does NOT call
    `pformat()`, hence the direct call here.
    '''
    output: str = pformat(mk_dec(spec=int))
    # the struct type name is rendered..
    has_type_name: bool = 'MsgDec(' in output
    # ..but the private `_dec` field is not
    leaks_private: bool = '_dec' in output
    assert not leaks_private
    assert has_type_name
 def test_pformat_repr_integration():
    '''
    `repr()` of a custom struct which does NOT override
    `__repr__` (so `Struct.__repr__()`, which calls
    `pformat()`, is used) must hide private fields as well.
    '''
    output: str = repr(MixedFields())
    for public_name in ('name', 'value'):
        assert public_name in output
    for private_name in ('_hidden', '_meta'):
        assert private_name not in output
--- a/tests/spawn/init.py
+++ b/tests/spawn/init.py
--- a/tests/spawn/test_subint_cancellation.py
+++ b/tests/spawn/test_subint_cancellation.py
@ -1,245 +0,0 @@
 '''
 Cancellation + hard-kill semantics audit for the `subint` spawn
 backend.
 Exercises the escape-hatch machinery added to
 `tractor.spawn._subint` (module-level `_HARD_KILL_TIMEOUT`,
 bounded shields around the soft-kill / thread-join sites, daemon
 driver-thread abandonment) so that future stdlib regressions or
 our own refactors don't silently re-introduce the hangs first
 diagnosed during the Phase B.2/B.3 bringup (issue #379).
 Every test in this module:
 - is wrapped in `trio.fail_after()` for a deterministic per-test
  wall-clock ceiling (the whole point of these tests is to fail
  fast when our escape hatches regress; an unbounded test would
  defeat itself),
 - arms `tractor.devx.dump_on_hang()` to capture a stack dump on
  failure — without it, a hang here is opaque because pytest's
  stderr capture swallows `faulthandler` output by default
  (hard-won lesson from the original diagnosis),
 - skips on py<3.14 (no `concurrent.interpreters`) and on any
  `--spawn-backend` other than `'subint'` (these tests are
  subint-specific by design — they'd be nonsense under `trio` or
  `mp_*`).
 '''
 from __future__ import annotations
 from functools import partial
 import pytest
 import trio
 import tractor
 from tractor.devx import dump_on_hang
 # Gate: the `subint` backend requires py3.14+. Check the
 # public stdlib wrapper's presence (added in 3.14) rather than
 # the private `_interpreters` module (which exists on 3.13 but
 # wedges under tractor's usage — see `tractor.spawn._subint`).
 pytest.importorskip('concurrent.interpreters')
 # Subint-only: read the spawn method that `pytest_configure`
 # committed via `try_set_start_method()`. By the time this module
 # imports, the CLI backend choice has been applied.
 from tractor.spawn._spawn import _spawn_method  # noqa: E402
 if _spawn_method != 'subint':
    pytestmark = pytest.mark.skip(
        reason=(
            "subint-specific cancellation audit — "
            "pass `--spawn-backend=subint` to run."
        ),
    )
 # ----------------------------------------------------------------
 # child-side task bodies (run inside the spawned subint)
 # ----------------------------------------------------------------
 async def _trivial_rpc() -> str:
    '''
    Minimal RPC body for the baseline happy-teardown test.
    '''
    return 'hello from subint'
 async def _spin_without_trio_checkpoints() -> None:
    '''
    Block the main task with NO trio-visible checkpoints so any
    `Portal.cancel_actor()` arriving over IPC has nothing to hand
    off to.
    `threading.Event.wait(timeout)` releases the GIL (so other
    threads — including trio's IO/RPC tasks — can progress) but
    does NOT insert a trio checkpoint, so the subactor's main
    task never notices cancellation.
    This is the exact "stuck subint" scenario the hard-kill
    shields exist to survive.
    Never returns on its own: the event polled below is created
    locally and nothing in this function ever sets it.
    '''
    # function-local import; the body never `await`s, it only
    # blocks on the `threading` primitive below.
    import threading
    # deliberately never `.set()` => the loop condition stays
    # true forever.
    never_set = threading.Event()
    while not never_set.is_set():
        # 1s re-check granularity; low enough not to waste CPU,
        # high enough that even a pathologically slow
        # `_HARD_KILL_TIMEOUT` won't accidentally align with a
        # wake.
        never_set.wait(timeout=1.0)
 # ----------------------------------------------------------------
 # parent-side harnesses (driven inside `trio.run(...)`)
 # ----------------------------------------------------------------
 async def _happy_path(
    reg_addr: tuple[str, int|str],
    deadline: float,
 ) -> None:
    '''
    Parent-side happy path: boot a root actor registered at
    `reg_addr`, run `_trivial_rpc` in a single subactor and check
    the value it hands back — all bounded by `deadline` seconds.
    '''
    with trio.fail_after(deadline):
        async with tractor.open_root_actor(
            registry_addrs=[reg_addr],
        ):
            async with tractor.open_nursery() as nursery:
                rpc_portal: tractor.Portal = await nursery.run_in_actor(
                    _trivial_rpc,
                    name='subint-happy',
                )
                greeting: str = await rpc_portal.wait_for_result()
                assert greeting == 'hello from subint'
 async def _spawn_stuck_then_cancel(
    reg_addr: tuple[str, int|str],
    deadline: float,
 ) -> None:
    '''
    Parent-side harness for the stuck-child case: spawn a
    subactor running `_spin_without_trio_checkpoints`, let it
    settle, then cancel the actor nursery — all bounded by
    `deadline` seconds.
    '''
    with trio.fail_after(deadline):
        async with tractor.open_root_actor(
            registry_addrs=[reg_addr],
        ):
            async with tractor.open_nursery() as nursery:
                await nursery.run_in_actor(
                    _spin_without_trio_checkpoints,
                    name='subint-stuck',
                )
                # Settle time so the child reaches its
                # non-checkpointing loop before the cancel lands;
                # the exact value is unimportant so long as it
                # spans a handful of trio schedule ticks.
                await trio.sleep(0.5)
                nursery.cancel_scope.cancel()
 # ----------------------------------------------------------------
 # tests
 # ----------------------------------------------------------------
 def test_subint_happy_teardown(
    reg_addr: tuple[str, int|str],
 ) -> None:
    '''
    Baseline run: one subactor, one portal RPC, clean nursery
    close — no cancellation and no injected faults.
    A regression here points at the spawn/teardown layer itself
    rather than at the hard-kill escape hatches.
    '''
    deadline: float = 10.0
    hang_guard = dump_on_hang(
        seconds=deadline,
        path='/tmp/subint_cancellation_happy.dump',
    )
    with hang_guard:
        trio.run(_happy_path, reg_addr, deadline)
@pytest.mark.skipon_spawn_backend(
    'subint',
    reason=(
        'XXX SUBINT HANGING TEST XXX\n'
        'See outstanding issue(s)\n'
        # TODO, put issue link!
    )
)
# Wall-clock bound via `pytest-timeout` (`method='thread'`)
# as defense-in-depth over the inner `trio.fail_after(15)`.
# Under the orphaned-channel hang class described in
# `ai/conc-anal/subint_cancel_delivery_hang_issue.md`, SIGINT
# is still deliverable and this test *should* be unwedgeable
# by the inner trio timeout — but sibling subint-backend
# tests in this repo have also exhibited the
# `subint_sigint_starvation_issue.md` GIL-starvation flavor,
# so `method='thread'` keeps us safe in case ordering or
# load shifts the failure mode.
#
# NOTE(review): the 3s bound below is shorter than both the
# ~6.5s budget listed in the docstring and the 15s inner
# deadline, so if the skip mark above is ever lifted this test
# can only end via the pytest-timeout — presumably intentional
# while the hang is unresolved; confirm + raise it together
# with removing the skip.
@pytest.mark.timeout(
    3,  # NOTE never passes pre-3.14+ subints support.
    method='thread',
)
def test_subint_non_checkpointing_child(
    reg_addr: tuple[str, int|str],
) -> None:
    '''
    Cancel a subactor whose main task is stuck in a non-
    checkpointing Python loop.
    `Portal.cancel_actor()` may be delivered over IPC but the
    main task never checkpoints to observe the Cancelled —
    so the subint's `trio.run()` can't exit gracefully.
    The parent `subint_proc` bounded-shield + daemon-driver-
    thread combo should abandon the thread after
    `_HARD_KILL_TIMEOUT` and let the parent return cleanly.
    Wall-clock budget:
    - ~0.5s: settle time for child to enter the stuck loop
    - ~3s: `_HARD_KILL_TIMEOUT` (soft-kill wait)
    - ~3s: `_HARD_KILL_TIMEOUT` (thread-join wait)
    - margin
    KNOWN ISSUE (Ctrl-C-able hang):
    -------------------------------
    This test currently hangs past the hard-kill timeout for
    reasons unrelated to the subint teardown itself — after
    the subint is destroyed, a parent-side trio task appears
    to park on an orphaned IPC channel (no clean EOF
    delivered to a waiting receive). Unlike the
    SIGINT-starvation sibling case in
    `test_stale_entry_is_deleted`, this hang IS Ctrl-C-able
    (`strace` shows SIGINT wakeup-fd `write() = 1`, not
    `EAGAIN`) — i.e. the main trio loop is still iterating
    normally. That makes this *our* bug to fix, not a
    CPython-level limitation.
    See `ai/conc-anal/subint_cancel_delivery_hang_issue.md`
    for the full analysis + candidate fix directions
    (explicit parent-side channel abort in `subint_proc`
    teardown being the most likely surgical fix).
    The sibling `ai/conc-anal/subint_sigint_starvation_issue.md`
    documents the *other* hang class (abandoned-legacy-subint
    thread + shared-GIL starvation → signal-wakeup-fd pipe
    fills → SIGINT silently dropped) — that one is
    structurally blocked on msgspec PEP 684 adoption and is
    NOT what this test is hitting.
    '''
    # inner trio-level ceiling; also used as the hang-dump delay
    # so a stack dump is written if we get that far.
    deadline: float = 15.0
    with dump_on_hang(
        seconds=deadline,
        path='/tmp/subint_cancellation_stuck.dump',
    ):
        trio.run(
            partial(
                _spawn_stuck_then_cancel,
                reg_addr,
                deadline,
            ),
        )
--- a/tests/spawn/test_subint_forkserver.py
+++ b/tests/spawn/test_subint_forkserver.py
@ -1,603 +0,0 @@
 '''
 Integration exercises for the `tractor.spawn._subint_forkserver`
 submodule at three tiers:
 1. the low-level `fork_from_worker_thread()` primitive driven
   from inside a real `trio.run()` in the parent process,
 2. that same fork with the child additionally hosting its own
   `trio.run()` inside a fresh subint via
   `run_subint_in_worker_thread()`,
 3. the full `subint_forkserver_proc` spawn backend wired
   through tractor's normal actor-nursery + portal-RPC
   machinery — i.e. `open_root_actor` + `open_nursery` +
   `run_in_actor` against a subactor spawned via fork from a
   main-interp worker thread.
 Background
 ----------
 `ai/conc-anal/subint_fork_blocked_by_cpython_post_fork_issue.md`
 establishes that `os.fork()` from a non-main sub-interpreter
 aborts the child at the CPython level. The sibling
 `subint_fork_from_main_thread_smoketest.py` proves the escape
 hatch: fork from a main-interp *worker thread* (one that has
 never entered a subint) works, and the forked child can then
 host its own `trio.run()` inside a fresh subint.
 Those smoke-test scenarios are standalone — no trio runtime
 in the *parent*. Tiers (1)+(2) here cover the primitives
 driven from inside `trio.run()` in the parent, and tier (3)
 (the `*_spawn_basic` test) drives the registered
 `subint_forkserver` spawn backend end-to-end against the
 tractor runtime.
 Gating
 ------
 - py3.14+ (via `concurrent.interpreters` presence)
 - no `--spawn-backend` restriction — the backend-level test
  flips `tractor.spawn._spawn._spawn_method` programmatically
  (via `try_set_start_method('subint_forkserver')`) and
  restores it on teardown, so these tests are independent of
  the session-level CLI backend choice.
 '''
 from __future__ import annotations
 from functools import partial
 import os
 from pathlib import Path
 import platform
 import select
 import signal
 import subprocess
 import sys
 import time
 import pytest
 import trio
 import tractor
 from tractor.devx import dump_on_hang
 # Gate: subint forkserver primitives require py3.14+. Check
 # the public stdlib wrapper's presence (added in 3.14) rather
 # than `_interpreters` directly — see
 # `tractor.spawn._subint` for why.
 pytest.importorskip('concurrent.interpreters')
 from tractor.spawn._subint_forkserver import (  # noqa: E402
    fork_from_worker_thread,
    run_subint_in_worker_thread,
    wait_child,
 )
 from tractor.spawn import _spawn as _spawn_mod  # noqa: E402
 from tractor.spawn._spawn import try_set_start_method  # noqa: E402
 # ----------------------------------------------------------------
 # child-side callables (passed via `child_target=` across fork)
 # ----------------------------------------------------------------
 _CHILD_TRIO_BOOTSTRAP: str = (
    'import trio\n'
    'async def _main():\n'
    '    await trio.sleep(0.05)\n'
    '    return 42\n'
    'result = trio.run(_main)\n'
    'assert result == 42, f"trio.run returned {result}"\n'
 )
 def _child_trio_in_subint() -> int:
    '''
    `child_target` for the trio-in-child scenario: execute
    `_CHILD_TRIO_BOOTSTRAP` (a trivial `trio.run()`) via
    `run_subint_in_worker_thread()` and map the outcome onto an
    exit code suitable for `os._exit()`:
    - 0: the subint-hosted `trio.run()` succeeded
    - 3: `RuntimeError` (driver thread hang / timeout inside
      `run_subint_in_worker_thread`)
    - 4: the subint bootstrap raised any other exception
    '''
    exit_code: int = 0
    try:
        run_subint_in_worker_thread(
            _CHILD_TRIO_BOOTSTRAP,
            thread_name='child-subint-trio-thread',
        )
    except RuntimeError:
        # timeout / thread-never-returned
        exit_code = 3
    except BaseException:
        exit_code = 4
    return exit_code
 # ----------------------------------------------------------------
 # parent-side harnesses (run inside `trio.run()`)
 # ----------------------------------------------------------------
 async def run_fork_in_non_trio_thread(
    deadline: float,
    *,
    child_target=None,
 ) -> int:
    '''
    Fork + reap a child from inside a parent `trio.run()`.
    The forkserver primitive is off-loaded to a main-interp
    worker thread via `trio.to_thread.run_sync()`; the resulting
    pid is then handed to `wait_child()` (off-loaded as well so
    `waitpid()` never blocks trio's event loop) and the child is
    asserted to have exited cleanly.
    Returns the forked child's pid; everything is bounded by
    `deadline` seconds.
    '''
    # NOTE: `fork_from_worker_thread` internally spawns its own
    # dedicated `threading.Thread` (not from trio's cache) and
    # joins it before returning — so off-loading through
    # `to_thread.run_sync` is safe wrt the trio-thread-cache
    # recycling the runner. `abandon_on_cancel=False` follows the
    # same "bounded + clean" rationale used in
    # `_subint.subint_proc`.
    do_fork = partial(
        fork_from_worker_thread,
        child_target,
        thread_name='test-subint-forkserver',
    )
    with trio.fail_after(deadline):
        child_pid: int = await trio.to_thread.run_sync(
            do_fork,
            abandon_on_cancel=False,
        )
        assert child_pid > 0
        do_wait = partial(
            wait_child,
            child_pid,
            expect_exit_ok=True,
        )
        exited_ok, status_str = await trio.to_thread.run_sync(
            do_wait,
            abandon_on_cancel=False,
        )
        assert exited_ok, (
            f'forked child did not exit cleanly: '
            f'{status_str}'
        )
        return child_pid
 # ----------------------------------------------------------------
 # tests
 # ----------------------------------------------------------------
 # Bounded wall-clock via `pytest-timeout` (`method='thread'`)
 # for the usual GIL-hostage safety reason documented in the
 # sibling `test_subint_cancellation.py` / the class-A
 # `subint_sigint_starvation_issue.md`. Each test also has an
 # inner `trio.fail_after()` so assertion failures fire fast
 # under normal conditions.
@pytest.mark.timeout(30, method='thread')
def test_fork_from_worker_thread_via_trio(
) -> None:
    '''
    Baseline: from within `trio.run()`, drive
    `fork_from_worker_thread()` through
    `trio.to_thread.run_sync()`, receive the child pid and reap
    the child cleanly.
    There is no trio-in-child here, so a regression means the
    parent-side trio↔worker-thread plumbing is broken
    independently of any child-side subint machinery.
    '''
    deadline: float = 10.0
    hang_guard = dump_on_hang(
        seconds=deadline,
        path='/tmp/subint_forkserver_baseline.dump',
    )
    with hang_guard:
        child_pid: int = trio.run(
            run_fork_in_non_trio_thread,
            deadline,
        )
    # parent-side sanity — a real pid came back.
    assert isinstance(child_pid, int)
    assert child_pid > 0
    # the child was already waited on, so a second reap attempt
    # must fail.
    with pytest.raises((ChildProcessError, OSError)):
        os.waitpid(child_pid, os.WNOHANG)
@pytest.mark.timeout(30, method='thread')
def test_fork_and_run_trio_in_child() -> None:
    '''
    End-to-end: from the parent's `trio.run()`, off-load
    `fork_from_worker_thread()` to a worker thread; the forked
    child then creates a fresh subint and runs `trio.run()`
    inside it on yet another worker thread.
    This is the complete "forkserver + trio-in-subint-in-child"
    pattern the proposed `subint_forkserver` spawn backend would
    rest on.
    '''
    deadline: float = 15.0
    fork_and_reap = partial(
        run_fork_in_non_trio_thread,
        deadline,
        child_target=_child_trio_in_subint,
    )
    with dump_on_hang(
        seconds=deadline,
        path='/tmp/subint_forkserver_trio_in_child.dump',
    ):
        child_pid: int = trio.run(fork_and_reap)
    assert isinstance(child_pid, int)
    assert child_pid > 0
 # ----------------------------------------------------------------
 # tier-3 backend test: drive the registered `subint_forkserver`
 # spawn backend end-to-end through tractor's actor-nursery +
 # portal-RPC machinery.
 # ----------------------------------------------------------------
 async def _trivial_rpc() -> str:
    '''
    Minimal subactor-side RPC body: just return a sentinel
    string the parent can assert on.
    '''
    return 'hello from subint-forkserver child'
 async def _happy_path_forkserver(
    reg_addr: tuple[str, int | str],
    deadline: float,
 ) -> None:
    '''
    Parent-side harness: bring up a root actor registered at
    `reg_addr`, open an actor nursery and spawn one subactor
    through whichever spawn backend is currently selected (the
    calling test is expected to have flipped it to
    `subint_forkserver`), then round-trip `_trivial_rpc` through
    its portal and assert on the result — all bounded by
    `deadline` seconds.
    '''
    with trio.fail_after(deadline):
        async with tractor.open_root_actor(
            registry_addrs=[reg_addr],
        ):
            async with tractor.open_nursery() as nursery:
                rpc_portal: tractor.Portal = await nursery.run_in_actor(
                    _trivial_rpc,
                    name='subint-forkserver-child',
                )
                reply: str = await rpc_portal.wait_for_result()
                assert reply == 'hello from subint-forkserver child'
@pytest.fixture
def forkserver_spawn_method():
    '''
    Flip `tractor.spawn._spawn._spawn_method` to
    `'subint_forkserver'` for the duration of a test, then
    restore whatever was in place before (usually the
    session-level CLI choice, typically `'trio'`).
    Without this, other tests in the same session would
    observe the global flip and start spawning via fork —
    which is almost certainly NOT what their assertions were
    written against.
    Both `_spawn_method` and `_ctx` are snapshotted up front and
    restored unconditionally on teardown.
    '''
    prev_method: str = _spawn_mod._spawn_method
    prev_ctx = _spawn_mod._ctx
    try:
        # flip INSIDE the `try`: should `try_set_start_method()`
        # raise after it already touched the module globals, the
        # `finally` below still puts the snapshot back so the
        # rest of the session is unaffected.
        try_set_start_method('subint_forkserver')
        yield
    finally:
        _spawn_mod._spawn_method = prev_method
        _spawn_mod._ctx = prev_ctx
@pytest.mark.timeout(60, method='thread')
def test_subint_forkserver_spawn_basic(
    reg_addr: tuple[str, int | str],
    forkserver_spawn_method,
) -> None:
    '''
    Happy-path: spawn ONE subactor through the
    `subint_forkserver` backend (parent-side fork from a
    main-interp worker thread), round-trip a trivial portal-RPC
    and tear the nursery down cleanly.
    Passing proves the "forkserver + tractor runtime" arch
    end-to-end: the registered `subint_forkserver_proc` spawn
    target forks a child, the child runs `_actor_child_main()`,
    completes the IPC handshake and serves an RPC, and the
    parent reaps via `_ForkedProc.wait()` without regressing any
    of the normal nursery teardown invariants.
    '''
    deadline: float = 20.0
    hang_guard = dump_on_hang(
        seconds=deadline,
        path='/tmp/subint_forkserver_spawn_basic.dump',
    )
    with hang_guard:
        trio.run(
            _happy_path_forkserver,
            reg_addr,
            deadline,
        )
 # ----------------------------------------------------------------
 # tier-4 DRAFT: orphaned-subactor SIGINT survivability
 #
 # Motivating question: with `subint_forkserver`, the child's
 # `trio.run()` lives on the fork-inherited worker thread which
 # is NOT `threading.main_thread()` — so trio cannot install its
 # `signal.set_wakeup_fd`-based SIGINT handler. If the parent
 # goes away via `SIGKILL` (no IPC `Portal.cancel_actor()`
 # possible), does SIGINT on the orphan child cleanly tear it
 # down via CPython's default `KeyboardInterrupt` delivery, or
 # does it hang?
 #
 # Working hypothesis (unverified pre-this-test): post-fork the
 # child is effectively single-threaded (only the fork-worker
 # tstate survived), so SIGINT → default handler → raises
 # `KeyboardInterrupt` on the only thread — which happens to be
 # the one driving trio's event loop — so trio observes it at
 # the next checkpoint. If so, we're "fine" on this backend
 # despite the missing trio SIGINT handler.
 #
 # Cross-backend generalization (decide after this passes):
 # - applicable to any backend whose subactors are separate OS
 #   processes: `trio`, `mp_spawn`, `mp_forkserver`,
 #   `subint_forkserver`.
 # - NOT applicable to plain `subint` (subactors are in-process
 #   subinterpreters, no orphan child process to SIGINT).
 # - move path: lift the harness script into
 #   `tests/_orphan_harness.py`, parametrize on the session's
 #   `_spawn_method`, add `skipif _spawn_method == 'subint'`.
 # ----------------------------------------------------------------
 # Source of a standalone harness program (written to a temp
 # file and run as a separate python process by the test).
 # - argv: `<spawn-backend> <registry-host> <registry-port>`
 # - it selects the backend via `try_set_start_method()`, opens
 #   a root actor + nursery and runs `_sleep_forever` in one
 #   subactor named "orphan-test-child",
 # - the subactor task prints `CHILD_PID=<pid>` and the root
 #   prints `PARENT_READY=<pid>` once `run_in_actor()` returned;
 #   both then sleep forever until killed from outside.
 _ORPHAN_HARNESS_SCRIPT: str = '''
 import os
 import sys
 import trio
 import tractor
 from tractor.spawn._spawn import try_set_start_method
 async def _sleep_forever() -> None:
    print(f"CHILD_PID={os.getpid()}", flush=True)
    await trio.sleep_forever()
 async def _main(reg_addr):
    async with (
        tractor.open_root_actor(registry_addrs=[reg_addr]),
        tractor.open_nursery() as an,
    ):
        portal = await an.run_in_actor(
            _sleep_forever,
            name="orphan-test-child",
        )
        print(f"PARENT_READY={os.getpid()}", flush=True)
        await trio.sleep_forever()
 if __name__ == "__main__":
    backend = sys.argv[1]
    host = sys.argv[2]
    port = int(sys.argv[3])
    try_set_start_method(backend)
    trio.run(_main, (host, port))
 '''
 def _read_marker(
    proc: subprocess.Popen,
    marker: str,
    timeout: float,
    _buf: dict,
 ) -> str:
    '''
    Block until `<marker>=<value>\\n` appears on `proc.stdout`
    and return `<value>`. Uses a per-proc byte buffer (`_buf`)
    to carry partial lines across calls.
    '''
    deadline: float = time.monotonic() + timeout
    remainder: bytes = _buf.get('remainder', b'')
    prefix: bytes = f'{marker}='.encode()
    while time.monotonic() < deadline:
        # drain any complete lines already buffered
        while b'\n' in remainder:
            line, remainder = remainder.split(b'\n', 1)
            if line.startswith(prefix):
                _buf['remainder'] = remainder
                return line[len(prefix):].decode().strip()
        ready, _, _ = select.select([proc.stdout], [], [], 0.2)
        if not ready:
            continue
        chunk: bytes = os.read(proc.stdout.fileno(), 4096)
        if not chunk:
            break
        remainder += chunk
    _buf['remainder'] = remainder
    raise TimeoutError(
        f'Never observed marker {marker!r} on harness stdout '
        f'within {timeout}s'
    )
 def _process_alive(pid: int) -> bool:
    '''Liveness probe for a pid we do NOT parent (post-orphan).'''
    try:
        os.kill(pid, 0)
        return True
    except ProcessLookupError:
        return False
 # Flakey under session-level env pollution (leftover
 # subactor PIDs from earlier tests competing for ports /
 # inheriting the harness subprocess's FDs). Passes
 # cleanly in isolation, fails in suite; `strict=False`
 # so either outcome is tolerated until the env isolation
 # is improved. Tracker:
 # `ai/conc-anal/subint_forkserver_orphan_sigint_hang_issue.md`.
@pytest.mark.xfail(
    strict=False,
    reason=(
        'Env-pollution sensitive. Passes in isolation, '
        'flakey in full-suite runs; orphan subactor may '
        'take longer than the cleanup window to exit when '
        'competing for resources with leftover state from '
        'earlier tests.'
    ),
)
@pytest.mark.timeout(
    30,
    method='thread',
)
def test_orphaned_subactor_sigint_cleanup_DRAFT(
    reg_addr: tuple[str, int | str],
    tmp_path: Path,
) -> None:
    '''
    DRAFT — orphaned-subactor SIGINT survivability under the
    `subint_forkserver` backend.
    Sequence:
      1. Spawn a harness subprocess that brings up a root
         actor + one `sleep_forever` subactor via
         `subint_forkserver`.
      2. Read the harness's stdout for `PARENT_READY=<pid>`
         and `CHILD_PID=<pid>` markers (confirms the
         parent→child IPC handshake completed).
      3. `SIGKILL` the parent (no IPC cancel possible — the
         whole point of this test).
      4. `SIGINT` the orphan child.
      5. Poll `os.kill(child_pid, 0)` for up to `timeout`
         seconds (currently 6s) — assert the child exits.
    Empirical result (2026-04, py3.14): currently **FAILS** —
    SIGINT on the orphan child doesn't unwind the trio loop,
    despite trio's `KIManager` handler being correctly
    installed in the subactor (the post-fork thread IS
    `threading.main_thread()` on py3.14). `faulthandler` dump
    shows the subactor wedged in `trio/_core/_io_epoll.py::
    get_events` — the signal's supposed wakeup of the event
    loop isn't firing. Full analysis + diagnostic evidence
    in `ai/conc-anal/
    subint_forkserver_orphan_sigint_hang_issue.md`.
    The runtime's *intentional* "KBI-as-OS-cancel" path at
    `tractor/spawn/_entry.py::_trio_main:164` is therefore
    unreachable under this backend+config. Closing the gap is
    aligned with existing design intent (make the already-
    designed behavior actually fire), not a new feature.
    Marked `xfail(strict=False)`: the outcome is env-pollution
    sensitive (passes in isolation, flakey in-suite) so both
    XFAIL and XPASS are tolerated for now; once the gap is
    closed and the env isolation improved the mark should be
    dropped.
    '''
    if platform.system() != 'Linux':
        pytest.skip(
            'orphan-reparenting semantics only exercised on Linux'
        )
    script_path = tmp_path / '_orphan_harness.py'
    script_path.write_text(_ORPHAN_HARNESS_SCRIPT)
    # Offset the port so we don't race the session reg_addr with
    # any concurrently-running backend test's listener.
    host: str = reg_addr[0]
    port: int = int(reg_addr[1]) + 17
    proc: subprocess.Popen = subprocess.Popen(
        [
            sys.executable,
            str(script_path),
            'subint_forkserver',
            host,
            str(port),
        ],
        stdout=subprocess.PIPE,
        # fold stderr into the same pipe so tracebacks from the
        # harness don't go missing.
        stderr=subprocess.STDOUT,
    )
    parent_pid: int | None = None
    child_pid: int | None = None
    buf: dict = {}
    try:
        child_pid = int(_read_marker(proc, 'CHILD_PID', 15.0, buf))
        parent_pid = int(_read_marker(proc, 'PARENT_READY', 15.0, buf))
        # sanity: both alive before we start killing stuff
        assert _process_alive(parent_pid), (
            f'harness parent pid={parent_pid} gone before '
            f'SIGKILL — test premise broken'
        )
        assert _process_alive(child_pid), (
            f'orphan-candidate child pid={child_pid} gone '
            f'before test started'
        )
        # step 3: kill parent — no IPC cancel arrives at child.
        # `proc.wait()` reaps the zombie so it truly disappears
        # from the process table (otherwise `os.kill(pid, 0)`
        # keeps reporting it as alive).
        os.kill(parent_pid, signal.SIGKILL)
        try:
            proc.wait(timeout=3.0)
        except subprocess.TimeoutExpired:
            pytest.fail(
                f'harness parent pid={parent_pid} did not die '
                f'after SIGKILL — test premise broken'
            )
        assert _process_alive(child_pid), (
            f'child pid={child_pid} died along with parent — '
            f'did the parent reap it before SIGKILL took? '
            f'test premise requires an orphan.'
        )
        # step 4+5: SIGINT the orphan, poll for exit.
        os.kill(child_pid, signal.SIGINT)
        timeout: float = 6.0
        cleanup_deadline: float = time.monotonic() + timeout
        while time.monotonic() < cleanup_deadline:
            if not _process_alive(child_pid):
                return  # <- success path
            time.sleep(0.1)
        # report the window actually waited (not a hard-coded
        # number) and don't assert a root cause the analysis
        # referenced in the docstring has already ruled out.
        pytest.fail(
            f'Orphan subactor (pid={child_pid}) did NOT exit '
            f'within {timeout}s of SIGINT under '
            f'`subint_forkserver` → SIGINT did not unwind the '
            f"subactor's trio loop; see `ai/conc-anal/"
            f'subint_forkserver_orphan_sigint_hang_issue.md`.'
        )
    finally:
        # best-effort cleanup to avoid leaking orphans across
        # the test session regardless of outcome.
        for pid in (parent_pid, child_pid):
            if pid is None:
                continue
            try:
                os.kill(pid, signal.SIGKILL)
            except ProcessLookupError:
                pass
        try:
            proc.kill()
        except OSError:
            pass
        try:
            proc.wait(timeout=2.0)
        except subprocess.TimeoutExpired:
            pass
        # release our end of the harness stdout pipe so the fd
        # isn't leaked for the rest of the session.
        if proc.stdout is not None:
            proc.stdout.close()
--- a/tests/test_2way.py
+++ b/tests/test_2way.py
@ -1,12 +1,7 @@
-'''
+"""
-Audit the simplest inter-actor bidirectional (streaming)
+Bidirectional streaming.
 msg patterns.
-'''
+"""
 from __future__ import annotations
 from typing import (
    Callable,
 )
 import pytest
 import trio
 import tractor
@ -14,8 +9,10 @@ import tractor
@tractor.context
 async def simple_rpc(
    ctx: tractor.Context,
    data: int,
 ) -> None:
    '''
    Test a small ping-pong server.
@ -42,13 +39,15 @@ async def simple_rpc(
@tractor.context
 async def simple_rpc_with_forloop(
    ctx: tractor.Context,
    data: int,
 ) -> None:
    '''
    Same as previous test but using `async for` syntax/api.
-    '''
+) -> None:
    """Same as previous test but using ``async for`` syntax/api.
    """
    # signal to parent that we're up
    await ctx.started(data + 1)
@ -69,78 +68,62 @@ async def simple_rpc_with_forloop(
@pytest.mark.parametrize(
    'use_async_for',
-    [
+    [True, False],
        True,
        False,
    ],
    ids='use_async_for={}'.format,
 )
@pytest.mark.parametrize(
    'server_func',
-    [
+    [simple_rpc, simple_rpc_with_forloop],
        simple_rpc,
        simple_rpc_with_forloop,
    ],
    ids='server_func={}'.format,
 )
-def test_simple_rpc(
+def test_simple_rpc(server_func, use_async_for):
    server_func: Callable,
    use_async_for: bool,
    loglevel: str,
    debug_mode: bool,
 ):
    '''
    The simplest request response pattern.
    '''
    async def main():
-        with trio.fail_after(6):
+        async with tractor.open_nursery() as n:
            async with tractor.open_nursery(
                loglevel=loglevel,
                debug_mode=debug_mode,
            ) as an:
                portal: tractor.Portal = await an.start_actor(
                    'rpc_server',
                    enable_modules=[__name__],
                )
-                async with portal.open_context(
+            portal = await n.start_actor(
-                    server_func,  # taken from pytest parameterization
+                'rpc_server',
-                    data=10,
+                enable_modules=[__name__],
-                ) as (ctx, sent):
+            )
-                    assert sent == 11
+            async with portal.open_context(
                server_func,  # taken from pytest parameterization
                data=10,
            ) as (ctx, sent):
-                    async with ctx.open_stream() as stream:
+                assert sent == 11
-                        if use_async_for:
+                async with ctx.open_stream() as stream:
-                            count = 0
+                    if use_async_for:
-                            # receive msgs using async for style
+
                        count = 0
                        # receive msgs using async for style
                        print('ping')
                        await stream.send('ping')
                        async for msg in stream:
                            assert msg == 'pong'
                            print('ping')
                            await stream.send('ping')
                            count += 1
-                            async for msg in stream:
+                            if count >= 9:
-                                assert msg == 'pong'
+                                break
                                print('ping')
                                await stream.send('ping')
                                count += 1
-                                if count >= 9:
+                    else:
-                                    break
+                        # classic send/receive style
                        for _ in range(10):
-                        else:
+                            print('ping')
-                            # classic send/receive style
+                            await stream.send('ping')
-                            for _ in range(10):
+                            assert await stream.receive() == 'pong'
-                                print('ping')
+                # stream should terminate here
                                await stream.send('ping')
                                assert await stream.receive() == 'pong'
-                    # stream should terminate here
+            # final context result(s) should be consumed here in __aexit__()
-                # final context result(s) should be consumed here in __aexit__()
+            await portal.cancel_actor()
                await portal.cancel_actor()
    trio.run(main)
--- a/tests/test_advanced_faults.py
+++ b/tests/test_advanced_faults.py
@ -98,8 +98,7 @@ def test_ipc_channel_break_during_stream(
        expect_final_exc = TransportClosed
    mod: ModuleType = import_path(
-        examples_dir()
+        examples_dir() / 'advanced_faults'
        / 'advanced_faults'
        / 'ipc_failure_during_stream.py',
        root=examples_dir(),
        consider_namespace_packages=False,
@ -114,9 +113,8 @@ def test_ipc_channel_break_during_stream(
    if (
        # only expect EoC if trans is broken on the child side,
        ipc_break['break_child_ipc_after'] is not False
        and
        # AND we tell the child to call `MsgStream.aclose()`.
-        pre_aclose_msgstream
+        and pre_aclose_msgstream
    ):
        # expect_final_exc = trio.EndOfChannel
        # ^XXX NOPE! XXX^ since now `.open_stream()` absorbs this
@ -146,6 +144,9 @@ def test_ipc_channel_break_during_stream(
        # a user sending ctl-c by raising a KBI.
        if pre_aclose_msgstream:
            expect_final_exc = KeyboardInterrupt
            if tpt_proto == 'uds':
                expect_final_exc = TransportClosed
                expect_final_cause = trio.BrokenResourceError
            # XXX OLD XXX
            # if child calls `MsgStream.aclose()` then expect EoC.
@ -159,13 +160,16 @@ def test_ipc_channel_break_during_stream(
        ipc_break['break_child_ipc_after'] is not False
        and (
            ipc_break['break_parent_ipc_after']
-            >
+            > ipc_break['break_child_ipc_after']
            ipc_break['break_child_ipc_after']
        )
    ):
        if pre_aclose_msgstream:
            expect_final_exc = KeyboardInterrupt
            if tpt_proto == 'uds':
                expect_final_exc = TransportClosed
                expect_final_cause = trio.BrokenResourceError
    # NOTE when the parent IPC side dies (even if the child does as well
    # but the child fails BEFORE the parent) we always expect the
    # IPC layer to raise a closed-resource, NEVER do we expect
@ -244,15 +248,8 @@ def test_ipc_channel_break_during_stream(
    # get raw instance from pytest wrapper
    value = excinfo.value
    if isinstance(value, ExceptionGroup):
-        excs: tuple[Exception] = value.exceptions
+        excs = value.exceptions
-        assert (
+        assert len(excs) == 1
            len(excs) <= 2
            and
            all(
                isinstance(exc, TransportClosed)
                for exc in excs
            )
        )
        final_exc = excs[0]
        assert isinstance(final_exc, expect_final_exc)
--- a/tests/test_cancellation.py
+++ b/tests/test_cancellation.py
@ -17,18 +17,8 @@ from tractor._testing import (
 from .conftest import no_windows
-_non_linux: bool = platform.system() != 'Linux'
+def is_win():
-_friggin_windows: bool = platform.system() == 'Windows'
+    return platform.system() == 'Windows'
 pytestmark = pytest.mark.skipon_spawn_backend(
    'subint',
    reason=(
        'XXX SUBINT HANGING TEST XXX\n'
        'See oustanding issue(s)\n'
        # TODO, put issue link!
    )
 )
 async def assert_err(delay=0):
@ -120,17 +110,8 @@ def test_remote_error(reg_addr, args_err):
            assert exc.boxed_type == errtype
 # @pytest.mark.skipon_spawn_backend(
 #     'subint',
 #     reason=(
 #         'XXX SUBINT HANGING TEST XXX\n'
 #         'See oustanding issue(s)\n'
 #         # TODO, put issue link!
 #     )
 # )
 def test_multierror(
    reg_addr: tuple[str, int],
    start_method: str,
 ):
    '''
    Verify we raise a ``BaseExceptionGroup`` out of a nursery where
@ -160,28 +141,15 @@ def test_multierror(
        trio.run(main)
@pytest.mark.parametrize('delay', (0, 0.5))
@pytest.mark.parametrize(
-    'delay',
+    'num_subactors', range(25, 26),
    (0, 0.5),
    ids='delays={}'.format,
 )
-@pytest.mark.parametrize(
+def test_multierror_fast_nursery(reg_addr, start_method, num_subactors, delay):
-    'num_subactors',
+    """Verify we raise a ``BaseExceptionGroup`` out of a nursery where
    range(25, 26),
    ids= 'num_subs={}'.format,
 )
 def test_multierror_fast_nursery(
    reg_addr: tuple,
    start_method: str,
    num_subactors: int,
    delay: float,
 ):
    '''
    Verify we raise a ``BaseExceptionGroup`` out of a nursery where
    more then one actor errors and also with a delay before failure
    to test failure during an ongoing spawning.
-
+    """
    '''
    async def main():
        async with tractor.open_nursery(
            registry_addrs=[reg_addr],
@ -221,15 +189,8 @@ async def do_nothing():
    pass
-@pytest.mark.parametrize(
+@pytest.mark.parametrize('mechanism', ['nursery_cancel', KeyboardInterrupt])
-    'mechanism', [
+def test_cancel_single_subactor(reg_addr, mechanism):
    'nursery_cancel',
    KeyboardInterrupt,
 ])
 def test_cancel_single_subactor(
    reg_addr: tuple,
    mechanism: str|KeyboardInterrupt,
 ):
    '''
    Ensure a ``ActorNursery.start_actor()`` spawned subactor
    cancels when the nursery is cancelled.
@ -271,13 +232,9 @@ async def stream_forever():
        await trio.sleep(0.01)
-@tractor_test(
+@tractor_test
-    timeout=6,
+async def test_cancel_infinite_streamer(start_method):
-)
+
 async def test_cancel_infinite_streamer(
    reg_addr: tuple,
    start_method: str,
 ):
    # stream for at most 1 seconds
    with (
        trio.fail_after(4),
@ -300,14 +257,6 @@ async def test_cancel_infinite_streamer(
    assert n.cancelled
 # @pytest.mark.skipon_spawn_backend(
 #     'subint',
 #     reason=(
 #         'XXX SUBINT HANGING TEST XXX\n'
 #         'See oustanding issue(s)\n'
 #         # TODO, put issue link!
 #     )
 # )
@pytest.mark.parametrize(
    'num_actors_and_errs',
    [
@ -337,12 +286,9 @@ async def test_cancel_infinite_streamer(
        'no_daemon_actors_fail_all_run_in_actors_sleep_then_fail',
    ],
 )
-@tractor_test(
+@tractor_test
    timeout=10,
 )
 async def test_some_cancels_all(
    num_actors_and_errs: tuple,
    reg_addr: tuple,
    start_method: str,
    loglevel: str,
 ):
@ -424,10 +370,7 @@ async def test_some_cancels_all(
        pytest.fail("Should have gotten a remote assertion error?")
-async def spawn_and_error(
+async def spawn_and_error(breadth, depth) -> None:
    breadth: int,
    depth: int,
 ) -> None:
    name = tractor.current_actor().name
    async with tractor.open_nursery() as nursery:
        for i in range(breadth):
@ -452,18 +395,8 @@ async def spawn_and_error(
            await nursery.run_in_actor(*args, **kwargs)
 # NOTE: subint_forkserver skip handled by file-level `pytestmark`
 # above (same pytest-capture-fd hang class as siblings).
@pytest.mark.timeout(
    10,
    method='thread',
 )
@tractor_test
-async def test_nested_multierrors(
+async def test_nested_multierrors(loglevel, start_method):
    reg_addr: tuple,
    loglevel: str,
    start_method: str,
 ):
    '''
    Test that failed actor sets are wrapped in `BaseExceptionGroup`s. This
    test goes only 2 nurseries deep but we should eventually have tests
@ -498,7 +431,7 @@ async def test_nested_multierrors(
            for subexc in err.exceptions:
                # verify first level actor errors are wrapped as remote
-                if _friggin_windows:
+                if is_win():
                    # windows is often too slow and cancellation seems
                    # to happen before an actor is spawned
@ -531,7 +464,7 @@ async def test_nested_multierrors(
                    # XXX not sure what's up with this..
                    # on windows sometimes spawning is just too slow and
                    # we get back the (sent) cancel signal instead
-                    if _friggin_windows:
+                    if is_win():
                        if isinstance(subexc, tractor.RemoteActorError):
                            assert subexc.boxed_type in (
                                BaseExceptionGroup,
@ -550,24 +483,20 @@ async def test_nested_multierrors(
@no_windows
 def test_cancel_via_SIGINT(
-    reg_addr: tuple,
+    loglevel,
-    loglevel: str,
+    start_method,
-    start_method: str,
+    spawn_backend,
 ):
-    '''
+    """Ensure that a control-C (SIGINT) signal cancels both the parent and
    Ensure that a control-C (SIGINT) signal cancels both the parent and
    child processes in trionic fashion
-
+    """
-    '''
+    pid = os.getpid()
    pid: int = os.getpid()
    async def main():
        with trio.fail_after(2):
-            async with tractor.open_nursery(
+            async with tractor.open_nursery() as tn:
                registry_addrs=[reg_addr],
            ) as tn:
                await tn.start_actor('sucka')
-                if 'mp' in start_method:
+                if 'mp' in spawn_backend:
                    time.sleep(0.1)
                os.kill(pid, signal.SIGINT)
                await trio.sleep_forever()
@ -578,38 +507,23 @@ def test_cancel_via_SIGINT(
@no_windows
 def test_cancel_via_SIGINT_other_task(
-    reg_addr: tuple,
+    loglevel,
-    loglevel: str,
+    start_method,
-    start_method: str,
+    spawn_backend,
    spawn_backend: str,
 ):
-    '''
+    """Ensure that a control-C (SIGINT) signal cancels both the parent
-    Ensure that a control-C (SIGINT) signal cancels both the parent
+    and child processes in trionic fashion even a subprocess is started
-    and child processes in trionic fashion even a subprocess is
+    from a seperate ``trio`` child  task.
-    started from a seperate ``trio`` child  task.
+    """
-
+    pid = os.getpid()
-    '''
+    timeout: float = 2
-    from .conftest import cpu_scaling_factor
+    if is_win():  # smh
    pid: int = os.getpid()
    timeout: float = (
        4 if _non_linux
        else 2
    )
    if _friggin_windows:  # smh
        timeout += 1
    # add latency headroom for CPU freq scaling (auto-cpufreq et al.)
    headroom: float = cpu_scaling_factor()
    if headroom != 1.:
        timeout *= headroom
    async def spawn_and_sleep_forever(
        task_status=trio.TASK_STATUS_IGNORED
    ):
-        async with tractor.open_nursery(
+        async with tractor.open_nursery() as tn:
            registry_addrs=[reg_addr],
        ) as tn:
            for i in range(3):
                await tn.run_in_actor(
                    sleep_forever,
@ -654,14 +568,6 @@ async def spawn_sub_with_sync_blocking_task():
        print('exiting first subactor layer..\n')
 # @pytest.mark.skipon_spawn_backend(
 #     'subint',
 #     reason=(
 #         'XXX SUBINT HANGING TEST XXX\n'
 #         'See oustanding issue(s)\n'
 #         # TODO, put issue link!
 #     )
 # )
@pytest.mark.parametrize(
    'man_cancel_outer',
    [
@ -738,11 +644,7 @@ def test_cancel_while_childs_child_in_sync_sleep(
        #
        # delay = 1  # no AssertionError in eg, TooSlowError raised.
        # delay = 2  # is AssertionError in eg AND no TooSlowError !?
-        # is AssertionError in eg AND no _cs cancellation.
+        delay = 4  # is AssertionError in eg AND no _cs cancellation.
        delay = (
            6 if _non_linux
            else 4 
        )
        with trio.fail_after(delay) as _cs:
        # with trio.CancelScope() as cs:
@ -776,7 +678,7 @@ def test_cancel_while_childs_child_in_sync_sleep(
 def test_fast_graceful_cancel_when_spawn_task_in_soft_proc_wait_for_daemon(
-    start_method: str,
+    start_method,
 ):
    '''
    This is a very subtle test which demonstrates how cancellation
@ -794,7 +696,7 @@ def test_fast_graceful_cancel_when_spawn_task_in_soft_proc_wait_for_daemon(
    kbi_delay = 0.5
    timeout: float = 2.9
-    if _friggin_windows:  # smh
+    if is_win():  # smh
        timeout += 1
    async def main():
--- a/tests/test_child_manages_service_nursery.py
+++ b/tests/test_child_manages_service_nursery.py
@ -18,15 +18,16 @@ from tractor import RemoteActorError
 async def aio_streamer(
-    chan: tractor.to_asyncio.LinkedTaskChannel,
+    from_trio: asyncio.Queue,
    to_trio: trio.abc.SendChannel,
 ) -> trio.abc.ReceiveChannel:
    # required first msg to sync caller
-    chan.started_nowait(None)
+    to_trio.send_nowait(None)
    from itertools import cycle
    for i in cycle(range(10)):
-        chan.send_nowait(i)
+        to_trio.send_nowait(i)
        await asyncio.sleep(0.01)
@ -68,7 +69,7 @@ async def wrapper_mngr(
        else:
            async with tractor.to_asyncio.open_channel_from(
                aio_streamer,
-            ) as (from_aio, first):
+            ) as (first, from_aio):
                assert not first
                # cache it so next task uses broadcast receiver
--- a/tests/test_clustering.py
+++ b/tests/test_clustering.py
@ -10,19 +10,7 @@ from tractor._testing import tractor_test
 MESSAGE = 'tractoring at full speed'
-def test_empty_mngrs_input_raises(
+def test_empty_mngrs_input_raises() -> None:
    tpt_proto: str,
 ) -> None:
    # TODO, the `open_actor_cluster()` teardown hangs
    # intermittently on UDS when `gather_contexts(mngrs=())`
    # raises `ValueError` mid-setup; likely a race in the
    # actor-nursery cleanup vs UDS socket shutdown. Needs
    # a deeper look at `._clustering`/`._supervise` teardown
    # paths with the UDS transport.
    if tpt_proto == 'uds':
        pytest.skip(
            'actor-cluster teardown hangs intermittently on UDS'
        )
    async def main():
        with trio.fail_after(3):
@ -68,39 +56,25 @@ async def worker(
            print(msg)
            assert msg == MESSAGE
-        # ?TODO, does this ever cause a hang?
+        # TODO: does this ever cause a hang
        # assert 0
 # ?TODO, but needs a fn-scoped tpt_proto fixture..
 # @pytest.mark.no_tpt('uds')
@tractor_test
-async def test_streaming_to_actor_cluster(
+async def test_streaming_to_actor_cluster() -> None:
    tpt_proto: str,
 ):
    '''
    Open an actor "cluster" using the (experimental) `._clustering`
    API and conduct standard inter-task-ctx streaming.
-    '''
+    async with (
-    if tpt_proto == 'uds':
+        open_actor_cluster(modules=[__name__]) as portals,
        pytest.skip(
            f'Test currently fails with tpt-proto={tpt_proto!r}\n'
        )
-    with trio.fail_after(6):
+        gather_contexts(
-        async with (
+            mngrs=[p.open_context(worker) for p in portals.values()],
-            open_actor_cluster(modules=[__name__]) as portals,
+        ) as contexts,
-            gather_contexts(
+        gather_contexts(
-                mngrs=[p.open_context(worker) for p in portals.values()],
+            mngrs=[ctx[0].open_stream() for ctx in contexts],
-            ) as contexts,
+        ) as streams,
-            gather_contexts(
+    ):
-                mngrs=[ctx[0].open_stream() for ctx in contexts],
+        with trio.move_on_after(1):
-            ) as streams,
+            for stream in itertools.cycle(streams):
-
+                await stream.send(MESSAGE)
        ):
            with trio.move_on_after(1):
                for stream in itertools.cycle(streams):
                    await stream.send(MESSAGE)
--- a/tests/test_context_stream_semantics.py
+++ b/tests/test_context_stream_semantics.py
@ -9,7 +9,6 @@ from itertools import count
 import math
 import platform
 from pprint import pformat
 import sys
 from typing import (
    Callable,
 )
@ -26,7 +25,7 @@ from tractor._exceptions import (
    StreamOverrun,
    ContextCancelled,
 )
-from tractor.runtime._state import current_ipc_ctx
+from tractor._state import current_ipc_ctx
 from tractor._testing import (
    tractor_test,
@ -939,14 +938,9 @@ def test_one_end_stream_not_opened(
    '''
    overrunner, buf_size_increase, entrypoint = overrun_by
-    from tractor.runtime._runtime import Actor
+    from tractor._runtime import Actor
    buf_size = buf_size_increase + Actor.msg_buffer_size
    timeout: float = (
        1 if sys.platform == 'linux'
        else 3
    )
    async def main():
        async with tractor.open_nursery(
            debug_mode=debug_mode,
@ -956,7 +950,7 @@ def test_one_end_stream_not_opened(
                enable_modules=[__name__],
            )
-            with trio.fail_after(timeout):
+            with trio.fail_after(1):
                async with portal.open_context(
                    entrypoint,
                ) as (ctx, sent):
--- a/tests/discovery/test_registrar.py
+++ b/tests/discovery/test_registrar.py
@ -1,33 +1,25 @@
-'''
+"""
-Discovery subsystem via a "registrar" actor scenarios.
+Actor "discovery" testing
-
+"""
 '''
 import os
 import signal
 import platform
 from functools import partial
 import itertools
 import time
 from typing import Callable
 import psutil
 import pytest
 import subprocess
 import tractor
 from tractor.devx import dump_on_hang
 from tractor.trionics import collapse_eg
 from tractor._testing import tractor_test
 from tractor.discovery._addr import wrap_address
 from tractor.discovery._multiaddr import mk_maddr
 import trio
@tractor_test
-async def test_reg_then_unreg(
+async def test_reg_then_unreg(reg_addr):
    reg_addr: tuple,
 ):
    actor = tractor.current_actor()
-    assert actor.is_registrar
+    assert actor.is_arbiter
    assert len(actor._registry) == 1  # only self is registered
    async with tractor.open_nursery(
@ -35,10 +27,10 @@ async def test_reg_then_unreg(
    ) as n:
        portal = await n.start_actor('actor', enable_modules=[__name__])
-        uid = portal.channel.aid.uid
+        uid = portal.channel.uid
        async with tractor.get_registry(reg_addr) as aportal:
-            # this local actor should be the registrar
+            # this local actor should be the arbiter
            assert actor is aportal.actor
            async with tractor.wait_for_actor('actor'):
@ -56,49 +48,6 @@ async def test_reg_then_unreg(
        assert not sockaddrs
@tractor_test
 async def test_reg_then_unreg_maddr(
    reg_addr: tuple,
 ):
    '''
    Same as `test_reg_then_unreg` but pass the registry
    address as a multiaddr string to verify `wrap_address()`
    multiaddr parsing end-to-end through the runtime.
    '''
    # tuple -> Address -> multiaddr string
    addr_obj = wrap_address(reg_addr)
    maddr_str: str = str(mk_maddr(addr_obj))
    actor = tractor.current_actor()
    assert actor.is_registrar
    async with tractor.open_nursery(
        registry_addrs=[maddr_str],
    ) as n:
        portal = await n.start_actor(
            'actor_maddr',
            enable_modules=[__name__],
        )
        uid = portal.channel.aid.uid
        async with tractor.get_registry(maddr_str) as aportal:
            assert actor is aportal.actor
            async with tractor.wait_for_actor('actor_maddr'):
                assert uid in aportal.actor._registry
                sockaddrs = actor._registry[uid]
                assert sockaddrs
        await n.cancel()
        await trio.sleep(0.1)
        assert uid not in aportal.actor._registry
        sockaddrs = actor._registry.get(uid)
        assert not sockaddrs
 the_line = 'Hi my name is {}'
@ -132,20 +81,12 @@ async def say_hello_use_wait(
        return result
@pytest.mark.timeout(
    7,
    method='thread',
 )
@tractor_test
-@pytest.mark.parametrize(
+@pytest.mark.parametrize('func', [say_hello, say_hello_use_wait])
    'func',
    [say_hello,
     say_hello_use_wait]
 )
 async def test_trynamic_trio(
-    func: Callable,
+    func,
-    start_method: str,
+    start_method,
-    reg_addr: tuple,
+    reg_addr,
 ):
    '''
    Root actor acting as the "director" and running one-shot-task-actors
@ -178,10 +119,7 @@ async def stream_forever():
        await trio.sleep(0.01)
-async def cancel(
+async def cancel(use_signal, delay=0):
    use_signal: bool,
    delay: float = 0,
 ):
    # hold on there sally
    await trio.sleep(delay)
@ -194,17 +132,15 @@ async def cancel(
        raise KeyboardInterrupt
-async def stream_from(portal: tractor.Portal):
+async def stream_from(portal):
    async with portal.open_stream_from(stream_forever) as stream:
        async for value in stream:
            print(value)
-async def unpack_reg(
+async def unpack_reg(actor_or_portal):
    actor_or_portal: tractor.Portal|tractor.Actor,
 ):
    '''
-    Get and unpack a "registry" RPC request from the registrar
+    Get and unpack a "registry" RPC request from the "arbiter" registry
    system.
    '''
@ -213,10 +149,7 @@ async def unpack_reg(
    else:
        msg = await actor_or_portal.run_from_ns('self', 'get_registry')
-    return {
+    return {tuple(key.split('.')): val for key, val in msg.items()}
        tuple(key.split('.')): val
        for key, val in msg.items()
    }
 async def spawn_and_check_registry(
@ -240,26 +173,24 @@ async def spawn_and_check_registry(
        registry_addrs=[reg_addr],
        debug_mode=debug_mode,
    ):
-        async with tractor.get_registry(
+        async with tractor.get_registry(reg_addr) as portal:
            addr=reg_addr,
        ) as portal:
            # runtime needs to be up to call this
            actor = tractor.current_actor()
            if remote_arbiter:
-                assert not actor.is_registrar
+                assert not actor.is_arbiter
-            if actor.is_registrar:
+            if actor.is_arbiter:
-                extra = 1  # registrar is local root actor
+                extra = 1  # arbiter is local root actor
                get_reg = partial(unpack_reg, actor)
            else:
                get_reg = partial(unpack_reg, portal)
-                extra = 2  # local root actor + remote registrar
+                extra = 2  # local root actor + remote arbiter
            # ensure current actor is registered
            registry: dict = await get_reg()
-            assert actor.aid.uid in registry
+            assert actor.uid in registry
            try:
                async with tractor.open_nursery() as an:
@ -307,35 +238,22 @@ async def spawn_and_check_registry(
                # all subactors should have de-registered
                registry = await get_reg()
-                start: float = time.time()
+                assert len(registry) == extra
-                while (
+                assert actor.uid in registry
                    not (len(registry) == extra)
                    and
                    (time.time() - start) < 5
                ):
                    print(
                        f'Waiting for remaining subs to dereg..\n'
                        f'{registry!r}\n'
                    )
                    await trio.sleep(0.3)
                else:
                    assert len(registry) == extra
                assert actor.aid.uid in registry
@pytest.mark.parametrize('use_signal', [False, True])
@pytest.mark.parametrize('with_streaming', [False, True])
 def test_subactors_unregister_on_cancel(
    debug_mode: bool,
-    start_method: str,
+    start_method,
-    use_signal: bool,
+    use_signal,
-    reg_addr: tuple,
+    reg_addr,
-    with_streaming: bool,
+    with_streaming,
 ):
    '''
    Verify that cancelling a nursery results in all subactors
-    deregistering themselves with the registrar.
+    deregistering themselves with the arbiter.
    '''
    with pytest.raises(KeyboardInterrupt):
@ -356,17 +274,15 @@ def test_subactors_unregister_on_cancel(
 def test_subactors_unregister_on_cancel_remote_daemon(
    daemon: subprocess.Popen,
    debug_mode: bool,
-    start_method: str,
+    start_method,
-    use_signal: bool,
+    use_signal,
-    reg_addr: tuple,
+    reg_addr,
-    with_streaming: bool,
+    with_streaming,
 ):
-    '''
+    """Verify that cancelling a nursery results in all subactors
-    Verify that cancelling a nursery results in all subactors
+    deregistering themselves with a **remote** (not in the local process
-    deregistering themselves with a **remote** (not in the local
+    tree) arbiter.
-    process tree) registrar.
+    """
    '''
    with pytest.raises(KeyboardInterrupt):
        trio.run(
            partial(
@ -409,24 +325,20 @@ async def close_chans_before_nursery(
            try:
                get_reg = partial(unpack_reg, aportal)
-                async with tractor.open_nursery() as an:
+                async with tractor.open_nursery() as tn:
-                    portal1 = await an.start_actor(
+                    portal1 = await tn.start_actor(
-                        name='consumer1',
+                        name='consumer1', enable_modules=[__name__])
-                        enable_modules=[__name__],
+                    portal2 = await tn.start_actor(
-                    )
+                        'consumer2', enable_modules=[__name__])
                    portal2 = await an.start_actor(
                        'consumer2',
                        enable_modules=[__name__],
                    )
-                    async with (
+                    # TODO: compact this back as was in last commit once
-                        portal1.open_stream_from(
+                    # 3.9+, see https://github.com/goodboy/tractor/issues/207
                    async with portal1.open_stream_from(
                        stream_forever
                    ) as agen1:
                        async with portal2.open_stream_from(
                            stream_forever
-                        ) as agen1,
+                        ) as agen2:
                        portal2.open_stream_from(
                            stream_forever
                        ) as agen2,
                    ):
                            async with (
                                collapse_eg(),
                                trio.open_nursery() as tn,
@ -437,7 +349,7 @@ async def close_chans_before_nursery(
                                    await streamer(agen2)
                                finally:
                                    # Kill the root nursery thus resulting in
-                                    # normal registrar channel ops to fail during
+                                    # normal arbiter channel ops to fail during
                                    # teardown. It doesn't seem like this is
                                    # reliably triggered by an external SIGINT.
                                    # tractor.current_actor()._root_nursery.cancel_scope.cancel()
@ -449,30 +361,27 @@ async def close_chans_before_nursery(
                                    # also kill off channels cuz why not
                                    await agen1.aclose()
                                    await agen2.aclose()
            finally:
                with trio.CancelScope(shield=True):
                    await trio.sleep(1)
                    # all subactors should have de-registered
                    registry = await get_reg()
-                    assert portal1.channel.aid.uid not in registry
+                    assert portal1.channel.uid not in registry
-                    assert portal2.channel.aid.uid not in registry
+                    assert portal2.channel.uid not in registry
                    assert len(registry) == entries_at_end
@pytest.mark.parametrize('use_signal', [False, True])
 def test_close_channel_explicit(
-    start_method: str,
+    start_method,
-    use_signal: bool,
+    use_signal,
-    reg_addr: tuple,
+    reg_addr,
 ):
-    '''
+    """Verify that closing a stream explicitly and killing the actor's
    Verify that closing a stream explicitly and killing the actor's
    "root nursery" **before** the containing nursery tears down also
-    results in subactor(s) deregistering from the registrar.
+    results in subactor(s) deregistering from the arbiter.
-
+    """
    '''
    with pytest.raises(KeyboardInterrupt):
        trio.run(
            partial(
@ -485,18 +394,16 @@ def test_close_channel_explicit(
@pytest.mark.parametrize('use_signal', [False, True])
-def test_close_channel_explicit_remote_registrar(
+def test_close_channel_explicit_remote_arbiter(
    daemon: subprocess.Popen,
-    start_method: str,
+    start_method,
-    use_signal: bool,
+    use_signal,
-    reg_addr: tuple,
+    reg_addr,
 ):
-    '''
+    """Verify that closing a stream explicitly and killing the actor's
    Verify that closing a stream explicitly and killing the actor's
    "root nursery" **before** the containing nursery tears down also
-    results in subactor(s) deregistering from the registrar.
+    results in subactor(s) deregistering from the arbiter.
-
+    """
    '''
    with pytest.raises(KeyboardInterrupt):
        trio.run(
            partial(
@ -506,138 +413,3 @@ def test_close_channel_explicit_remote_registrar(
                remote_arbiter=True,
            ),
        )
@tractor.context
 async def kill_transport(
    ctx: tractor.Context,
 ) -> None:
    await ctx.started()
    actor: tractor.Actor = tractor.current_actor()
    actor.ipc_server.cancel()
    await trio.sleep_forever()
 # Wall-clock bound via `pytest-timeout` (`method='thread'`).
 # Under `--spawn-backend=subint` this test can wedge in an
 # un-Ctrl-C-able state (abandoned-subint + shared-GIL
 # starvation → signal-wakeup-fd pipe fills → SIGINT silently
 # dropped; see `ai/conc-anal/subint_sigint_starvation_issue.md`).
 # `method='thread'` is specifically required because `signal`-
 # method SIGALRM suffers the same GIL-starvation path and
 # wouldn't fire the Python-level handler.
 # At timeout the plugin hard-kills the pytest process — that's
 # the intended behavior here; the alternative is an unattended
 # suite run that never returns.
@pytest.mark.timeout(
    3,  # NOTE should be a 2.1s happy path.
    method='thread',
 )
@pytest.mark.skipon_spawn_backend(
    'subint',
    reason=(
        'XXX SUBINT HANGING TEST XXX\n'
        'See oustanding issue(s)\n'
        # TODO, put issue link!
    )
 )
 # @pytest.mark.parametrize('use_signal', [False, True])
 #
 def test_stale_entry_is_deleted(
    debug_mode: bool,
    daemon: subprocess.Popen,
    start_method: str,
    reg_addr: tuple,
 ):
    '''
    Ensure that when a stale entry is detected in the registrar's
    table that the `find_actor()` API takes care of deleting the
    stale entry and not delivering a bad portal.
    '''
    async def main():
        name: str = 'transport_fails_actor'
        _reg_ptl: tractor.Portal
        an: tractor.ActorNursery
        async with (
            tractor.open_nursery(
                debug_mode=debug_mode,
                registry_addrs=[reg_addr],
            ) as an,
            tractor.get_registry(reg_addr) as _reg_ptl,
        ):
            ptl: tractor.Portal = await an.start_actor(
                name,
                enable_modules=[__name__],
            )
            async with ptl.open_context(
                kill_transport,
            ) as (first, ctx):
                async with tractor.find_actor(
                    name,
                    registry_addrs=[reg_addr],
                ) as maybe_portal:
                    # because the transitive
                    # `._api.maybe_open_portal()` call should
                    # fail and implicitly call `.delete_addr()`
                    assert maybe_portal is None
                    registry: dict = await unpack_reg(_reg_ptl)
                    assert ptl.chan.aid.uid not in registry
                # should fail since we knocked out the IPC tpt XD
                await ptl.cancel_actor()
                await an.cancel()
    # TODO, remove once the `[subint]` variant no longer hangs.
    #
    # Status (as of Phase B hard-kill landing):
    #
    # - `[trio]`/`[mp_*]` variants: completes normally; `dump_on_hang`
    #   is a no-op safety net here.
    #
    # - `[subint]` variant: hangs indefinitely AND is un-Ctrl-C-able.
    #   `strace -p <pytest_pid>` while in the hang reveals a silently-
    #   dropped SIGINT — the C signal handler tries to write the
    #   signum byte to Python's signal-wakeup fd and gets `EAGAIN`,
    #   meaning the pipe is full (nobody's draining it).
    #
    #   Root-cause chain: our hard-kill in `spawn._subint` abandoned
    #   the driver OS-thread (which is `daemon=True`) after the soft-
    #   kill timeout, but the *sub-interpreter* inside that thread is
    #   still running `trio.run()` — `_interpreters.destroy()` can't
    #   force-stop a running subint (raises `InterpreterError`), and
    #   legacy-config subints share the main GIL. The abandoned subint
    #   starves the parent's trio event loop from iterating often
    #   enough to drain its wakeup pipe → SIGINT silently drops.
    #
    #   This is structurally a CPython-level limitation: there's no
    #   public force-destroy primitive for a running subint. We
    #   escape on the harness side via a SIGINT-loop in the `daemon`
    #   fixture teardown (killing the bg registrar subproc closes its
    #   end of the IPC, which eventually unblocks a recv in main trio,
    #   which lets the loop drain the wakeup pipe). Long-term fix path:
    #   msgspec PEP 684 support (jcrist/msgspec#563) → isolated-mode
    #   subints with per-interp GIL.
    #
    #   Full analysis:
    #   `ai/conc-anal/subint_sigint_starvation_issue.md`
    #
    #   See also the *sibling* hang class documented in
    #   `ai/conc-anal/subint_cancel_delivery_hang_issue.md` — same
    #   subint backend, different root cause (Ctrl-C-able hang, main
    #   trio loop iterating fine; ours to fix, not CPython's).
    #   Reproduced by `tests/test_subint_cancellation.py
    #   ::test_subint_non_checkpointing_child`.
    #
    # Kept here (and not behind a `pytestmark.skip`) so we can still
    # inspect the dump file if the hang ever returns after a refactor.
    # `pytest`'s stderr capture eats `faulthandler` output otherwise,
    # so we route `dump_on_hang` to a file.
    with dump_on_hang(
        seconds=20,
        path=f'/tmp/test_stale_entry_is_deleted_{start_method}.dump',
    ):
        trio.run(main)
--- a/tests/test_docs_examples.py
+++ b/tests/test_docs_examples.py
@ -9,17 +9,12 @@ import sys
 import subprocess
 import platform
 import shutil
 from typing import Callable
 import pytest
 import tractor
 from tractor._testing import (
    examples_dir,
 )
 _non_linux: bool = platform.system() != 'Linux'
 _friggin_macos: bool = platform.system() == 'Darwin'
@pytest.fixture
 def run_example_in_subproc(
@ -94,10 +89,8 @@ def run_example_in_subproc(
        for f in p[2]
        if (
-            '__' not in f  # ignore any pkg-mods
+            '__' not in f
-            # ignore any `__pycache__` subdir
+            and f[0] != '_'
            and '__pycache__' not in str(p[0])
            and f[0] != '_'  # ignore any WIP "example mods"
            and 'debugging' not in p[0]
            and 'integration' not in p[0]
            and 'advanced_faults' not in p[0]
@ -108,10 +101,8 @@ def run_example_in_subproc(
    ids=lambda t: t[1],
 )
 def test_example(
-    run_example_in_subproc: Callable,
+    run_example_in_subproc,
-    example_script: str,
+    example_script,
    test_log: tractor.log.StackLevelAdapter,
    ci_env: bool,
 ):
    '''
    Load and run scripts from this repo's ``examples/`` dir as a user
@ -125,39 +116,9 @@ def test_example(
    '''
    ex_file: str = os.path.join(*example_script)
-    if (
+    if 'rpc_bidir_streaming' in ex_file and sys.version_info < (3, 9):
        'rpc_bidir_streaming' in ex_file
        and
        sys.version_info < (3, 9)
    ):
        pytest.skip("2-way streaming example requires py3.9 async with syntax")
    if (
        'full_fledged_streaming_service' in ex_file
        and
        _friggin_macos
        and
        ci_env
    ):
        pytest.skip(
            'Streaming example is too flaky in CI\n'
            'AND their competitor runs this CI service..\n'
            'This test does run just fine "in person" however..'
        )
    from .conftest import cpu_scaling_factor
    timeout: float = (
        60
        if ci_env and _non_linux
        else 16
    )
    # add latency headroom for CPU freq scaling (auto-cpufreq et al.)
    headroom: float = cpu_scaling_factor()
    if headroom != 1.:
        timeout *= headroom
    with open(ex_file, 'r') as ex:
        code = ex.read()
@ -165,12 +126,9 @@ def test_example(
            err = None
            try:
                if not proc.poll():
-                    _, err = proc.communicate(timeout=timeout)
+                    _, err = proc.communicate(timeout=15)
            except subprocess.TimeoutExpired as e:
                test_log.exception(
                    f'Example failed to finish within {timeout}s ??\n'
                )
                proc.kill()
                err = e.stderr
--- a/tests/msg/test_ext_types_msgspec.py
+++ b/tests/msg/test_ext_types_msgspec.py
--- a/tests/test_infected_asyncio.py
+++ b/tests/test_infected_asyncio.py
@ -26,8 +26,8 @@ from tractor import (
    to_asyncio,
    RemoteActorError,
    ContextCancelled,
    _state,
 )
 from tractor.runtime import _state
 from tractor.trionics import BroadcastReceiver
 from tractor._testing import expect_ctxc
@ -47,11 +47,12 @@ async def sleep_and_err(
    # just signature placeholders for compat with
    # ``to_asyncio.open_channel_from()``
-    chan: to_asyncio.LinkedTaskChannel|None = None,
+    to_trio: trio.MemorySendChannel|None = None,
    from_trio: asyncio.Queue|None = None,
 ):
-    if chan:
+    if to_trio:
-        chan.started_nowait('start')
+        to_trio.send_nowait('start')
    await asyncio.sleep(sleep_for)
    assert 0
@ -183,7 +184,6 @@ def test_tractor_cancels_aio(
    async def main():
        async with tractor.open_nursery(
            debug_mode=debug_mode,
            registry_addrs=[reg_addr],
        ) as an:
            portal = await an.run_in_actor(
                asyncio_actor,
@ -206,11 +206,11 @@ def test_trio_cancels_aio(
    '''
    async def main():
-        # cancel the nursery shortly after boot
+
        with trio.move_on_after(1):
-            async with tractor.open_nursery(
+            # cancel the nursery shortly after boot
-                registry_addrs=[reg_addr],
+
-            ) as tn:
+            async with tractor.open_nursery() as tn:
                await tn.run_in_actor(
                    asyncio_actor,
                    target='aio_sleep_forever',
@ -238,7 +238,7 @@ async def trio_ctx(
                trio.open_nursery() as tn,
                tractor.to_asyncio.open_channel_from(
                    sleep_and_err,
-                ) as (chan, first),
+                ) as (first, chan),
            ):
                assert first == 'start'
@ -278,9 +278,7 @@ def test_context_spawns_aio_task_that_errors(
    '''
    async def main():
        with trio.fail_after(1 + delay):
-            async with tractor.open_nursery(
+            async with tractor.open_nursery() as an:
                registry_addrs=[reg_addr],
            ) as an:
                p = await an.start_actor(
                    'aio_daemon',
                    enable_modules=[__name__],
@ -363,9 +361,7 @@ def test_aio_cancelled_from_aio_causes_trio_cancelled(
    async def main():
        an: tractor.ActorNursery
-        async with tractor.open_nursery(
+        async with tractor.open_nursery() as an:
            registry_addrs=[reg_addr],
        ) as an:
            p: tractor.Portal = await an.run_in_actor(
                asyncio_actor,
                target='aio_cancel',
@ -403,7 +399,7 @@ async def no_to_trio_in_args():
 async def push_from_aio_task(
    sequence: Iterable,
-    chan: to_asyncio.LinkedTaskChannel,
+    to_trio: trio.abc.SendChannel,
    expect_cancel: False,
    fail_early: bool,
    exit_early: bool,
@ -411,12 +407,15 @@ async def push_from_aio_task(
 ) -> None:
    try:
        # print('trying breakpoint')
        # breakpoint()
        # sync caller ctx manager
-        chan.started_nowait(True)
+        to_trio.send_nowait(True)
        for i in sequence:
            print(f'asyncio sending {i}')
-            chan.send_nowait(i)
+            to_trio.send_nowait(i)
            await asyncio.sleep(0.001)
            if (
@ -479,7 +478,7 @@ async def stream_from_aio(
                trio_exit_early
            ))
-        ) as (chan, first):
+        ) as (first, chan):
            assert first is True
@ -574,9 +573,7 @@ def test_basic_interloop_channel_stream(
    async def main():
        # TODO, figure out min timeout here!
        with trio.fail_after(6):
-            async with tractor.open_nursery(
+            async with tractor.open_nursery() as an:
                registry_addrs=[reg_addr],
            ) as an:
                portal = await an.run_in_actor(
                    stream_from_aio,
                    infect_asyncio=True,
@ -589,13 +586,9 @@ def test_basic_interloop_channel_stream(
 # TODO: parametrize the above test and avoid the duplication here?
-def test_trio_error_cancels_intertask_chan(
+def test_trio_error_cancels_intertask_chan(reg_addr):
    reg_addr: tuple[str, int],
 ):
    async def main():
-        async with tractor.open_nursery(
+        async with tractor.open_nursery() as an:
            registry_addrs=[reg_addr],
        ) as an:
            portal = await an.run_in_actor(
                stream_from_aio,
                trio_raise_err=True,
@ -630,7 +623,6 @@ def test_trio_closes_early_causes_aio_checkpoint_raise(
            async with tractor.open_nursery(
                debug_mode=debug_mode,
                # enable_stack_on_sig=True,
                registry_addrs=[reg_addr],
            ) as an:
                portal = await an.run_in_actor(
                    stream_from_aio,
@ -679,7 +671,6 @@ def test_aio_exits_early_relays_AsyncioTaskExited(
    async def main():
        with trio.fail_after(1 + delay):
            async with tractor.open_nursery(
                registry_addrs=[reg_addr],
                debug_mode=debug_mode,
                # enable_stack_on_sig=True,
            ) as an:
@ -720,7 +711,6 @@ def test_aio_errors_and_channel_propagates_and_closes(
 ):
    async def main():
        async with tractor.open_nursery(
            registry_addrs=[reg_addr],
            debug_mode=debug_mode,
        ) as an:
            portal = await an.run_in_actor(
@ -742,21 +732,15 @@ def test_aio_errors_and_channel_propagates_and_closes(
 async def aio_echo_server(
-    chan: to_asyncio.LinkedTaskChannel,
+    to_trio: trio.MemorySendChannel,
    from_trio: asyncio.Queue,
 ) -> None:
    '''
    An IPC-msg "echo server" with msgs received and relayed by
    a parent `trio.Task` into a child `asyncio.Task`
    and then repeated back to that local parent (`trio.Task`)
    and sent again back to the original calling remote actor.
-    '''
+    to_trio.send_nowait('start')
    # same semantics as `trio.TaskStatus.started()`
    chan.started_nowait('start')
    while True:
        try:
-            msg = await chan.get()
+            msg = await from_trio.get()
        except to_asyncio.TrioTaskExited:
            print(
                'breaking aio echo loop due to `trio` exit!'
@ -764,7 +748,7 @@ async def aio_echo_server(
            break
        # echo the msg back
-        chan.send_nowait(msg)
+        to_trio.send_nowait(msg)
        # if we get the terminate sentinel
        # break the echo loop
@ -781,10 +765,7 @@ async def trio_to_aio_echo_server(
 ):
    async with to_asyncio.open_channel_from(
        aio_echo_server,
-    ) as (
+    ) as (first, chan):
        chan,
        first,  # value from `chan.started_nowait()` above
    ):
        assert first == 'start'
        await ctx.started(first)
@ -795,8 +776,7 @@ async def trio_to_aio_echo_server(
                await chan.send(msg)
                out = await chan.receive()
-
+                # echo back to parent actor-task
                # echo back to parent-actor's remote parent-ctx-task!
                await stream.send(out)
                if out is None:
@ -820,7 +800,6 @@ def test_echoserver_detailed_mechanics(
 ):
    async def main():
        async with tractor.open_nursery(
            registry_addrs=[reg_addr],
            debug_mode=debug_mode,
        ) as an:
            p = await an.start_actor(
@ -999,7 +978,7 @@ async def manage_file(
    ],
    ids=[
        'bg_aio_task',
-        'just_trio_sleep',
+        'just_trio_slee',
    ],
 )
@pytest.mark.parametrize(
@ -1015,14 +994,11 @@ async def manage_file(
 )
 def test_sigint_closes_lifetime_stack(
    tmp_path: Path,
    reg_addr: tuple,
    debug_mode: bool,
    wait_for_ctx: bool,
    bg_aio_task: bool,
    trio_side_is_shielded: bool,
    debug_mode: bool,
    send_sigint_to: str,
    start_method: str,
 ):
    '''
    Ensure that an infected child can use the `Actor.lifetime_stack`
@ -1032,22 +1008,12 @@ def test_sigint_closes_lifetime_stack(
    '''
    async def main():
-        delay: float = (
+        delay = 999 if tractor.debug_mode() else 1
            999
            if debug_mode
            else 1
        )
        try:
            an: tractor.ActorNursery
            async with tractor.open_nursery(
                registry_addrs=[reg_addr],
                debug_mode=debug_mode,
            ) as an:
                # sanity
                if debug_mode:
                    assert tractor.debug_mode()
                p: tractor.Portal = await an.start_actor(
                    'file_mngr',
                    enable_modules=[__name__],
@ -1082,10 +1048,6 @@ def test_sigint_closes_lifetime_stack(
                        cpid if send_sigint_to == 'child'
                        else os.getpid()
                    )
                    print(
                        f'Sending SIGINT to {send_sigint_to!r}\n'
                        f'pid: {pid!r}\n'
                    )
                    os.kill(
                        pid,
                        signal.SIGINT,
@ -1096,37 +1058,13 @@ def test_sigint_closes_lifetime_stack(
                    # timeout should trigger!
                    if wait_for_ctx:
                        print('waiting for ctx outcome in parent..')
                        if debug_mode:
                            assert delay == 999
                        try:
-                            with trio.fail_after(
+                            with trio.fail_after(1 + delay):
                                1 + delay
                            ):
                                await ctx.wait_for_result()
                        except tractor.ContextCancelled as ctxc:
                            assert ctxc.canceller == ctx.chan.uid
                            raise
                        except trio.TooSlowError:
                            if (
                                send_sigint_to == 'child'
                                and
                                start_method == 'subint_forkserver'
                            ):
                                pytest.xfail(
                                    reason=(
                                        'SIGINT delivery to fork-child subactor is known '
                                        'to NOT SUCCEED, precisely bc we have not wired up a'
                                        '"trio SIGINT mode" in the child pre-fork.\n'
                                        'Also see `test_orphaned_subactor_sigint_cleanup_DRAFT` for'
                                        'a dedicated suite demonstrating this expected limitation as '
                                        'well as the detailed doc:\n'
                                        '`ai/conc-anal/subint_forkserver_orphan_sigint_hang_issue.md`.\n'
                                    ),
                                )
                    # XXX CASE 2: this seems to be the source of the
                    # original issue which exhibited BEFORE we put
                    # a `Actor.cancel_soon()` inside
@ -1152,21 +1090,24 @@ def test_sigint_closes_lifetime_stack(
 # ?TODO asyncio.Task fn-deco?
 # -[ ] do sig checking at import time like @context?
 # -[ ] maybe name it @aio_task ??
 # -[ ] chan: to_asyncio.InterloopChannel ??
 # -[ ] do fn-sig checking at import time like @context?
 #  |_[ ] maybe name it @a(sync)io_task ??
 # @asyncio_task  <- not bad ??
 async def raise_before_started(
    # from_trio: asyncio.Queue,
    # to_trio: trio.abc.SendChannel,
    chan: to_asyncio.LinkedTaskChannel,
 ) -> None:
    '''
    `asyncio.Task` entry point which RTEs before calling
-    `chan.started_nowait()`.
+    `to_trio.send_nowait()`.
    '''
    await asyncio.sleep(0.2)
    raise RuntimeError('Some shite went wrong before `.send_nowait()`!!')
    # to_trio.send_nowait('Uhh we shouldve RTE-d ^^ ??')
    chan.started_nowait('Uhh we shouldve RTE-d ^^ ??')
    await asyncio.sleep(float('inf'))
@ -1226,7 +1167,6 @@ def test_aio_side_raises_before_started(
        with trio.fail_after(3):
            an: tractor.ActorNursery
            async with tractor.open_nursery(
                registry_addrs=[reg_addr],
                debug_mode=debug_mode,
                loglevel=loglevel,
            ) as an:
--- a/tests/test_inter_peer_cancellation.py
+++ b/tests/test_inter_peer_cancellation.py
@ -11,30 +11,18 @@ import trio
 import tractor
 from tractor import (  # typing
    Actor,
    Context,
    ContextCancelled,
    MsgStream,
    Portal,
    RemoteActorError,
    current_actor,
    open_nursery,
    Portal,
    Context,
    ContextCancelled,
    RemoteActorError,
 )
 from tractor._testing import (
    # tractor_test,
    expect_ctxc,
 )
 from .conftest import cpu_scaling_factor
 pytestmark = pytest.mark.skipon_spawn_backend(
    'subint',
    reason=(
        'XXX SUBINT GIL-CONTENTION HANGING TEST XXX\n'
        'See oustanding issue(s)\n'
        # TODO, put issue link!
    )
 )
 # XXX TODO cases:
 # - [x] WE cancelled the peer and thus should not see any raised
 #   `ContextCancelled` as it should be reaped silently?
@ -212,7 +200,7 @@ async def stream_from_peer(
 ) -> None:
    # sanity
-    assert tractor.debug_mode() == debug_mode
+    assert tractor._state.debug_mode() == debug_mode
    peer: Portal
    try:
@ -592,7 +580,7 @@ def test_peer_canceller(
                            assert (
                                re.canceller
                                ==
-                                root.aid.uid
+                                root.uid
                            )
                        else:  # the other 2 ctxs
@ -601,7 +589,7 @@ def test_peer_canceller(
                                and (
                                    re.canceller
                                    ==
-                                    canceller.channel.aid.uid
+                                    canceller.channel.uid
                                )
                            )
@ -756,7 +744,7 @@ def test_peer_canceller(
                        # -> each context should have received
                        # a silently absorbed context cancellation
                        # in its remote nursery scope.
-                        # assert ctx.chan.aid.uid == ctx.canceller
+                        # assert ctx.chan.uid == ctx.canceller
                    # NOTE: when an inter-peer cancellation
                    # occurred, we DO NOT expect this
@ -808,12 +796,12 @@ async def basic_echo_server(
 ) -> None:
    '''
-    Just the simplest `MsgStream` echo server which resays what you
+    Just the simplest `MsgStream` echo server which resays what
-    told it but with its uid in front ;)
+    you told it but with its uid in front ;)
    '''
    actor: Actor = tractor.current_actor()
-    uid: tuple = actor.aid.uid
+    uid: tuple = actor.uid
    await ctx.started(uid)
    async with ctx.open_stream() as ipc:
        async for msg in ipc:
@ -852,7 +840,7 @@ async def serve_subactors(
    async with open_nursery() as an:
        # sanity
-        assert tractor.debug_mode() == debug_mode
+        assert tractor._state.debug_mode() == debug_mode
        await ctx.started(peer_name)
        async with ctx.open_stream() as ipc:
@ -868,7 +856,7 @@ async def serve_subactors(
                    f'|_{peer}\n'
                )
                await ipc.send((
-                    peer.chan.aid.uid,
+                    peer.chan.uid,
                    peer.chan.raddr.unwrap(),
                ))
@ -891,7 +879,7 @@ async def client_req_subactor(
 ) -> None:
    # sanity
    if debug_mode:
-        assert tractor.debug_mode()
+        assert tractor._state.debug_mode()
    # TODO: other cases to do with sub lifetimes:
    # -[ ] test that we can have the server spawn a sub
@ -978,14 +966,9 @@ async def tell_little_bro(
    caller: str = '',
    err_after: float|None = None,
-    rng_seed: int = 100,
+    rng_seed: int = 50,
    # NOTE, ensure ^ is large enough (on fast hw anyway)
    # to ensure the peer cancel req arrives before the
    # echoing dialog does itself Bp
 ):
    # contact target actor, do a stream dialog.
    lb: Portal
    echo_ipc: MsgStream
    async with (
        tractor.wait_for_actor(
            name=actor_name
@ -1000,17 +983,17 @@ async def tell_little_bro(
                else None
            ),
        ) as (sub_ctx, first),
        sub_ctx.open_stream() as echo_ipc,
    ):
        actor: Actor = current_actor()
-        uid: tuple = actor.aid.uid
+        uid: tuple = actor.uid
        for i in range(rng_seed):
            msg: tuple = (
                uid,
                i,
            )
            await echo_ipc.send(msg)
            await trio.sleep(0.001)
            resp = await echo_ipc.receive()
            print(
                f'{caller} => {actor_name}: {msg}\n'
@ -1023,9 +1006,6 @@ async def tell_little_bro(
            assert sub_uid != uid
            assert _i == i
    # XXX, usually should never get here!
    # await tractor.pause()
@pytest.mark.parametrize(
    'raise_client_error',
@ -1040,10 +1020,6 @@ def test_peer_spawns_and_cancels_service_subactor(
    raise_client_error: str,
    reg_addr: tuple[str, int],
    raise_sub_spawn_error_after: float|None,
    loglevel: str,
    test_log: tractor.log.StackLevelAdapter,
    # ^XXX, set to 'warning' to see masked-exc warnings
    # that may transpire during actor-nursery teardown.
 ):
    # NOTE: this tests for the modden `mod wks open piker` bug
    # discovered as part of implementing workspace ctx
@ -1073,7 +1049,6 @@ def test_peer_spawns_and_cancels_service_subactor(
            # NOTE: to halt the peer tasks on ctxc, uncomment this.
            debug_mode=debug_mode,
            registry_addrs=[reg_addr],
            loglevel=loglevel,
        ) as an:
            server: Portal = await an.start_actor(
                (server_name := 'spawn_server'),
@ -1109,7 +1084,7 @@ def test_peer_spawns_and_cancels_service_subactor(
                    ) as (client_ctx, client_says),
                ):
                    root: Actor = current_actor()
-                    spawner_uid: tuple = spawn_ctx.chan.aid.uid
+                    spawner_uid: tuple = spawn_ctx.chan.uid
                    print(
                        f'Server says: {first}\n'
                        f'Client says: {client_says}\n'
@ -1128,7 +1103,7 @@ def test_peer_spawns_and_cancels_service_subactor(
                    print(
                        'Sub-spawn came online\n'
                        f'portal: {sub}\n'
-                        f'.uid: {sub.actor.aid.uid}\n'
+                        f'.uid: {sub.actor.uid}\n'
                        f'chan.raddr: {sub.chan.raddr}\n'
                    )
@ -1162,7 +1137,7 @@ def test_peer_spawns_and_cancels_service_subactor(
                        assert isinstance(res, ContextCancelled)
                        assert client_ctx.cancel_acked
-                        assert res.canceller == root.aid.uid
+                        assert res.canceller == root.uid
                        assert not raise_sub_spawn_error_after
                        # cancelling the spawner sub should
@ -1196,8 +1171,8 @@ def test_peer_spawns_and_cancels_service_subactor(
                        # little_bro: a `RuntimeError`.
                        #
                        check_inner_rte(rae)
-                        assert rae.relay_uid == client.chan.aid.uid
+                        assert rae.relay_uid == client.chan.uid
-                        assert rae.src_uid == sub.chan.aid.uid
+                        assert rae.src_uid == sub.chan.uid
                        assert not client_ctx.cancel_acked
                        assert (
@ -1226,12 +1201,12 @@ def test_peer_spawns_and_cancels_service_subactor(
            except ContextCancelled as ctxc:
                _ctxc = ctxc
                print(
-                    f'{root.aid.uid} caught ctxc from ctx with {client_ctx.chan.aid.uid}\n'
+                    f'{root.uid} caught ctxc from ctx with {client_ctx.chan.uid}\n'
                    f'{repr(ctxc)}\n'
                )
                if not raise_sub_spawn_error_after:
-                    assert ctxc.canceller == root.aid.uid
+                    assert ctxc.canceller == root.uid
                else:
                    assert ctxc.canceller == spawner_uid
@ -1262,20 +1237,9 @@ def test_peer_spawns_and_cancels_service_subactor(
                # assert spawn_ctx.cancelled_caught
    async def _main():
        headroom: float = cpu_scaling_factor()
        this_fast_on_linux: float = 3
        this_fast = this_fast_on_linux * headroom
        if headroom != 1.:
            test_log.warning(
                f'Adding latency headroom on linux bc CPU scaling,\n'
                f'headroom: {headroom}\n'
                f'this_fast_on_linux: {this_fast_on_linux} -> {this_fast}\n'
            )
        with trio.fail_after(
-            this_fast
+            3 if not debug_mode
            if not debug_mode
            else 999
        ):
            await main()
--- a/tests/test_legacy_one_way_streaming.py
+++ b/tests/test_legacy_one_way_streaming.py
@ -1,11 +1,9 @@
 """
-Streaming via the, now legacy, "async-gen API".
+Streaming via async gen api
 """
 import time
 from functools import partial
 import platform
 from typing import Callable
 import trio
 import tractor
@ -21,11 +19,7 @@ def test_must_define_ctx():
        async def no_ctx():
            pass
-    assert (
+    assert "no_ctx must be `ctx: tractor.Context" in str(err.value)
        "no_ctx must be `ctx: tractor.Context"
        in
        str(err.value)
    )
    @tractor.stream
    async def has_ctx(ctx):
@ -75,14 +69,14 @@ async def stream_from_single_subactor(
    async with tractor.open_nursery(
        registry_addrs=[reg_addr],
        start_method=start_method,
-    ) as an:
+    ) as nursery:
        async with tractor.find_actor('streamerd') as portals:
            if not portals:
                # no brokerd actor found
-                portal = await an.start_actor(
+                portal = await nursery.start_actor(
                    'streamerd',
                    enable_modules=[__name__],
                )
@ -122,22 +116,11 @@ async def stream_from_single_subactor(
@pytest.mark.parametrize(
-    'stream_func',
+    'stream_func', [async_gen_stream, context_stream]
    [
        async_gen_stream,
        context_stream,
    ],
    ids='stream_func={}'.format
 )
-def test_stream_from_single_subactor(
+def test_stream_from_single_subactor(reg_addr, start_method, stream_func):
-    reg_addr: tuple,
+    """Verify streaming from a spawned async generator.
-    start_method: str,
+    """
    stream_func: Callable,
 ):
    '''
    Verify streaming from a spawned async generator.
    '''
    trio.run(
        partial(
            stream_from_single_subactor,
@ -149,9 +132,10 @@ def test_stream_from_single_subactor(
 # this is the first 2 actors, streamer_1 and streamer_2
-async def stream_data(seed: int):
+async def stream_data(seed):
    for i in range(seed):
        yield i
        # trigger scheduler to simulate practical usage
@ -159,17 +143,15 @@ async def stream_data(seed: int):
 # this is the third actor; the aggregator
-async def aggregate(seed: int):
+async def aggregate(seed):
-    '''
+    """Ensure that the two streams we receive match but only stream
    Ensure that the two streams we receive match but only stream
    a single set of values to the parent.
-
+    """
-    '''
+    async with tractor.open_nursery() as nursery:
    async with tractor.open_nursery() as an:
        portals = []
        for i in range(1, 3):
            # fork point
-            portal = await an.start_actor(
+            portal = await nursery.start_actor(
                name=f'streamer_{i}',
                enable_modules=[__name__],
            )
@ -182,28 +164,20 @@ async def aggregate(seed: int):
            async with send_chan:
                async with portal.open_stream_from(
-                    stream_data,
+                    stream_data, seed=seed,
                    seed=seed,
                ) as stream:
                    async for value in stream:
                        # leverage trio's built-in backpressure
                        await send_chan.send(value)
-            print(
+            print(f"FINISHED ITERATING {portal.channel.uid}")
                f'FINISHED ITERATING!\n'
                f'peer: {portal.channel.aid.uid}'
            )
        # spawn 2 trio tasks to collect streams and push to a local queue
-        async with trio.open_nursery() as tn:
+        async with trio.open_nursery() as n:
            for portal in portals:
-                tn.start_soon(
+                n.start_soon(push_to_chan, portal, send_chan.clone())
                    push_to_chan,
                    portal,
                    send_chan.clone(),
                )
            # close this local task's reference to send side
            await send_chan.aclose()
@ -220,21 +194,20 @@ async def aggregate(seed: int):
            print("FINISHED ITERATING in aggregator")
-        await an.cancel()
+        await nursery.cancel()
        print("WAITING on `ActorNursery` to finish")
    print("AGGREGATOR COMPLETE!")
-async def a_quadruple_example() -> list[int]:
+# this is the main actor and *arbiter*
-    '''
+async def a_quadruple_example():
-    Open the root-actor which is also a "registrar".
+    # a nursery which spawns "actors"
    async with tractor.open_nursery() as nursery:
    '''
    async with tractor.open_nursery() as an:
        seed = int(1e3)
        pre_start = time.time()
-        portal = await an.start_actor(
+        portal = await nursery.start_actor(
            name='aggregator',
            enable_modules=[__name__],
        )
@ -255,14 +228,8 @@ async def a_quadruple_example() -> list[int]:
        return result_stream
-async def cancel_after(
+async def cancel_after(wait, reg_addr):
-    wait: float,
+    async with tractor.open_root_actor(registry_addrs=[reg_addr]):
    reg_addr: tuple,
 ) -> list[int]:
    async with tractor.open_root_actor(
        registry_addrs=[reg_addr],
    ):
        with trio.move_on_after(wait):
            return await a_quadruple_example()
@ -273,10 +240,6 @@ def time_quad_ex(
    ci_env: bool,
    spawn_backend: str,
 ):
    non_linux: bool = (_sys := platform.system()) != 'Linux'
    if ci_env and non_linux:
        pytest.skip(f'Test is too flaky on {_sys!r} in CI')
    if spawn_backend == 'mp':
        '''
        no idea but the  mp *nix runs are flaking out here often...
@ -284,59 +247,32 @@ def time_quad_ex(
        '''
        pytest.skip("Test is too flaky on mp in CI")
-    timeout = 7 if non_linux else 4
+    timeout = 7 if platform.system() in ('Windows', 'Darwin') else 4
    start = time.time()
-    results: list[int] = trio.run(
+    results = trio.run(cancel_after, timeout, reg_addr)
-        cancel_after,
+    diff = time.time() - start
        timeout,
        reg_addr,
    )
    diff: float = time.time() - start
    assert results
    return results, diff
 def test_a_quadruple_example(
-    time_quad_ex: tuple[list[int], float],
+    time_quad_ex: tuple,
    ci_env: bool,
    spawn_backend: str,
    test_log: tractor.log.StackLevelAdapter,
 ):
    '''
-    This also serves as a "we'd like to be this fast" smoke test
+    This also serves as a kind of "we'd like to be this fast test".
    given past empirical eval of this suite.
    '''
    non_linux: bool = (_sys := platform.system()) != 'Linux'
    this_fast_on_linux: float = 3
    this_fast = (
        6 if non_linux
        else this_fast_on_linux
    )
    # ^ XXX NOTE,
    # i've noticed that tweaking the CPU governor setting
    # to not "always" enable "turbo" mode can result in latency
    # which causes this limit to be too little. Not sure if it'd
    # be worth it to adjust the linux value based on reading the
    # CPU conf from the sys?
    #
    # For ex, see the `auto-cpufreq` docs on such settings,
    # https://github.com/AdnanHodzic/auto-cpufreq?tab=readme-ov-file#example-config-file-contents
    #
    # HENCE this below latency-headroom compensation logic..
    from .conftest import cpu_scaling_factor
    headroom: float = cpu_scaling_factor()
    if headroom != 1.:
        this_fast = this_fast_on_linux * headroom
        test_log.warning(
            f'Adding latency headroom on linux bc CPU scaling,\n'
            f'headroom: {headroom}\n'
            f'this_fast_on_linux: {this_fast_on_linux} -> {this_fast}\n'
        )
    results, diff = time_quad_ex
    assert results
    this_fast = (
        6 if platform.system() in (
            'Windows',
            'Darwin',
        )
        else 3
    )
    assert diff < this_fast
@ -345,58 +281,43 @@ def test_a_quadruple_example(
    list(map(lambda i: i/10, range(3, 9)))
 )
 def test_not_fast_enough_quad(
-    reg_addr: tuple,
+    reg_addr, time_quad_ex, cancel_delay, ci_env, spawn_backend
    time_quad_ex: tuple[list[int], float],
    cancel_delay: float,
    ci_env: bool,
    spawn_backend: str,
 ):
-    '''
+    """Verify we can cancel midway through the quad example and all actors
-    Verify we can cancel midway through the quad example and all
+    cancel gracefully.
-    actors cancel gracefully.
+    """
    '''
    results, diff = time_quad_ex
    delay = max(diff - cancel_delay, 0)
-    results = trio.run(
+    results = trio.run(cancel_after, delay, reg_addr)
-        cancel_after,
+    system = platform.system()
-        delay,
+    if system in ('Windows', 'Darwin') and results is not None:
        reg_addr,
    )
    system: str = platform.system()
    if (
        system in ('Windows', 'Darwin')
        and
        results is not None
    ):
        # In CI environments it seems later runs are quicker than the
        # first, so just ignore these
-        print(f'Woa there {system} caught your breath eh?')
+        print(f"Woa there {system} caught your breath eh?")
    else:
        # should be cancelled mid-streaming
        assert results is None
-@tractor_test(timeout=20)
+@tractor_test
 async def test_respawn_consumer_task(
-    reg_addr: tuple,
+    reg_addr,
-    spawn_backend: str,
+    spawn_backend,
-    loglevel: str,
+    loglevel,
 ):
-    '''
+    """Verify that ``._portal.ReceiveStream.shield()``
    Verify that ``._portal.ReceiveStream.shield()``
    successfully protects the underlying IPC channel from being closed
    when cancelling and respawning a consumer task.
    This also serves to verify that all values from the stream can be
    received despite the respawns.
-    '''
+    """
    stream = None
-    async with tractor.open_nursery() as an:
+    async with tractor.open_nursery() as n:
-        portal = await an.start_actor(
+        portal = await n.start_actor(
            name='streamer',
            enable_modules=[__name__]
        )
--- a/tests/test_local.py
+++ b/tests/test_local.py
@ -1,5 +1,5 @@
 """
-Registrar and "local" actor api
+Arbiter and "local" actor api
 """
 import time
@ -12,11 +12,11 @@ from tractor._testing import tractor_test
@pytest.mark.trio
 async def test_no_runtime():
-    """A registrar must be established before any nurseries
+    """An arbitter must be established before any nurseries
    can be created.
-    (In other words ``tractor.open_root_actor()`` must be
+    (In other words ``tractor.open_root_actor()`` must be engaged at
-    engaged at some point?)
+    some point?)
    """
    with pytest.raises(RuntimeError) :
        async with tractor.find_actor('doggy'):
@ -25,9 +25,9 @@ async def test_no_runtime():
@tractor_test
 async def test_self_is_registered(reg_addr):
-    "Verify waiting on the registrar to register itself using the standard api."
+    "Verify waiting on the arbiter to register itself using the standard api."
    actor = tractor.current_actor()
-    assert actor.is_registrar
+    assert actor.is_arbiter
    with trio.fail_after(0.2):
        async with tractor.wait_for_actor('root') as portal:
            assert portal.channel.uid[0] == 'root'
@ -35,11 +35,11 @@ async def test_self_is_registered(reg_addr):
@tractor_test
 async def test_self_is_registered_localportal(reg_addr):
-    "Verify waiting on the registrar to register itself using a local portal."
+    "Verify waiting on the arbiter to register itself using a local portal."
    actor = tractor.current_actor()
-    assert actor.is_registrar
+    assert actor.is_arbiter
    async with tractor.get_registry(reg_addr) as portal:
-        assert isinstance(portal, tractor.runtime._portal.LocalPortal)
+        assert isinstance(portal, tractor._portal.LocalPortal)
        with trio.fail_after(0.2):
            sockaddr = await portal.run_from_ns(
@ -57,8 +57,8 @@ def test_local_actor_async_func(reg_addr):
        async with tractor.open_root_actor(
            registry_addrs=[reg_addr],
        ):
-            # registrar is started in-proc if dne
+            # arbiter is started in-proc if dne
-            assert tractor.current_actor().is_registrar
+            assert tractor.current_actor().is_arbiter
            for i in range(10):
                nums.append(i)
--- a/tests/test_log_sys.py
+++ b/tests/test_log_sys.py
@ -1,185 +0,0 @@
 '''
 `tractor.log`-wrapping unit tests.
 '''
 from pathlib import Path
 import shutil
 from types import ModuleType
 import pytest
 import tractor
 from tractor import (
    _code_load,
    log,
 )
 def test_root_pkg_not_duplicated_in_logger_name():
    '''
    Passing both a `pkg_name` and a `name` which share the same
    `<root_name>.` prefix must not repeat that root token in the
    sub-logger's `StackLevelAdapter.name: str`.

    '''
    root_name: str = 'pylib'
    mod_ns_path: str = 'pylib.subpkg.mod'

    # this unit test expects that no actor runtime is up.
    maybe_actor = tractor.current_actor(err_on_no_runtime=False)
    assert not maybe_actor

    root_log = log.get_logger(pkg_name=root_name, mk_sublog=False)
    child_log = log.get_logger(pkg_name=root_name, name=mod_ns_path)
    assert child_log is not root_log

    # the root pkg token shows up exactly once and the leaf
    # module token is not part of the logger's name.
    root_occurrences: int = child_log.name.count(root_log.name)
    assert root_occurrences == 1
    assert 'mod' not in child_log.name
 def test_implicit_mod_name_applied_for_child(
    testdir: pytest.Pytester,
    loglevel: str,
 ):
    '''
    Verify that when `.log.get_logger(pkg_name='pylib')` is called
    from a given sub-mod from within the `pylib` pkg-path, we
    implicitly set the equiv of `name=__name__` from the caller's
    module.

    A throwaway package is written under the `testdir`,

      snakelib/
        __init__.py
        mod.py
        subpkg/
          __init__.py
          submod.py

    where every module runs the same `get_logger()` call; each
    one is then imported and its `log.name` checked.

    '''
    # tractor.log.get_console_log(level=loglevel)
    proj_name: str = 'snakelib'
    mod_code: str = (
        f'import tractor\n'
        f'\n'
        # if you need to trace `testdir` stuff @ import-time..
        # f'breakpoint()\n'
        f'log = tractor.log.get_logger(pkg_name="{proj_name}")\n'
    )

    # create a sub-module for each pkg layer
    _lib = testdir.mkpydir(proj_name)
    pkg: Path = Path(_lib)
    pkg_init_mod: Path = pkg / "__init__.py"
    pkg_init_mod.write_text(mod_code)

    # NOTE, named `subpkg_dir` so it is not clobbered by the
    # `from snakelib import subpkg` further below.
    subpkg_dir: Path = pkg / 'subpkg'
    subpkg_dir.mkdir()
    subpkg_init_mod: Path = subpkg_dir / "__init__.py"
    # `.write_text()` creates the file, no prior `.touch()` needed.
    subpkg_init_mod.write_text(mod_code)

    # a template module which is copied in as the leaf module
    # of each pkg layer.
    _submod: Path = testdir.makepyfile(
        _mod=mod_code,
    )
    for dst in (
        pkg / 'mod.py',
        subpkg_dir / 'submod.py',
    ):
        shutil.copyfile(_submod, dst)

    testdir.chdir()
    # NOTE, to introspect the py-file-module-layout use (in .xsh
    # syntax): `ranger @str(testdir)`

    # XXX NOTE, once the "top level" pkg mod has been
    # imported, we can then use `import` syntax to
    # import its sub-pkgs and modules.
    #
    # The returned module obj is not needed here; the pkg is
    # re-acquired via a normal `import` below.
    _code_load.load_module_from_path(
        pkg_init_mod,
        module_name=proj_name,
    )
    pkg_root_log = log.get_logger(
        pkg_name=proj_name,
        mk_sublog=False,
    )
    # the top level pkg-mod, created just now,
    # by above API call.
    assert pkg_root_log.name == proj_name
    assert not pkg_root_log.logger.getChildren()
    #
    # ^TODO! test this same output but created via a `get_logger()`
    # call in the `snakelib.__init__.py`!!

    # NOTE, the pkg-level "init mod" should of course
    # have the same name as the package ns-path.
    import snakelib as init_mod
    assert init_mod.log.name == proj_name

    # NOTE, a first-pkg-level sub-module should only
    # use the package-name since the leaf-node-module
    # will be included in log headers by default.
    from snakelib import mod
    assert mod.log.name == proj_name

    from snakelib import subpkg
    assert (
        subpkg.log.name
        ==
        subpkg.__package__
        ==
        f'{proj_name}.subpkg'
    )

    from snakelib.subpkg import submod
    assert (
        submod.log.name
        ==
        submod.__package__
        ==
        f'{proj_name}.subpkg'
    )

    sub_logs = pkg_root_log.logger.getChildren()
    assert len(sub_logs) == 1  # only one nested sub-pkg module
    assert submod.log.logger in sub_logs
 # TODO, moar tests against existing feats:
 # ------ - ------
 # - [ ] color settings?
 # - [ ] header contents like,
 #   - actor + thread + task names from various conc-primitives,
 # - [ ] `StackLevelAdapter` extensions,
 #   - our custom levels/methods: `transport|runtime|cancel|pdb|devx`
 # - [ ] custom-headers support?
 #
 # TODO, test driven dev of new-ideas/long-wanted feats,
 # ------ - ------
 # - [ ] https://github.com/goodboy/tractor/issues/244
 #  - [ ] @catern mentioned using a sync / deterministic sys
 #       and in particular `svlogd`?
 #       |_ https://smarden.org/runit/svlogd.8
 # - [ ] using adapter vs. filters?
 #    - https://stackoverflow.com/questions/60691759/add-information-to-every-log-message-in-python-logging/61830838#61830838
 # - [ ] `.at_least_level()` optimization which short circuits wtv
 #      `logging` is doing behind the scenes when the level filters
 #      the emission..?
 # - [ ] use of `.log.get_console_log()` in subactors and the
 #    subtleties of ensuring it actually emits from a subproc.
 # - [ ] this idea of activating per-subsys emissions with some
 #    kind of `.name` filter passed to the runtime or maybe configured
 #    via the root `StackLevelAdapter`?
 # - [ ] use of `logging.config.dictConfig()` to simplify the impl
 #      of any of ^^ ??
 #    - https://stackoverflow.com/questions/7507825/where-is-a-complete-example-of-logging-config-dictconfig
 #    - https://docs.python.org/3/library/logging.config.html#configuration-dictionary-schema
 #    - https://docs.python.org/3/library/logging.config.html#logging.config.dictConfig
--- a/tests/test_multi_program.py
+++ b/tests/test_multi_program.py
@ -1,13 +1,8 @@
 """
 Multiple python programs invoking the runtime.
 """
 from __future__ import annotations
 import platform
 import subprocess
 import time
 from typing import (
    TYPE_CHECKING,
 )
 import pytest
 import trio
@ -15,32 +10,14 @@ import tractor
 from tractor._testing import (
    tractor_test,
 )
 from tractor import (
    current_actor,
    Actor,
    Context,
    Portal,
 )
 from tractor.runtime import _state
 from .conftest import (
    sig_prog,
    _INT_SIGNAL,
    _INT_RETURN_CODE,
 )
 if TYPE_CHECKING:
    from tractor.msg import Aid
    from tractor.discovery._addr import (
        UnwrappedAddress,
    )
-
+def test_abort_on_sigint(daemon):
 _non_linux: bool = platform.system() != 'Linux'
 def test_abort_on_sigint(
    daemon: subprocess.Popen,
 ):
    assert daemon.returncode is None
    time.sleep(0.1)
    sig_prog(daemon, _INT_SIGNAL)
@ -53,131 +30,39 @@ def test_abort_on_sigint(
@tractor_test
-async def test_cancel_remote_registrar(
+async def test_cancel_remote_arbiter(daemon, reg_addr):
-    daemon: subprocess.Popen,
+    assert not tractor.current_actor().is_arbiter
    reg_addr: UnwrappedAddress,
 ):
    assert not current_actor().is_registrar
    async with tractor.get_registry(reg_addr) as portal:
        await portal.cancel_actor()
    time.sleep(0.1)
-    # the registrar channel server is cancelled but not its main task
+    # the arbiter channel server is cancelled but not its main task
    assert daemon.returncode is None
-    # no registrar socket should exist
+    # no arbiter socket should exist
    with pytest.raises(OSError):
        async with tractor.get_registry(reg_addr) as portal:
            pass
-def test_register_duplicate_name(
+def test_register_duplicate_name(daemon, reg_addr):
-    daemon: subprocess.Popen,
+
    reg_addr: UnwrappedAddress,
 ):
    async def main():
        async with tractor.open_nursery(
            registry_addrs=[reg_addr],
-        ) as an:
+        ) as n:
-            assert not current_actor().is_registrar
+            assert not tractor.current_actor().is_arbiter
-            p1 = await an.start_actor('doggy')
+            p1 = await n.start_actor('doggy')
-            p2 = await an.start_actor('doggy')
+            p2 = await n.start_actor('doggy')
            async with tractor.wait_for_actor('doggy') as portal:
                assert portal.channel.uid in (p2.channel.uid, p1.channel.uid)
-            await an.cancel()
+            await n.cancel()
-    # XXX, run manually since we want to start this root **after**
+    # run it manually since we want to start **after**
-    # the other "daemon" program with it's own root.
+    # the other "daemon" program
    trio.run(main)
@tractor.context
 async def get_root_portal(
    ctx: Context,
 ):
    '''
    Connect back to the root actor manually (using the
    `tractor.discovery` API) and ensure its contact info is the same
    as our immediate parent.

    `ctx.started()` is only called once every check has passed.

    '''
    sub: Actor = current_actor()
    rtvs: dict = _state._runtime_vars
    raddrs: list[UnwrappedAddress] = rtvs['_root_addrs']
    # await tractor.pause()
    # XXX, in case the sub->root discovery breaks you might need
    # this (i know i did Xp)!!
    # from tractor.devx import mk_pdb
    # mk_pdb().set_trace()

    # exactly one root address must be recorded in the runtime vars
    # and it must match the remote address of our parent channel.
    assert (
        len(raddrs) == 1
        and
        list(sub._parent_chan.raddr.unwrap()) in raddrs
    )
    # connect back to our immediate parent which should also
    # be the actor-tree's root.
    from tractor.discovery._api import get_root
    ptl: Portal
    async with get_root() as ptl:
        root_aid: Aid = ptl.chan.aid
        parent_ptl: Portal = current_actor().get_parent()
        # the portal from `get_root()` and the one to our parent
        # must report the same actor id, named 'root'.
        assert (
            root_aid.name == 'root'
            and
            parent_ptl.chan.aid == root_aid
        )
        await ctx.started()
 def test_non_registrar_spawns_child(
    daemon: subprocess.Popen,
    reg_addr: UnwrappedAddress,
    loglevel: str,
    debug_mode: bool,
    ci_env: bool,
 ):
    '''
    Ensure a non-registrar (serving) root actor can spawn a sub and
    that sub can connect back (manually) to its parent, which is the
    root, without issue.

    More or less this audits the global contact info in
    `._state._runtime_vars`.

    '''
    async def main():
        # XXX, since apparently on macos in GH's CI it can be a race
        # with the `daemon` registrar on grabbing the socket-addr..
        if ci_env and _non_linux:
            await trio.sleep(.5)
        async with tractor.open_nursery(
            registry_addrs=[reg_addr],
            loglevel=loglevel,
            debug_mode=debug_mode,
        ) as an:
            # this root must not be acting as the registrar.
            actor: Actor = tractor.current_actor()
            assert not actor.is_registrar
            sub_ptl: Portal = await an.start_actor(
                name='sub',
                enable_modules=[__name__],
            )
            # entering the ctx requires `get_root_portal()` to reach
            # its `ctx.started()` call, which follows its checks.
            async with sub_ptl.open_context(
                get_root_portal,
            ) as (ctx, _):
                print('Waiting for `sub` to connect back to us..')
            await an.cancel()
    # XXX, run manually since we want to start this root **after**
    # the other "daemon" program with its own root.
    trio.run(main)
--- a/tests/msg/test_pldrx_limiting.py
+++ b/tests/msg/test_pldrx_limiting.py
@ -61,7 +61,7 @@ async def maybe_expect_raises(
    Async wrapper for ensuring errors propagate from the inner scope.
    '''
-    if tractor.debug_mode():
+    if tractor._state.debug_mode():
        timeout += 999
    with trio.fail_after(timeout):
--- a/tests/test_pubsub.py
+++ b/tests/test_pubsub.py
@ -7,14 +7,6 @@ import tractor
 from tractor.experimental import msgpub
 from tractor._testing import tractor_test
 pytestmark = pytest.mark.skipon_spawn_backend(
    'subint',
    reason=(
        'XXX SUBINT HANGING TEST XXX\n'
        'See oustanding issue(s)\n'
        # TODO, put issue link!
    )
 )
 def test_type_checks():
--- a/tests/test_reg_err_types.py
+++ b/tests/test_reg_err_types.py
@ -1,333 +0,0 @@
 '''
 Verify that externally registered remote actor error
 types are correctly relayed, boxed, and re-raised across
 IPC actor hops via `reg_err_types()`.
 Also ensure that when custom error types are NOT registered
 the framework indicates the lookup failure to the user.
 '''
 import pytest
 import trio
 import tractor
 from tractor import (
    Context,
    Portal,
    RemoteActorError,
 )
 from tractor._exceptions import (
    get_err_type,
    reg_err_types,
 )
 # -- custom app-level errors for testing --
 class CustomAppError(Exception):
    '''
    Stand-in for a user-application error; meant to be
    boxed and relayed over `tractor` IPC once registered.

    '''
 class AnotherAppError(Exception):
    '''
    An additional custom error type, used to exercise
    registration of more than one type.

    '''
 class UnregisteredAppError(Exception):
    '''
    A custom error type which is deliberately NEVER passed
    to `reg_err_types()`, so that the framework's handling
    of an unknown remote error type can be checked.

    '''
 # -- remote-task endpoints --
@tractor.context
 async def raise_custom_err(
    ctx: Context,
 ) -> None:
    '''
    Remote ep that raises a `CustomAppError`
    after sync-ing with the caller.
    '''
    await ctx.started()
    raise CustomAppError(
        'the app exploded remotely'
    )
@tractor.context
 async def raise_another_err(
    ctx: Context,
 ) -> None:
    '''
    Remote ep that raises `AnotherAppError`.
    '''
    await ctx.started()
    raise AnotherAppError(
        'another app-level kaboom'
    )
@tractor.context
 async def raise_unreg_err(
    ctx: Context,
 ) -> None:
    '''
    Remote ep that raises an `UnregisteredAppError`
    which has NOT been `reg_err_types()`-registered.
    '''
    await ctx.started()
    raise UnregisteredAppError(
        'this error type is unknown to tractor'
    )
 # -- unit tests for the type-registry plumbing --
 class TestRegErrTypesPlumbing:
    '''
    In-process checks of the `reg_err_types()` and
    `get_err_type()` pair; no IPC is involved.

    '''
    def test_unregistered_type_returns_none(self):
        '''
        Looking up a custom error name which has not been
        registered yields `None`.

        '''
        assert get_err_type('CustomAppError') is None

    def test_register_and_lookup(self):
        '''
        Once passed to `reg_err_types()` a custom type is
        returned by `get_err_type()` for its name.

        '''
        reg_err_types([CustomAppError])
        found = get_err_type('CustomAppError')
        assert found is CustomAppError

    def test_register_multiple_types(self):
        '''
        Every type in a registered list resolves on its own.

        '''
        expected: dict[str, type] = {
            'CustomAppError': CustomAppError,
            'AnotherAppError': AnotherAppError,
        }
        reg_err_types(list(expected.values()))
        for type_name, err_type in expected.items():
            assert get_err_type(type_name) is err_type

    def test_builtin_types_always_resolve(self):
        '''
        Builtin errors such as `RuntimeError` and `ValueError`
        resolve with no registration call at all.

        '''
        builtin_errs: dict[str, type] = {
            'RuntimeError': RuntimeError,
            'ValueError': ValueError,
        }
        for type_name, err_type in builtin_errs.items():
            assert get_err_type(type_name) is err_type

    def test_tractor_native_types_resolve(self):
        '''
        `tractor`'s own exc types (e.g. `ContextCancelled`)
        resolve with no registration call.

        '''
        native_type = get_err_type('ContextCancelled')
        assert native_type is tractor.ContextCancelled

    def test_boxed_type_str_without_ipc_msg(self):
        '''
        A `RemoteActorError` built without an IPC msg (and
        with no resolvable type) reports `'<unknown>'` as
        its `.boxed_type_str`.

        '''
        assert RemoteActorError('test').boxed_type_str == '<unknown>'
 # -- IPC-level integration tests --
 def test_registered_custom_err_relayed(
    debug_mode: bool,
    tpt_proto: str,
 ):
    '''
    With `CustomAppError` registered through `reg_err_types()`
    on BOTH ends of the IPC dialog, the parent must get
    a `RemoteActorError` whose `.boxed_type` is the original
    custom error type.

    '''
    reg_err_types([CustomAppError])

    async def main():
        nursery_kwargs = dict(
            debug_mode=debug_mode,
            enable_transports=[tpt_proto],
        )
        async with tractor.open_nursery(**nursery_kwargs) as nursery:
            portal: Portal = await nursery.start_actor(
                'custom-err-raiser',
                enable_modules=[__name__],
            )
            async with portal.open_context(
                raise_custom_err,
            ) as (ctx, first):
                assert not first
                try:
                    await ctx.wait_for_result()
                except RemoteActorError as relayed:
                    # check the relayed error in-place then let it
                    # propagate to the `pytest.raises()` below.
                    assert relayed.boxed_type is CustomAppError
                    assert relayed.src_type is CustomAppError
                    tb_text: str = str(relayed.tb_str)
                    assert 'the app exploded remotely' in tb_text
                    raise

    with pytest.raises(RemoteActorError) as excinfo:
        trio.run(main)

    assert excinfo.value.boxed_type is CustomAppError
 def test_registered_another_err_relayed(
    debug_mode: bool,
    tpt_proto: str,
 ):
    '''
    End-to-end relay check over IPC for a second registered
    custom type, `AnotherAppError`, verifying that registration
    works for more than one error type.

    '''
    reg_err_types([AnotherAppError])

    async def main():
        nursery_kwargs = dict(
            debug_mode=debug_mode,
            enable_transports=[tpt_proto],
        )
        async with tractor.open_nursery(**nursery_kwargs) as nursery:
            portal: Portal = await nursery.start_actor(
                'another-err-raiser',
                enable_modules=[__name__],
            )
            async with portal.open_context(
                raise_another_err,
            ) as (ctx, first):
                assert not first
                try:
                    await ctx.wait_for_result()
                except RemoteActorError as relayed:
                    assert relayed.boxed_type is AnotherAppError
                    raise

            await nursery.cancel()

    with pytest.raises(RemoteActorError) as excinfo:
        trio.run(main)

    assert excinfo.value.boxed_type is AnotherAppError
 def test_unregistered_err_still_relayed(
    debug_mode: bool,
    tpt_proto: str,
 ):
    '''
    A custom error type which was NOT given to `reg_err_types()`
    must still arrive as a `RemoteActorError` carrying all the
    string-level info (traceback, type name).

    Since the type obj can not be resolved, `.boxed_type` and
    `.src_type` are `None`, while `.boxed_type_str` and
    `.src_type_str` keep reporting the original type name taken
    from the IPC msg.

    This pins down the expected limitation: with no
    `reg_err_types()` call, `.boxed_type` can NOT resolve to the
    original Python type.

    '''
    # NOTE: on purpose there is NO
    # `reg_err_types([UnregisteredAppError])` call here.
    async def main():
        nursery_kwargs = dict(
            debug_mode=debug_mode,
            enable_transports=[tpt_proto],
        )
        async with tractor.open_nursery(**nursery_kwargs) as nursery:
            portal: Portal = await nursery.start_actor(
                'unreg-err-raiser',
                enable_modules=[__name__],
            )
            async with portal.open_context(
                raise_unreg_err,
            ) as (ctx, first):
                assert not first
                await ctx.wait_for_result()

            await nursery.cancel()

    with pytest.raises(RemoteActorError) as excinfo:
        trio.run(main)

    relayed = excinfo.value

    # relayed despite no registration: the type objs are
    # unresolvable..
    assert relayed.boxed_type is None # NOT `UnregisteredAppError`
    assert relayed.src_type is None

    # ..but the type names survive the IPC round-trip
    # through the `Error` msg fields.
    type_name: str = 'UnregisteredAppError'
    assert relayed.src_type_str == type_name
    assert relayed.boxed_type_str == type_name

    # and so does the original traceback content.
    tb_text = relayed.tb_str
    assert 'this error type is unknown' in tb_text
    assert 'UnregisteredAppError' in tb_text
--- a/tests/test_resource_cache.py
+++ b/tests/test_resource_cache.py
@ -12,14 +12,14 @@ import trio
 import tractor
 from tractor.trionics import (
    maybe_open_context,
    collapse_eg,
 )
 from tractor.log import (
    get_console_log,
    get_logger,
 )
 log = get_logger(__name__)
 log = get_logger()
 _resource: int = 0
@ -213,12 +213,9 @@ def test_open_local_sub_to_stream(
    N local tasks using `trionics.maybe_open_context()`.
    '''
-    from .conftest import cpu_scaling_factor
+    timeout: float = 3.6
-    timeout: float = (
+    if platform.system() == "Windows":
-        4
+        timeout: float = 10
        if not platform.system() == "Windows"
        else 10
    ) * cpu_scaling_factor()
    if debug_mode:
        timeout = 999
@ -322,7 +319,7 @@ def test_open_local_sub_to_stream(
@acm
-async def maybe_cancel_outer_cs(
+async def cancel_outer_cs(
    cs: trio.CancelScope|None = None,
    delay: float = 0,
 ):
@ -336,31 +333,12 @@ async def maybe_cancel_outer_cs(
    if cs:
        log.info('task calling cs.cancel()')
        cs.cancel()
-
+    trio.lowlevel.checkpoint()
    yield
-
+    await trio.sleep_forever()
    if cs:
        await trio.sleep_forever()
    # XXX, if not cancelled we'll leak this inf-blocking
    # subtask to the actor's service tn..
    else:
        await trio.lowlevel.checkpoint()
@pytest.mark.parametrize(
    'delay',
    [0.05, 0.5, 1],
    ids="pre_sleep_delay={}".format,
 )
@pytest.mark.parametrize(
    'cancel_by_cs',
    [True, False],
    ids="cancel_by_cs={}".format,
 )
 def test_lock_not_corrupted_on_fast_cancel(
    delay: float,
    cancel_by_cs: bool,
    debug_mode: bool,
    loglevel: str,
 ):
@ -377,14 +355,17 @@ def test_lock_not_corrupted_on_fast_cancel(
      due to it having erroneously exited without calling
      `lock.release()`.
    '''
    delay: float = 1.
    async def use_moc(
        cs: trio.CancelScope|None,
        delay: float,
        cs: trio.CancelScope|None = None,
    ):
        log.info('task entering moc')
        async with maybe_open_context(
-            maybe_cancel_outer_cs,
+            cancel_outer_cs,
            kwargs={
                'cs': cs,
                'delay': delay,
@ -395,13 +376,7 @@ def test_lock_not_corrupted_on_fast_cancel(
            else:
                log.info('1st task entered')
-            if cs:
+            await trio.sleep_forever()
                await trio.sleep_forever()
            else:
                await trio.sleep(delay)
        # ^END, exit shared ctx.
    async def main():
        with trio.fail_after(delay + 2):
@ -410,7 +385,6 @@ def test_lock_not_corrupted_on_fast_cancel(
                    debug_mode=debug_mode,
                    loglevel=loglevel,
                ),
                # ?TODO, pass this as the parent tn?
                trio.open_nursery() as tn,
            ):
                get_console_log('info')
@ -418,206 +392,15 @@ def test_lock_not_corrupted_on_fast_cancel(
                cs = tn.cancel_scope
                tn.start_soon(
                    use_moc,
                    cs,
                    delay,
                    cs if cancel_by_cs else None,
                    name='child',
                )
                with trio.CancelScope() as rent_cs:
                    await use_moc(
                        cs=rent_cs,
                        delay=delay,
                        cs=rent_cs if cancel_by_cs else None,
                    )
    trio.run(main)
@acm
 async def acm_with_resource(resource_id: str):
    '''
    Yield `resource_id` as the cached value.
    Used to verify per-`ctx_key` isolation when the same
    `acm_func` is called with different kwargs.
    '''
    yield resource_id
 def test_per_ctx_key_resource_lifecycle(
    debug_mode: bool,
    loglevel: str,
 ):
    '''
    Verify that `maybe_open_context()` correctly isolates resource
    lifecycle **per `ctx_key`** when the same `acm_func` is called
    with different kwargs.

    Previously `_Cache.users` was a single global `int` and
    `_Cache.locks` was keyed on `fid` (function ID), so calling
    the same `acm_func` with different kwargs (producing different
    `ctx_key`s) meant:

    - teardown for one key was skipped bc the *other* key's users
      kept the global count > 0,
    - and re-entry could hit the old
      `assert not resources.get(ctx_key)` crash during the
      teardown window.

    This was the root cause of a long-standing bug in piker's
    `brokerd.kraken` backend.

    '''
    # overall deadline for the whole scenario; effectively disabled
    # when running in debug mode.
    timeout: float = 6
    if debug_mode:
        timeout = 999
    async def main():
        # `a_ready`: set once resource 'a' has been entered.
        # `a_exit`: set by the main task to release resource 'a'.
        a_ready = trio.Event()
        a_exit = trio.Event()
        async def hold_resource_a():
            '''
            Open resource 'a' and keep it alive until signalled.
            '''
            async with maybe_open_context(
                acm_with_resource,
                kwargs={'resource_id': 'a'},
            ) as (cache_hit, value):
                assert not cache_hit
                assert value == 'a'
                log.info("resource 'a' entered (holding)")
                a_ready.set()
                await a_exit.wait()
                log.info("resource 'a' exiting")
        with trio.fail_after(timeout):
            async with (
                tractor.open_root_actor(
                    debug_mode=debug_mode,
                    loglevel=loglevel,
                ),
                trio.open_nursery() as tn,
            ):
                # Phase 1: bg task holds resource 'a' open.
                tn.start_soon(hold_resource_a)
                await a_ready.wait()
                # Phase 2: open resource 'b' (different kwargs,
                # same acm_func) then exit it while 'a' is still
                # alive.
                async with maybe_open_context(
                    acm_with_resource,
                    kwargs={'resource_id': 'b'},
                ) as (cache_hit, value):
                    assert not cache_hit
                    assert value == 'b'
                    log.info("resource 'b' entered")
                log.info("resource 'b' exited, waiting for teardown")
                # NOTE(review): presumably this single checkpoint is
                # what gives the teardown of 'b' a chance to run
                # before the re-entry below; confirm against
                # `maybe_open_context()`'s impl.
                await trio.lowlevel.checkpoint()
                # Phase 3: re-open 'b'; must be a fresh cache MISS
                # proving 'b' was torn down independently of 'a'.
                #
                # With the old global `_Cache.users` counter this
                # would be a stale cache HIT (leaked resource) or
                # trigger `assert not resources.get(ctx_key)`.
                async with maybe_open_context(
                    acm_with_resource,
                    kwargs={'resource_id': 'b'},
                ) as (cache_hit, value):
                    assert not cache_hit, (
                        "resource 'b' was NOT torn down despite "
                        "having zero users! (global user count bug)"
                    )
                    assert value == 'b'
                    log.info(
                        "resource 'b' re-entered "
                        "(cache miss, correct)"
                    )
                # Phase 4: let 'a' exit, clean shutdown.
                a_exit.set()
    trio.run(main)
 def test_moc_reentry_during_teardown(
    debug_mode: bool,
    loglevel: str,
 ):
    '''
    Reproduce the piker `open_cached_client('kraken')` race:

    - same `acm_func`, NO kwargs (identical `ctx_key`)
    - multiple tasks share the cached resource
    - all users exit -> teardown starts
    - a NEW task enters during `_Cache.run_ctx.__aexit__`
    - `values[ctx_key]` is gone (popped in inner finally)
      but `resources[ctx_key]` still exists (outer finally
      hasn't run yet bc the acm cleanup has checkpoints)
    - old code: `assert not resources.get(ctx_key)` FIRES

    This models the real-world scenario where `brokerd.kraken`
    tasks concurrently call `open_cached_client('kraken')`
    (same `acm_func`, empty kwargs, shared `ctx_key`) and
    the teardown/re-entry race triggers intermittently.

    '''
    async def main():
        # set by `cached_client()` once its post-`yield` (teardown)
        # code starts running.
        in_aexit = trio.Event()
        @acm
        async def cached_client():
            '''
            Simulates `kraken.api.get_client()`:
            - no params (all callers share one `ctx_key`)
            - slow-ish cleanup to widen the race window
              between `values.pop()` and `resources.pop()`
              inside `_Cache.run_ctx`.
            '''
            yield 'the-client'
            # Signal that we're in __aexit__ — at this
            # point `values` has already been popped by
            # `run_ctx`'s inner finally, but `resources`
            # is still alive (outer finally hasn't run).
            in_aexit.set()
            # NOTE(review): this sleep is longer than the
            # `trio.fail_after(5)` deadline used below, so the test
            # presumably relies on this teardown not being awaited
            # to completion inside that scope; confirm.
            await trio.sleep(10)
        # NOTE(review): `first_done` is set below but never waited
        # on anywhere in this test.
        first_done = trio.Event()
        async def use_and_exit():
            # 1st user: enter then immediately exit the shared ctx.
            async with maybe_open_context(
                cached_client,
            ) as (cache_hit, value):
                assert value == 'the-client'
            first_done.set()
        async def reenter_during_teardown():
            '''
            Wait for the acm's `__aexit__` to start (meaning
            `values` is popped but `resources` still exists),
            then re-enter — triggering the assert.
            '''
            await in_aexit.wait()
            async with maybe_open_context(
                cached_client,
            ) as (cache_hit, value):
                assert value == 'the-client'
        with trio.fail_after(5):
            async with (
                tractor.open_root_actor(
                    debug_mode=debug_mode,
                    loglevel=loglevel,
                ),
                collapse_eg(),
                trio.open_nursery() as tn,
            ):
                tn.start_soon(use_and_exit)
                tn.start_soon(reenter_during_teardown)
    trio.run(main)
--- a/tests/test_ringbuf.py
+++ b/tests/test_ringbuf.py
@ -4,10 +4,6 @@ import trio
 import pytest
 import tractor
 # XXX `cffi` dun build on py3.14 yet..
 cffi = pytest.importorskip("cffi")
 from tractor.ipc._ringbuf import (
    open_ringbuf,
    RBToken,
@ -18,7 +14,7 @@ from tractor._testing.samples import (
    generate_sample_messages,
 )
-# XXX, in case you want to melt your cores, comment this skip line XD
+# in case you don't want to melt your cores, uncomment dis!
 pytestmark = pytest.mark.skip
--- a/tests/test_root_infect_asyncio.py
+++ b/tests/test_root_infect_asyncio.py
@ -49,7 +49,7 @@ def test_infected_root_actor(
                ),
                to_asyncio.open_channel_from(
                    aio_echo_server,
-                ) as (chan, first),
+                ) as (first, chan),
            ):
                assert first == 'start'
@ -91,12 +91,13 @@ def test_infected_root_actor(
 async def sync_and_err(
    # just signature placeholders for compat with
    # ``to_asyncio.open_channel_from()``
-    chan: tractor.to_asyncio.LinkedTaskChannel,
+    to_trio: trio.MemorySendChannel,
    from_trio: asyncio.Queue,
    ev: asyncio.Event,
 ):
-    if chan:
+    if to_trio:
-        chan.started_nowait('start')
+        to_trio.send_nowait('start')
    await ev.wait()
    raise RuntimeError('asyncio-side')
@ -173,7 +174,7 @@ def test_trio_prestarted_task_bubbles(
                            sync_and_err,
                            ev=aio_ev,
                        )
-                    ) as (chan, first),
+                    ) as (first, chan),
                ):
                    for i in range(5):
--- a/tests/test_root_runtime.py
+++ b/tests/test_root_runtime.py
@ -94,15 +94,15 @@ def test_runtime_vars_unset(
    after the root actor-runtime exits!
    '''
-    assert not tractor.runtime._state._runtime_vars['_debug_mode']
+    assert not tractor._state._runtime_vars['_debug_mode']
    async def main():
-        assert not tractor.runtime._state._runtime_vars['_debug_mode']
+        assert not tractor._state._runtime_vars['_debug_mode']
        async with tractor.open_nursery(
            debug_mode=True,
        ):
-            assert tractor.runtime._state._runtime_vars['_debug_mode']
+            assert tractor._state._runtime_vars['_debug_mode']
        # after runtime closure, should be reverted!
-        assert not tractor.runtime._state._runtime_vars['_debug_mode']
+        assert not tractor._state._runtime_vars['_debug_mode']
    trio.run(main)
--- a/tests/test_rpc.py
+++ b/tests/test_rpc.py
@ -110,7 +110,7 @@ def test_rpc_errors(
        ) as n:
            actor = tractor.current_actor()
-            assert actor.is_registrar
+            assert actor.is_arbiter
            await n.run_in_actor(
                sleep_back_actor,
                actor_name=subactor_requests_to,
--- a/tests/test_runtime.py
+++ b/tests/test_runtime.py
@ -22,10 +22,6 @@ def unlink_file():
 async def crash_and_clean_tmpdir(
    tmp_file_path: str,
    error: bool = True,
    rent_cancel: bool = True,
    # XXX unused, but do we really need to test these cases?
    self_cancel: bool = False,
 ):
    global _file_path
    _file_path = tmp_file_path
@ -36,75 +32,43 @@ async def crash_and_clean_tmpdir(
    assert os.path.isfile(tmp_file_path)
    await trio.sleep(0.1)
    if error:
        print('erroring in subactor!')
        assert 0
-
+    else:
    elif self_cancel:
        print('SELF-cancelling subactor!')
        actor.cancel_soon()
    elif rent_cancel:
        await trio.sleep_forever()
    print('subactor exiting task!')
@pytest.mark.parametrize(
    'error_in_child',
    [True, False],
    ids='error_in_child={}'.format,
 )
@tractor_test
 async def test_lifetime_stack_wipes_tmpfile(
    tmp_path,
    error_in_child: bool,
    loglevel: str,
    # log: tractor.log.StackLevelAdapter,
    # ^TODO, once landed via macos support!
 ):
    child_tmp_file = tmp_path / "child.txt"
    child_tmp_file.touch()
    assert child_tmp_file.exists()
    path = str(child_tmp_file)
    # NOTE, this is expected to cancel the sub
    # in the `error_in_child=False` case!
    timeout: float = (
        1.6 if error_in_child
        else 1
    )
    try:
-        with trio.move_on_after(timeout) as cs:
+        with trio.move_on_after(0.5):
-            async with tractor.open_nursery(
+            async with tractor.open_nursery() as n:
-                loglevel=loglevel,
+                    await (  # inlined portal
-            ) as an:
+                        await n.run_in_actor(
-                await (  # inlined `tractor.Portal`
+                            crash_and_clean_tmpdir,
-                    await an.run_in_actor(
+                            tmp_file_path=path,
-                        crash_and_clean_tmpdir,
+                            error=error_in_child,
-                        tmp_file_path=path,
+                        )
-                        error=error_in_child,
+                    ).result()
-                    )
+
                ).result()
    except (
        tractor.RemoteActorError,
        # tractor.BaseExceptionGroup,
        BaseExceptionGroup,
-    ) as _exc:
+    ):
-        exc = _exc
+        pass
        from tractor.log import get_console_log
        log = get_console_log(
            level=loglevel,
            name=__name__,
        )
        log.exception(
            f'Subactor failed as expected with {type(exc)!r}\n'
        )
    # tmp file should have been wiped by
    # teardown stack.
    assert not child_tmp_file.exists()
    if error_in_child:
        assert not cs.cancel_called
    else:
        # expect timeout in some cases?
        assert cs.cancel_called
--- a/tests/test_shm.py
+++ b/tests/test_shm.py
@ -2,7 +2,6 @@
 Shared mem primitives and APIs.
 """
 import platform
 import uuid
 # import numpy
@ -14,18 +13,6 @@ from tractor.ipc._shm import (
    attach_shm_list,
 )
 pytestmark = pytest.mark.skipon_spawn_backend(
    'subint',
    'subint_forkserver',
    reason=(
        'subint: GIL-contention hanging class.\n'
        'subint_forkserver: `multiprocessing.SharedMemory` '
        'has known issues with fork-without-exec (mp\'s '
        'resource_tracker and SharedMemory internals assume '
        'fresh-process state). RemoteActorError surfaces from '
        'the shm-attach path. TODO, put issue link!\n'
    )
 )
@tractor.context
 async def child_attach_shml_alot(
@ -66,18 +53,7 @@ def test_child_attaches_alot():
                    shm_key=shml.key,
                ) as (ctx, start_val),
            ):
-                assert (_key := shml.key) == start_val
+                assert start_val == key
                if platform.system() != 'Darwin':
                    # XXX, macOS has a char limit..
                    # see `ipc._shm._shorten_key_for_macos`
                    assert (
                        start_val
                        ==
                        key
                        ==
                        _key
                    )
                await ctx.result()
            await portal.cancel_actor()
--- a/tests/test_spawning.py
+++ b/tests/test_spawning.py
@ -1,12 +1,5 @@
 """
-Spawning basics including audit of,
+Spawning basics
 - subproc bootstrap, such as subactor runtime-data/config inheritance,
 - basic (and mostly legacy) `ActorNursery` subactor starting and
  cancel APIs.
 Simple (and generally legacy) examples from the original
 API design.
 """
 from functools import partial
@ -40,13 +33,13 @@ async def spawn(
        assert actor is None  # no runtime yet
        async with (
            tractor.open_root_actor(
-                registry_addrs=[reg_addr],
+                arbiter_addr=reg_addr,
            ),
            tractor.open_nursery() as an,
        ):
            # now runtime exists
            actor: tractor.Actor = tractor.current_actor()
-            assert actor.is_registrar == should_be_root
+            assert actor.is_arbiter == should_be_root
            # spawns subproc here
            portal: tractor.Portal = await an.run_in_actor(
@ -75,7 +68,7 @@ async def spawn(
            assert result == 10
            return result
    else:
-        assert actor.is_registrar == should_be_root
+        assert actor.is_arbiter == should_be_root
        return 10
@ -105,9 +98,7 @@ async def movie_theatre_question():
@tractor_test
-async def test_movie_theatre_convo(
+async def test_movie_theatre_convo(start_method):
    start_method: str,
 ):
    '''
    The main ``tractor`` routine.
@ -160,16 +151,13 @@ async def test_most_beautiful_word(
                name='some_linguist',
            )
-            res: Any = await portal.wait_for_result()
+            print(await portal.result())
            assert res == return_value
    # The ``async with`` will unblock here since the 'some_linguist'
    # actor has completed its main task ``cellar_door``.
    # this should pull the cached final result already captured during
    # the nursery block exit.
-    res: Any = await portal.wait_for_result()
+    print(await portal.result())
    assert res == return_value
    print(res)
 async def check_loglevel(level):
@ -180,35 +168,22 @@ async def check_loglevel(level):
    log.critical('yoyoyo')
@pytest.mark.parametrize(
    'level', [
        'debug',
        'cancel',
        'critical'
    ],
    ids='loglevel={}'.format,
 )
 def test_loglevel_propagated_to_subactor(
-    capfd: pytest.CaptureFixture,
+    start_method,
-    start_method: str,
+    capfd,
-    reg_addr: tuple,
+    reg_addr,
    level: str,
 ):
-    if start_method in ('mp_forkserver', 'subint_forkserver'):
+    if start_method == 'mp_forkserver':
        pytest.skip(
-            "a bug with `capfd` seems to make forkserver capture not work? "
+            "a bug with `capfd` seems to make forkserver capture not work?")
-            "(same class as the `mp_forkserver` pre-existing skip — fork-"
+
-            "based backends inherit pytest's capfd temp-file fds into the "
+    level = 'critical'
            "subactor and the IPC handshake reads garbage (`unclean EOF "
            "read only X/HUGE_NUMBER bytes`). Work around by using "
            "`capsys` instead or skip entirely."
        )
    async def main():
        async with tractor.open_nursery(
-            name='registrar',
+            name='arbiter',
            start_method=start_method,
-            registry_addrs=[reg_addr],
+            arbiter_addr=reg_addr,
        ) as tn:
            await tn.run_in_actor(
@ -222,121 +197,3 @@ def test_loglevel_propagated_to_subactor(
    # ensure subactor spits log message on stderr
    captured = capfd.readouterr()
    assert 'yoyoyo' in captured.err
 async def check_parent_main_inheritance(
    expect_inherited: bool,
 ) -> bool:
    '''
    Assert that the child actor's ``_parent_main_data`` matches the
    ``inherit_parent_main`` flag it was spawned with.
    With the trio spawn backend the parent's ``__main__`` bootstrap
    data is captured and forwarded to each child so it can replay
    the parent's ``__main__`` as ``__mp_main__``, mirroring the
    stdlib ``multiprocessing`` bootstrap:
    https://docs.python.org/3/library/multiprocessing.html#the-spawn-and-forkserver-start-methods
    When ``inherit_parent_main=False`` the data dict is empty
    (``{}``) so no fixup ever runs and the child keeps its own
    ``__main__`` untouched.
    NOTE: under `pytest` the parent ``__main__`` is
    ``pytest.__main__`` whose ``_fixup_main_from_name()`` is a no-op
    (the name ends with ``.__main__``), so we cannot observe
    a difference in ``sys.modules['__main__'].__name__`` between the
    two modes.  Checking ``_parent_main_data`` directly is the most
    reliable verification that the flag is threaded through
    correctly; a ``RemoteActorError[AssertionError]`` propagates on
    mismatch.
    '''
    import tractor
    actor: tractor.Actor = tractor.current_actor()
    has_data: bool = bool(actor._parent_main_data)
    assert has_data == expect_inherited, (
        f'Expected _parent_main_data to be '
        f'{"non-empty" if expect_inherited else "empty"}, '
        f'got: {actor._parent_main_data!r}'
    )
    return has_data
 def test_run_in_actor_can_skip_parent_main_inheritance(
    start_method: str,  # <- only support on `trio` backend rn.
 ):
    '''
    Verify ``inherit_parent_main=False`` on ``run_in_actor()``
    prevents parent ``__main__`` data from reaching the child.
    '''
    if start_method != 'trio':
        pytest.skip(
            'parent main-inheritance opt-out only affects the trio backend'
        )
    async def main():
        async with tractor.open_nursery(start_method='trio') as an:
            # Default: child receives parent __main__ bootstrap data
            replaying = await an.run_in_actor(
                check_parent_main_inheritance,
                name='replaying-parent-main',
                expect_inherited=True,
            )
            await replaying.result()
            # Opt-out: child gets no parent __main__ data
            isolated = await an.run_in_actor(
                check_parent_main_inheritance,
                name='isolated-parent-main',
                inherit_parent_main=False,
                expect_inherited=False,
            )
            await isolated.result()
    trio.run(main)
 def test_start_actor_can_skip_parent_main_inheritance(
    start_method: str,  # <- only support on `trio` backend rn.
 ):
    '''
    Verify ``inherit_parent_main=False`` on ``start_actor()``
    prevents parent ``__main__`` data from reaching the child.
    '''
    if start_method != 'trio':
        pytest.skip(
            'parent main-inheritance opt-out only affects the trio backend'
        )
    async def main():
        async with tractor.open_nursery(start_method='trio') as an:
            # Default: child receives parent __main__ bootstrap data
            replaying = await an.start_actor(
                'replaying-parent-main',
                enable_modules=[__name__],
            )
            result = await replaying.run(
                check_parent_main_inheritance,
                expect_inherited=True,
            )
            assert result is True
            await replaying.cancel_actor()
            # Opt-out: child gets no parent __main__ data
            isolated = await an.start_actor(
                'isolated-parent-main',
                enable_modules=[__name__],
                inherit_parent_main=False,
            )
            result = await isolated.run(
                check_parent_main_inheritance,
                expect_inherited=False,
            )
            assert result is False
            await isolated.cancel_actor()
    trio.run(main)
--- a/tests/test_task_broadcasting.py
+++ b/tests/test_task_broadcasting.py
@ -75,7 +75,7 @@ async def open_sequence_streamer(
 ) -> tractor.MsgStream:
    async with tractor.open_nursery(
-        registry_addrs=[reg_addr],
+        arbiter_addr=reg_addr,
        start_method=start_method,
    ) as an:
--- a/tractor/init.py
+++ b/tractor/init.py
@ -30,23 +30,21 @@ from ._streaming import (
    MsgStream as MsgStream,
    stream as stream,
 )
-from .discovery._api import (
+from ._discovery import (
    get_registry as get_registry,
    find_actor as find_actor,
    wait_for_actor as wait_for_actor,
    query_actor as query_actor,
 )
-from .runtime._supervise import (
+from ._supervise import (
    open_nursery as open_nursery,
    ActorNursery as ActorNursery,
 )
-from .runtime._state import (
+from ._state import (
    RuntimeVars as RuntimeVars,
    current_actor as current_actor,
    current_ipc_ctx as current_ipc_ctx,
    debug_mode as debug_mode,
    get_runtime_vars as get_runtime_vars,
    is_root_process as is_root_process,
    current_ipc_ctx as current_ipc_ctx,
    debug_mode as debug_mode
 )
 from ._exceptions import (
    ContextCancelled as ContextCancelled,
@ -67,10 +65,6 @@ from ._root import (
    open_root_actor as open_root_actor,
 )
 from .ipc import Channel as Channel
-from .runtime._portal import Portal as Portal
+from ._portal import Portal as Portal
-from .runtime._runtime import Actor as Actor
+from ._runtime import Actor as Actor
 from .discovery._registry import (
    Registrar as Registrar,
    Arbiter as Arbiter,
 )
 # from . import hilevel as hilevel
--- a/Show More
+++ b/Show More