Toss in masked `.set_trace()` for unshielded `.pause()` debug

Mask tpt-closed handling of `chan.send(return_msg)`
A partial revert of commit c05d08e426 since it seems we already suppress tpt-closed errors lower down in `.ipc.Channel.send()`; given that, I'm pretty sure this new handler code should basically never run. Left in a todo to remove the masked content once I'm done more thoroughly testing under `piker`.
2025-08-20 13:02:51 -04:00 · 2025-08-20 12:45:54 -04:00 · 2025-08-20 12:45:54 -04:00 · 2025-08-20 12:45:54 -04:00 · 2025-08-20 12:45:49 -04:00 · 2025-08-20 12:45:49 -04:00
21 changed files with 345 additions and 152 deletions
--- a/examples/advanced_faults/ipc_failure_during_stream.py
+++ b/examples/advanced_faults/ipc_failure_during_stream.py
@ -16,6 +16,7 @@ from tractor import (
    ContextCancelled,
    MsgStream,
    _testing,
+    trionics,
 )
 import trio
 import pytest
@ -62,9 +63,8 @@ async def recv_and_spawn_net_killers(
    await ctx.started()
    async with (
        ctx.open_stream() as stream,
-        trio.open_nursery(
-            strict_exception_groups=False,
-        ) as tn,
+        trionics.collapse_eg(),
+        trio.open_nursery() as tn,
    ):
        async for i in stream:
            print(f'child echoing {i}')
--- a/examples/debugging/root_self_cancelled_w_error.py
+++ b/examples/debugging/root_self_cancelled_w_error.py
@ -0,0 +1,35 @@
+import trio
+import tractor
+
+
+async def main():
+    async with tractor.open_root_actor(
+        debug_mode=True,
+        loglevel='cancel',
+    ) as _root:
+
+        # manually trigger self-cancellation and wait
+        # for it to fully trigger.
+        _root.cancel_soon()
+        await _root._cancel_complete.wait()
+        print('root cancelled')
+
+        # now ensure we can still use the REPL
+        try:
+            await tractor.pause()
+        except trio.Cancelled as _taskc:
+            assert (root_cs := _root._root_tn.cancel_scope).cancel_called
+            # NOTE^^ above logic but inside `open_root_actor()` and
+            # passed to the `shield=` expression is effectively what
+            # we're testing here!
+            await tractor.pause(shield=root_cs.cancel_called)
+
+        # XXX, if shield logic *is wrong* inside `open_root_actor()`'s
+        # crash-handler block, the crash REPL triggered by the
+        # assert below can never be interacted with; instead
+        # `trio.Cancelled` would be bubbled up: the original BUG.
+        assert 0
+
+
+if __name__ == '__main__':
+    trio.run(main)
--- a/examples/quick_cluster.py
+++ b/examples/quick_cluster.py
@ -23,9 +23,8 @@ async def main():
            modules=[__name__]
        ) as portal_map,

-        trio.open_nursery(
-            strict_exception_groups=False,
-        ) as tn,
+        tractor.trionics.collapse_eg(),
+        trio.open_nursery() as tn,
    ):

        for (name, portal) in portal_map.items():
--- a/tests/devx/test_debugger.py
+++ b/tests/devx/test_debugger.py
@ -1,13 +1,13 @@
 """
 That "native" debug mode better work!

-All these tests can be understood (somewhat) by running the equivalent
-`examples/debugging/` scripts manually.
+All these tests can be understood (somewhat) by running the
+equivalent `examples/debugging/` scripts manually.

 TODO:
-    - none of these tests have been run successfully on windows yet but
-      there's been manual testing that verified it works.
-    - wonder if any of it'll work on OS X?
+  - none of these tests have been run successfully on windows yet but
+    there's been manual testing that verified it works.
+  - wonder if any of it'll work on OS X?

 """
 from __future__ import annotations
@ -1156,6 +1156,54 @@ def test_ctxep_pauses_n_maybe_ipc_breaks(
            )


+def test_crash_handling_within_cancelled_root_actor(
+    spawn: PexpectSpawner,
+):
+    '''
+    Ensure that when only a root-actor is started via `open_root_actor()`
+    we can crash-handle in debug-mode despite self-cancellation.
+
+    More-or-less ensures we conditionally shield the pause in
+    `._root.open_root_actor()`'s `await debug._maybe_enter_pm()`
+    call.
+
+    '''
+    child = spawn('root_self_cancelled_w_error')
+    child.expect(PROMPT)
+
+    assert_before(
+        child,
+        [
+            "Actor.cancel_soon()` was called!",
+            "root cancelled",
+            _pause_msg,
+            "('root'",  # actor name
+        ]
+    )
+
+    child.sendline('c')
+    child.expect(PROMPT)
+    assert_before(
+        child,
+        [
+            _crash_msg,
+            "('root'",  # actor name
+            "AssertionError",
+            "assert 0",
+        ]
+    )
+
+    child.sendline('c')
+    child.expect(EOF)
+    assert_before(
+        child,
+        [
+            "AssertionError",
+            "assert 0",
+        ]
+    )
+
+
 # TODO: better error for "non-ideal" usage from the root actor.
 # -[ ] if called from an async scope emit a message that suggests
 #    using `await tractor.pause()` instead since it's less overhead
--- a/tests/ipc/test_each_tpt.py
+++ b/tests/ipc/test_each_tpt.py
@ -18,8 +18,9 @@ from tractor import (
@pytest.fixture
 def bindspace_dir_str() -> str:

-    bs_dir_str: str = '/run/user/1000/doggy'
-    bs_dir = Path(bs_dir_str)
+    rt_dir: Path = tractor._state.get_rt_dir()
+    bs_dir: Path = rt_dir / 'doggy'
+    bs_dir_str: str = str(bs_dir)
    assert not bs_dir.is_dir()

    yield bs_dir_str
--- a/tests/test_advanced_streaming.py
+++ b/tests/test_advanced_streaming.py
@ -313,9 +313,8 @@ async def inf_streamer(
        # `trio.EndOfChannel` doesn't propagate directly to the above
        # .open_stream() parent, resulting in it also raising instead
        # of gracefully absorbing as normal.. so how to handle?
-        trio.open_nursery(
-            strict_exception_groups=False,
-        ) as tn,
+        tractor.trionics.collapse_eg(),
+        trio.open_nursery() as tn,
    ):
        async def close_stream_on_sentinel():
            async for msg in stream:
--- a/tests/test_cancellation.py
+++ b/tests/test_cancellation.py
@ -236,7 +236,10 @@ async def stream_forever():
 async def test_cancel_infinite_streamer(start_method):

    # stream for at most 1 seconds
-    with trio.move_on_after(1) as cancel_scope:
+    with (
+        trio.fail_after(4),
+        trio.move_on_after(1) as cancel_scope
+    ):
        async with tractor.open_nursery() as n:
            portal = await n.start_actor(
                'donny',
@ -532,10 +535,15 @@ def test_cancel_via_SIGINT_other_task(
    async def main():
        # should never timeout since SIGINT should cancel the current program
        with trio.fail_after(timeout):
-            async with trio.open_nursery(
-                strict_exception_groups=False,
-            ) as n:
-                await n.start(spawn_and_sleep_forever)
+            async with (
+
+                # XXX ?TODO? why no work!?
+                # tractor.trionics.collapse_eg(),
+                trio.open_nursery(
+                    strict_exception_groups=False,
+                ) as tn,
+            ):
+                await tn.start(spawn_and_sleep_forever)
                if 'mp' in spawn_backend:
                    time.sleep(0.1)
                os.kill(pid, signal.SIGINT)
--- a/tests/test_child_manages_service_nursery.py
+++ b/tests/test_child_manages_service_nursery.py
@ -117,9 +117,10 @@ async def open_actor_local_nursery(
    ctx: tractor.Context,
 ):
    global _nursery
-    async with trio.open_nursery(
-        strict_exception_groups=False,
-    ) as tn:
+    async with (
+        tractor.trionics.collapse_eg(),
+        trio.open_nursery() as tn
+    ):
        _nursery = tn
        await ctx.started()
        await trio.sleep(10)
--- a/tests/test_discovery.py
+++ b/tests/test_discovery.py
@ -11,6 +11,7 @@ import psutil
 import pytest
 import subprocess
 import tractor
+from tractor.trionics import collapse_eg
 from tractor._testing import tractor_test
 import trio

@ -193,10 +194,10 @@ async def spawn_and_check_registry(

            try:
                async with tractor.open_nursery() as an:
-                    async with trio.open_nursery(
-                        strict_exception_groups=False,
-                    ) as trion:
-
+                    async with (
+                        collapse_eg(),
+                        trio.open_nursery() as trion,
+                    ):
                        portals = {}
                        for i in range(3):
                            name = f'a{i}'
@ -338,11 +339,12 @@ async def close_chans_before_nursery(
                        async with portal2.open_stream_from(
                            stream_forever
                        ) as agen2:
-                            async with trio.open_nursery(
-                                strict_exception_groups=False,
-                            ) as n:
-                                n.start_soon(streamer, agen1)
-                                n.start_soon(cancel, use_signal, .5)
+                            async with (
+                                collapse_eg(),
+                                trio.open_nursery() as tn,
+                            ):
+                                tn.start_soon(streamer, agen1)
+                                tn.start_soon(cancel, use_signal, .5)
                                try:
                                    await streamer(agen2)
                                finally:
--- a/tests/test_infected_asyncio.py
+++ b/tests/test_infected_asyncio.py
@ -234,10 +234,8 @@ async def trio_ctx(
    with trio.fail_after(1 + delay):
        try:
            async with (
-                trio.open_nursery(
-                    # TODO, for new `trio` / py3.13
-                    # strict_exception_groups=False,
-                ) as tn,
+                tractor.trionics.collapse_eg(),
+                trio.open_nursery() as tn,
                tractor.to_asyncio.open_channel_from(
                    sleep_and_err,
                ) as (first, chan),
--- a/tests/test_trioisms.py
+++ b/tests/test_trioisms.py
@ -8,6 +8,7 @@ from contextlib import (
 )

 import pytest
+from tractor.trionics import collapse_eg
 import trio
 from trio import TaskStatus

@ -64,9 +65,8 @@ def test_stashed_child_nursery(use_start_soon):
    async def main():

        async with (
-            trio.open_nursery(
-                strict_exception_groups=False,
-            ) as pn,
+            collapse_eg(),
+            trio.open_nursery() as pn,
        ):
            cn = await pn.start(mk_child_nursery)
            assert cn
@ -195,10 +195,8 @@ def test_gatherctxs_with_memchan_breaks_multicancelled(
        async with (
            # XXX should ensure ONLY the KBI
            # is relayed upward
-            trionics.collapse_eg(),
-            trio.open_nursery(
-                # strict_exception_groups=False,
-            ), # as tn,
+            collapse_eg(),
+            trio.open_nursery(), # as tn,

            trionics.gather_contexts([
                open_memchan(),
--- a/tractor/_root.py
+++ b/tractor/_root.py
@ -478,16 +478,14 @@ async def open_root_actor(

            # start runtime in a bg sub-task, yield to caller.
            async with (
-                collapse_eg(
-                    # bp=True,
-                    hide_tb=False,
-                ),
+                collapse_eg(),
                trio.open_nursery() as root_tn,

-                # XXX, finally-footgun below?
+                # ?TODO? finally-footgun below?
                # -> see note on why shielding.
                # maybe_raise_from_masking_exc(),
            ):
+                actor._root_tn = root_tn
                # `_runtime.async_main()` creates an internal nursery
                # and blocks here until any underlying actor(-process)
                # tree has terminated thereby conducting so called
@ -531,7 +529,6 @@ async def open_root_actor(
                        # crashes under cancellation conditions; so
                        # most of them!
                        shield=root_tn.cancel_scope.cancel_called,
-                        # ^TODO? write a (debugger) test for this ya?
                    )

                    if (
@ -571,6 +568,7 @@ async def open_root_actor(
                        f'{op_nested_actor_repr}'
                    )
                    # XXX, THIS IS A *finally-footgun*!
+                    # (also mentioned in with-block above)
                    # -> though already shields iternally it can
                    # taskc here and mask underlying errors raised in
                    # the try-block above?
--- a/tractor/_rpc.py
+++ b/tractor/_rpc.py
@ -403,7 +403,7 @@ async def _errors_relayed_via_ipc(

    # RPC task bookeeping.
    # since RPC tasks are scheduled inside a flat
-    # `Actor._service_n`, we add "handles" to each such that
+    # `Actor._service_tn`, we add "handles" to each such that
    # they can be individually ccancelled.
    finally:

@ -481,7 +481,7 @@ async def _invoke(
    connected IPC channel.

    This is the core "RPC" `trio.Task` scheduling machinery used to start every
-    remotely invoked function, normally in `Actor._service_n: Nursery`.
+    remotely invoked function, normally in `Actor._service_tn: Nursery`.

    '''
    __tracebackhide__: bool = hide_tb
@ -970,7 +970,7 @@ async def process_messages(

    Receive (multiplexed) per-`Channel` RPC requests as msgs from
    remote processes; schedule target async funcs as local
-    `trio.Task`s inside the `Actor._service_n: Nursery`.
+    `trio.Task`s inside the `Actor._service_tn: Nursery`.

    Depending on msg type, non-`cmd` (task spawning/starting)
    request payloads (eg. `started`, `yield`, `return`, `error`)
@ -995,7 +995,7 @@ async def process_messages(

    '''
    actor: Actor = _state.current_actor()
-    assert actor._service_n  # runtime state sanity
+    assert actor._service_tn  # runtime state sanity

    # TODO: once `trio` get's an "obvious way" for req/resp we
    # should use it?
@ -1206,7 +1206,7 @@ async def process_messages(
                        start_status += '->( scheduling new task..\n'
                        log.runtime(start_status)
                        try:
-                            ctx: Context = await actor._service_n.start(
+                            ctx: Context = await actor._service_tn.start(
                                partial(
                                    _invoke,
                                    actor,
@ -1346,7 +1346,7 @@ async def process_messages(
    ) as err:

        if nursery_cancelled_before_task:
-            sn: Nursery = actor._service_n
+            sn: Nursery = actor._service_tn
            assert sn and sn.cancel_scope.cancel_called  # sanity
            log.cancel(
                f'Service nursery cancelled before it handled {funcname}'
--- a/tractor/_runtime.py
+++ b/tractor/_runtime.py
@ -35,6 +35,15 @@ for running all lower level spawning, supervision and msging layers:
  SC-transitive RPC via scheduling of `trio` tasks.
 - registration of newly spawned actors with the discovery sys.

+Glossary:
+--------
+ - tn: a `trio.Nursery` or "task nursery".
+ - an: an `ActorNursery` or "actor nursery".
+ - root: top/parent-most scope/task/process/actor (or other runtime
+         primitive) in a hierarchical tree.
+ - parent-ish: "higher-up" in the runtime-primitive hierarchy.
+ - child-ish: "lower-down" in the runtime-primitive hierarchy.
+
 '''
 from __future__ import annotations
 from contextlib import (
@ -76,6 +85,7 @@ from tractor.msg import (
 )
 from .trionics import (
    collapse_eg,
+    maybe_open_nursery,
 )
 from .ipc import (
    Channel,
@ -173,9 +183,11 @@ class Actor:

    msg_buffer_size: int = 2**6

-    # nursery placeholders filled in by `async_main()` after fork
-    _service_n: Nursery|None = None
-
+    # nursery placeholders filled in by `async_main()`,
+    # - after fork for subactors.
+    # - during boot for the root actor.
+    _root_tn: Nursery|None = None
+    _service_tn: Nursery|None = None
    _ipc_server: _server.IPCServer|None = None

    @property
@ -1009,12 +1021,48 @@ class Actor:
        the RPC service nursery.

        '''
-        assert self._service_n
-        self._service_n.start_soon(
+        actor_repr: str = _pformat.nest_from_op(
+            input_op='>c(',
+            text=self.pformat(),
+            nest_indent=1,
+        )
+        log.cancel(
+            'Actor.cancel_soon()` was called!\n'
+            f'>> scheduling `Actor.cancel()`\n'
+            f'{actor_repr}'
+        )
+        assert self._service_tn
+        self._service_tn.start_soon(
            self.cancel,
            None,  # self cancel all rpc tasks
        )

+        # schedule a "canceller task" in the `._root_tn` once the
+        # `._service_tn` is fully shutdown; task waits for child-ish
+        # scopes to fully exit then finally cancels its parent,
+        # root-most, scope.
+        async def cancel_root_tn_after_services():
+            log.runtime(
+                'Waiting on service-tn to cancel..\n'
+                f'c>)\n'
+                f'|_{self._service_tn.cancel_scope!r}\n'
+            )
+            await self._cancel_complete.wait()
+            log.cancel(
+                f'`._service_tn` cancelled\n'
+                f'>c)\n'
+                f'|_{self._service_tn.cancel_scope!r}\n'
+                f'\n'
+                f'>> cancelling `._root_tn`\n'
+                f'c>(\n'
+                f' |_{self._root_tn.cancel_scope!r}\n'
+            )
+            self._root_tn.cancel_scope.cancel()
+
+        self._root_tn.start_soon(
+            cancel_root_tn_after_services
+        )
+
    @property
    def cancel_complete(self) -> bool:
        return self._cancel_complete.is_set()
@ -1119,8 +1167,8 @@ class Actor:
                await ipc_server.wait_for_shutdown()

            # cancel all rpc tasks permanently
-            if self._service_n:
-                self._service_n.cancel_scope.cancel()
+            if self._service_tn:
+                self._service_tn.cancel_scope.cancel()

        log_meth(msg)
        self._cancel_complete.set()
@ -1257,7 +1305,7 @@ class Actor:
        '''
        Cancel all ongoing RPC tasks owned/spawned for a given
        `parent_chan: Channel` or simply all tasks (inside
-        `._service_n`) when `parent_chan=None`.
+        `._service_tn`) when `parent_chan=None`.

        '''
        tasks: dict = self._rpc_tasks
@ -1469,46 +1517,55 @@ async def async_main(
                    accept_addrs.append(addr.unwrap())

        assert accept_addrs
-        # The "root" nursery ensures the channel with the immediate
-        # parent is kept alive as a resilient service until
-        # cancellation steps have (mostly) occurred in
-        # a deterministic way.
+
+        ya_root_tn: bool = bool(actor._root_tn)
+        ya_service_tn: bool = bool(actor._service_tn)
+
+        # NOTE, a top-most "root" nursery in each actor-process
+        # enables a lifetime priority for the IPC-channel connection
+        # with a sub-actor's immediate parent. I.e. this connection
+        # is kept alive as a resilient service connection until all
+        # other machinery has exited and cancellation of all
+        # embedded/child scopes has completed. This helps ensure
+        # a deterministic (and thus "graceful")
+        # first-class-supervision style teardown where a parent actor
+        # (vs. say peers) is always the last to be contacted before
+        # disconnect.
        root_tn: trio.Nursery
        async with (
            collapse_eg(),
-            trio.open_nursery() as root_tn,
+            maybe_open_nursery(
+                nursery=actor._root_tn,
+            ) as root_tn,
        ):
-            # actor._root_n = root_tn
-            # assert actor._root_n
+            if ya_root_tn:
+                assert root_tn is actor._root_tn
+            else:
+                actor._root_tn = root_tn

            ipc_server: _server.IPCServer
            async with (
                collapse_eg(),
-                trio.open_nursery() as service_nursery,
+                maybe_open_nursery(
+                    nursery=actor._service_tn,
+                ) as service_tn,
                _server.open_ipc_server(
-                    parent_tn=service_nursery,
-                    stream_handler_tn=service_nursery,
+                    parent_tn=service_tn,  # ?TODO, why can't this be the root-tn
+                    stream_handler_tn=service_tn,
                ) as ipc_server,
-                # ) as actor._ipc_server,
-                # ^TODO? prettier?

            ):
-                # This nursery is used to handle all inbound
-                # connections to us such that if the TCP server
-                # is killed, connections can continue to process
-                # in the background until this nursery is cancelled.
-                actor._service_n = service_nursery
+                if ya_service_tn:
+                    assert service_tn is actor._service_tn
+                else:
+                    # This nursery is used to handle all inbound
+                    # connections to us such that if the TCP server
+                    # is killed, connections can continue to process
+                    # in the background until this nursery is cancelled.
+                    actor._service_tn = service_tn
+
+                # set after allocate
                actor._ipc_server = ipc_server
-                assert (
-                    actor._service_n
-                    and (
-                        actor._service_n
-                        is
-                        actor._ipc_server._parent_tn
-                        is
-                        ipc_server._stream_handler_tn
-                    )
-                )

                # load exposed/allowed RPC modules
                # XXX: do this **after** establishing a channel to the parent
@ -1534,10 +1591,11 @@ async def async_main(
                # - root actor: the ``accept_addr`` passed to this method

                # TODO: why is this not with the root nursery?
+                # - see above that the `._service_tn` is what's used?
                try:
                    eps: list = await ipc_server.listen_on(
                        accept_addrs=accept_addrs,
-                        stream_handler_nursery=service_nursery,
+                        stream_handler_nursery=service_tn,
                    )
                    log.runtime(
                        f'Booted IPC server\n'
@ -1545,7 +1603,7 @@ async def async_main(
                    )
                    assert (
                        (eps[0].listen_tn)
-                        is not service_nursery
+                        is not service_tn
                    )

                except OSError as oserr:
@ -1707,7 +1765,7 @@ async def async_main(

        # XXX TODO but hard XXX
        # we can't actually do this bc the debugger uses the
-        # _service_n to spawn the lock task, BUT, in theory if we had
+        # _service_tn to spawn the lock task, BUT, in theory if we had
        # the root nursery surround this finally block it might be
        # actually possible to debug THIS machinery in the same way
        # as user task code?
--- a/tractor/_supervise.py
+++ b/tractor/_supervise.py
@ -643,8 +643,9 @@ _shutdown_msg: str = (
    'Actor-runtime-shutdown'
 )

-# @api_frame
+
@acm
+# @api_frame
 async def open_nursery(
    *,  # named params only!
    hide_tb: bool = True,
--- a/tractor/devx/debug/_trace.py
+++ b/tractor/devx/debug/_trace.py
@ -481,12 +481,12 @@ async def _pause(
            # we have to figure out how to avoid having the service nursery
            # cancel on this task start? I *think* this works below:
            # ```python
-            #   actor._service_n.cancel_scope.shield = shield
+            #   actor._service_tn.cancel_scope.shield = shield
            # ```
            # but not entirely sure if that's a sane way to implement it?

            # NOTE currently we spawn the lock request task inside this
-            # subactor's global `Actor._service_n` so that the
+            # subactor's global `Actor._service_tn` so that the
            # lifetime of the lock-request can outlive the current
            # `._pause()` scope while the user steps through their
            # application code and when they finally exit the
@ -510,7 +510,7 @@ async def _pause(
                f'|_{task}\n'
            )
            with trio.CancelScope(shield=shield):
-                req_ctx: Context = await actor._service_n.start(
+                req_ctx: Context = await actor._service_tn.start(
                    partial(
                        request_root_stdio_lock,
                        actor_uid=actor.uid,
@ -544,7 +544,7 @@ async def _pause(
            _repl_fail_report = None

        # when the actor is mid-runtime cancellation the
-        # `Actor._service_n` might get closed before we can spawn
+        # `Actor._service_tn` might get closed before we can spawn
        # the request task, so just ignore expected RTE.
        elif (
            isinstance(pause_err, RuntimeError)
@ -561,6 +561,9 @@ async def _pause(
            return

        elif isinstance(pause_err, trio.Cancelled):
+            __tracebackhide__: bool = False
+            # XXX, unmask to REPL it.
+            # mk_pdb().set_trace(frame=inspect.currentframe())
            _repl_fail_report += (
                'You called `tractor.pause()` from an already cancelled scope!\n\n'
                'Consider `await tractor.pause(shield=True)` to make it work B)\n'
@ -989,7 +992,7 @@ def pause_from_sync(
                # that output and assign the `repl` created above!
                bg_task, _ = trio.from_thread.run(
                    afn=partial(
-                        actor._service_n.start,
+                        actor._service_tn.start,
                        partial(
                            _pause_from_bg_root_thread,
                            behalf_of_thread=thread,
--- a/tractor/ipc/_mp_bs.py
+++ b/tractor/ipc/_mp_bs.py
@ -17,36 +17,59 @@
 Utils to tame mp non-SC madeness

 '''
+import platform
+

-# !TODO! in 3.13 this can be disabled (the-same/similarly) using
-# a flag,
-# - [ ] soo if it works like this, drop this module entirely for
-#   3.13+ B)
-#  |_https://docs.python.org/3/library/multiprocessing.shared_memory.html
-#
 def disable_mantracker():
    '''
    Disable all `multiprocessing` "resource tracking" machinery since
    it's an absolute multi-threaded mess of non-SC madness.

    '''
-    from multiprocessing import resource_tracker as mantracker
+    from multiprocessing.shared_memory import SharedMemory

-    # Tell the "resource tracker" thing to fuck off.
-    class ManTracker(mantracker.ResourceTracker):
-        def register(self, name, rtype):
-            pass

-        def unregister(self, name, rtype):
-            pass
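+    # 3.13+ only: can pass `track=False` to disable all the
+    # resource tracker bs.
+    # NOTE(review): the check below compares tuples of *strings*,
+    # so e.g. ('3', '9') >= ('3', '13') is also True; confirm the
+    # minimum supported minor version is >= 10 or compare ints.
+    # https://docs.python.org/3/library/multiprocessing.shared_memory.html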
+    if (_py_313 := (
+            platform.python_version_tuple()[:-1]
+            >=
+            ('3', '13')
+        )
+    ):
+        from functools import partial
+        return partial(
+            SharedMemory,
+            track=False,
+        )

-        def ensure_running(self):
-            pass
+    # !TODO, once we drop 3.12- we can obvi remove all this!
+    else:
+        from multiprocessing import (
+            resource_tracker as mantracker,
+        )

-    # "know your land and know your prey"
-    # https://www.dailymotion.com/video/x6ozzco
-    mantracker._resource_tracker = ManTracker()
-    mantracker.register = mantracker._resource_tracker.register
-    mantracker.ensure_running = mantracker._resource_tracker.ensure_running
-    mantracker.unregister = mantracker._resource_tracker.unregister
-    mantracker.getfd = mantracker._resource_tracker.getfd
+        # Tell the "resource tracker" thing to fuck off.
+        class ManTracker(mantracker.ResourceTracker):
+            def register(self, name, rtype):
+                pass
+
+            def unregister(self, name, rtype):
+                pass
+
+            def ensure_running(self):
+                pass
+
+        # "know your land and know your prey"
+        # https://www.dailymotion.com/video/x6ozzco
+        mantracker._resource_tracker = ManTracker()
+        mantracker.register = mantracker._resource_tracker.register
+        mantracker.ensure_running = mantracker._resource_tracker.ensure_running
+        mantracker.unregister = mantracker._resource_tracker.unregister
+        mantracker.getfd = mantracker._resource_tracker.getfd
+
+        # use std type verbatim
+        shmT = SharedMemory
+
+    return shmT
--- a/tractor/ipc/_server.py
+++ b/tractor/ipc/_server.py
@ -1001,7 +1001,11 @@ class Server(Struct):
            partial(
                _serve_ipc_eps,
                server=self,
-                stream_handler_tn=stream_handler_nursery,
+                stream_handler_tn=(
+                    stream_handler_nursery
+                    or
+                    self._stream_handler_tn
+                ),
                listen_addrs=accept_addrs,
            )
        )
@ -1145,13 +1149,17 @@ async def open_ipc_server(

    async with maybe_open_nursery(
        nursery=parent_tn,
-    ) as rent_tn:
+    ) as parent_tn:
        no_more_peers = trio.Event()
        no_more_peers.set()

        ipc_server = IPCServer(
-            _parent_tn=rent_tn,
-            _stream_handler_tn=stream_handler_tn or rent_tn,
+            _parent_tn=parent_tn,
+            _stream_handler_tn=(
+                stream_handler_tn
+                or
+                parent_tn
+            ),
            _no_more_peers=no_more_peers,
        )
        try:
--- a/tractor/ipc/_shm.py
+++ b/tractor/ipc/_shm.py
@ -23,14 +23,15 @@ considered optional within the context of this runtime-library.

 """
 from __future__ import annotations
+from multiprocessing import shared_memory as shm
+from multiprocessing.shared_memory import (
+    # SharedMemory,
+    ShareableList,
+)
+import platform
 from sys import byteorder
 import time
 from typing import Optional
-from multiprocessing import shared_memory as shm
-from multiprocessing.shared_memory import (
-    SharedMemory,
-    ShareableList,
-)

 from msgspec import (
    Struct,
@ -61,7 +62,7 @@ except ImportError:
 log = get_logger(__name__)


-disable_mantracker()
+SharedMemory = disable_mantracker()


 class SharedInt:
@ -797,8 +798,15 @@ def open_shm_list(
    # "close" attached shm on actor teardown
    try:
        actor = tractor.current_actor()
+
        actor.lifetime_stack.callback(shml.shm.close)
-        actor.lifetime_stack.callback(shml.shm.unlink)
+
+        # XXX on 3.13+ we don't need to call this?
+        # -> bc we pass `track=False` for `SharedMemory`, or?
+        if (
+            platform.python_version_tuple()[:-1] < ('3', '13')
+        ):
+            actor.lifetime_stack.callback(shml.shm.unlink)
    except RuntimeError:
        log.warning('tractor runtime not active, skipping teardown steps')

--- a/tractor/to_asyncio.py
+++ b/tractor/to_asyncio.py
@ -215,7 +215,7 @@ class LinkedTaskChannel(
        val: Any = None,
    ) -> None:
        '''
-        Synchronize aio-sde with its trio-parent.
+        Synchronize aio-side with its trio-parent.

        '''
        self._aio_started_val = val
@ -459,14 +459,22 @@ def _run_asyncio_task(
                        f'Task exited with final result: {result!r}\n'
                    )

-                # only close the aio (child) side which will relay
-                # a `trio.EndOfChannel` to the trio (parent) side.
+                # XXX ALWAYS close the child-`asyncio`-task-side's
+                # `to_trio` handle which will in turn relay
+                # a `trio.EndOfChannel` to the `trio`-parent.
+                # Consequently the parent `trio` task MUST ALWAYS
+                # check for any `chan._aio_err` to be raised when it
+                # receives an EoC.
+                #
+                # NOTE, there are 2 EoC cases,
+                # - normal/graceful EoC due to the aio-side actually
+                #   terminating its "streaming", but the task did not
+                #   error and is not yet complete.
+                #
+                # - the aio-task terminated and we specially mark the
+                #   closure as due to the `asyncio.Task`'s exit.
                #
-                # XXX NOTE, that trio-side MUST then in such cases
-                # check for a `chan._aio_err` and raise it!!
                to_trio.close()
-                # specially mark the closure as due to the
-                # asyncio.Task terminating!
                chan._closed_by_aio_task = True

            aio_task_complete.set()
@ -846,8 +854,6 @@ async def translate_aio_errors(
            chan._trio_to_raise = aio_err
            trio_err = chan._trio_err = eoc
            #
-            # await tractor.pause(shield=True)
-            #
            # ?TODO?, raise something like a,
            # chan._trio_to_raise = AsyncioErrored()
            # BUT, with the tb rewritten to reflect the underlying
--- a/tractor/trionics/_mngrs.py
+++ b/tractor/trionics/_mngrs.py
@ -31,7 +31,6 @@ from typing import (
    AsyncIterator,
    Callable,
    Hashable,
-    Optional,
    Sequence,
    TypeVar,
    TYPE_CHECKING,
@ -204,7 +203,7 @@ class _Cache:
    a kept-alive-while-in-use async resource.

    '''
-    service_n: Optional[trio.Nursery] = None
+    service_tn: trio.Nursery|None = None
    locks: dict[Hashable, trio.Lock] = {}
    users: int = 0
    values: dict[Any,  Any] = {}
@ -213,7 +212,7 @@ class _Cache:
        tuple[trio.Nursery, trio.Event]
    ] = {}
    # nurseries: dict[int, trio.Nursery] = {}
-    no_more_users: Optional[trio.Event] = None
+    no_more_users: trio.Event|None = None

    @classmethod
    async def run_ctx(
@ -296,15 +295,15 @@ async def maybe_open_context(
                f'task: {task}\n'
                f'task_tn: {task_tn}\n'
            )
-        service_n = tn
+        service_tn = tn
    else:
-        service_n: trio.Nursery = current_actor()._service_n
+        service_tn: trio.Nursery = current_actor()._service_tn

    # TODO: is there any way to allocate
    # a 'stays-open-till-last-task-finshed nursery?
-    # service_n: trio.Nursery
-    # async with maybe_open_nursery(_Cache.service_n) as service_n:
-    #     _Cache.service_n = service_n
+    # service_tn: trio.Nursery
+    # async with maybe_open_nursery(_Cache.service_tn) as service_tn:
+    #     _Cache.service_tn = service_tn

    cache_miss_ke: KeyError|None = None
    maybe_taskc: trio.Cancelled|None = None
@ -326,8 +325,8 @@ async def maybe_open_context(
            mngr = acm_func(**kwargs)
            resources = _Cache.resources
            assert not resources.get(ctx_key), f'Resource exists? {ctx_key}'
-            resources[ctx_key] = (service_n, trio.Event())
-            yielded: Any = await service_n.start(
+            resources[ctx_key] = (service_tn, trio.Event())
+            yielded: Any = await service_tn.start(
                _Cache.run_ctx,
                mngr,
                ctx_key,