Compare commits

...

10 Commits

Author SHA1 Message Date
Tyler Goodlet 249e99a2d9 Pass `hide_tb` to embedded collapser in `open_root_actor()` 2025-08-08 16:35:07 -04:00
Tyler Goodlet b464c9805c Add timeout around inf-streamer suite
Since with the new actorc injection seems to be hanging?
Not sure what exactly the issue is but likely races again
during teardown between the `.run_in_actor()` remote-exc capture
and any actorc after the `portal.cancel()`..

Also tossed in a bp to figure out why actorcs aren't actually showing
outside the `trio.run()`..?
2025-08-08 16:33:46 -04:00
Tyler Goodlet 0aba4dde28 Adjust nested-subs debug test for tbs output
Such that we don't require every single src/relay_uid in the final
output but instead at some point in the pre-output of some prompt.
Added some comments to match each actor sub-layer.
2025-08-06 12:57:40 -04:00
Tyler Goodlet 49aac86167 Add temp breakpoint support to `collapse_eg()` 2025-08-05 16:44:58 -04:00
Tyler Goodlet 39d2a3ee3d WIP, actor-nursery non-graceful-cancel raises EG
Attempting a rework of the post-cancellation "raising semantics" such
that subactors which are `ActorCancelled` as a result of a non-graceful
in-scope error, are acked via a re-raised
`ExceptionGroup[ActorCancelled*N, Exception]`
*outside the an-block*. Eventually, the idea is to have `ActorCancelled`
be relayed from each subactor in response to any
`Actor.cancel()/Portal.cancel_actor()` request much like
`Context.cancel()/ContextCancelled`.

This is a WIP bc it does break a few tests and requires related
`_spawn`-mod-machinery changes to match some of which I'm not yet sure
are required; need to dig into the details of the currently failing
suites first.

`._supervise` patch deats,
- add `ActorNursery.maybe_error` which delivers the maybe-EG or
  `._scope_error` depending on whether `.errors` (now `._errors`, a
  mapping from `Aid`-keys) has entries set for subs.
- raise ^ if non-null in a new outer-`finally` in
  `_open_and_supervise_one_cancels_all_nursery()`; an "outer" block is
  added to ensure all sub-actor-excs are emitted/captured as part of
  `ActorNursery.cancel()` being called (as prior) as well as the
  `da_nursery` being explicitly cancelled alongside it (to unblock the
  tn-block, but still not sure why this is necessary yet?..).
- (now masked) tried injecting actorcs from `.cancel()` loop, but (again
  per more explanation in section below) seems to be suffering a race
  issue with RAE relay?
- left in buncha notes obvi for all this..

`._spawn` patch deats,
- as above, expect `errors: dict` to map from `Aid`-keys.
- pass `errors: dict` into `soft_kill()` since it seemed like we'd want
  to (for now) inject `ActorCancelled` in some cases (but now i'm not
  sure XD).
- tried out a couple spots (which are now masked) to inject
  `ActorCancelled` after calling `Portal.cancel()` in various
  subactor-supervision routines whenever an RAE is not set..
  - oddly seems to be overwriting actual errors (likely due to racing
    with RAE receive and/or actorc-request timeout?) despite the guard
    logic..which clearly doesn't resolve the issue..
- buncha `tn`-style renaming.
2025-08-05 16:34:57 -04:00
Tyler Goodlet 1f269d8c32 Add todo for `tn` to `gather_contexts()` from `find_actor()`? 2025-08-05 11:59:17 -04:00
Tyler Goodlet 0304cac2e8 Use `an` var name in nested subactor debugging ex. 2025-08-05 11:55:45 -04:00
Tyler Goodlet 30d3ccf826 TOSQUASH 313ad93: yeah dun use `._message` as tb-str.. 2025-08-05 01:05:46 -04:00
Tyler Goodlet f614856673 Add an `actorc` test-driven-dev suite
Defining how an actor-nursery should emit an eg based on non-graceful
cancellation in a new `test_actor_nursery` module. Obviously fails atm
until the implementation is completed.
2025-08-04 17:19:35 -04:00
Tyler Goodlet 313ad93e06 Add `ActorCancelled` as a runtime-wide-signal
As in a layer "above" a KBI/SIGINT but "below" a `ContextCancelled` and
generally signalling an interrupt which requests cancellation of the
actor's `trio.run()`.

Impl deats,
- mk the new exc type inherit from our ctxc (for now) but overriding the
  `.canceller` impl to,
  * pull from the `RemoteActorError._extra_msgdata: dict` when no
    `._ipc_msg` is set (which is always to start, until we incorporate
    a new `CancelActor` msg type).
  * not allow a `None` value since we should key-error if not set per
    prev bullet.
- Mk adjustments (related) to parent `RemoteActorError.pformat()` to
  accommodate showing the `.canceller` field in repr output,
  * change `.relay_uid` to not crash when `._ipc_msg` is unset.
  * support `.msg.types.Aid` and use its `.reprol()` from `._mk_fields_str()`.
  * always call `._mk_fields_str()`, not just when `tb_str` is provided,
    and for now use any `._message` in-place of a `tb_str` when
    undefined.
2025-08-04 16:21:24 -04:00
10 changed files with 699 additions and 291 deletions

View File

@ -21,12 +21,12 @@ async def breakpoint_forever():
async def spawn_until(depth=0): async def spawn_until(depth=0):
""""A nested nursery that triggers another ``NameError``. """"A nested nursery that triggers another ``NameError``.
""" """
async with tractor.open_nursery() as n: async with tractor.open_nursery() as an:
if depth < 1: if depth < 1:
await n.run_in_actor(breakpoint_forever) await an.run_in_actor(breakpoint_forever)
p = await n.run_in_actor( p = await an.run_in_actor(
name_error, name_error,
name='name_error' name='name_error'
) )
@ -38,7 +38,7 @@ async def spawn_until(depth=0):
# recusrive call to spawn another process branching layer of # recusrive call to spawn another process branching layer of
# the tree # the tree
depth -= 1 depth -= 1
await n.run_in_actor( await an.run_in_actor(
spawn_until, spawn_until,
depth=depth, depth=depth,
name=f'spawn_until_{depth}', name=f'spawn_until_{depth}',

View File

@ -709,10 +709,41 @@ def test_multi_nested_subactors_error_through_nurseries(
child = spawn('multi_nested_subactors_error_up_through_nurseries') child = spawn('multi_nested_subactors_error_up_through_nurseries')
# timed_out_early: bool = False # timed_out_early: bool = False
at_least_one: list[str] = [
"bdb.BdbQuit",
for send_char in itertools.cycle(['c', 'q']): # leaf subs, which actually raise in "user code"
"src_uid=('breakpoint_forever'",
"src_uid=('name_error'",
# 2nd layer subs
"src_uid=('spawn_until_1'",
"src_uid=('spawn_until_2'",
"src_uid=('spawn_until_3'",
"relay_uid=('spawn_until_0'",
# 1st layer subs
"src_uid=('spawner0'",
"src_uid=('spawner1'",
]
for i, send_char in enumerate(
itertools.cycle(['c', 'q'])
):
try: try:
child.expect(PROMPT) child.expect(PROMPT)
for patt in at_least_one.copy():
if in_prompt_msg(
child,
[patt],
):
print(
f'Found patt in prompt {i}\n'
f'patt: {patt!r}\n'
)
at_least_one.remove(patt)
child.sendline(send_char) child.sendline(send_char)
time.sleep(0.01) time.sleep(0.01)
@ -721,27 +752,15 @@ def test_multi_nested_subactors_error_through_nurseries(
assert_before( assert_before(
child, child,
[ # boxed source errors [
"NameError: name 'doggypants' is not defined", # boxed source errors should show in final
# post-prompt tb to console.
"tractor._exceptions.RemoteActorError:", "tractor._exceptions.RemoteActorError:",
"('name_error'", "NameError: name 'doggypants' is not defined",
"bdb.BdbQuit",
# first level subtrees # TODO? once we get more pedantic with `relay_uid` should
# "tractor._exceptions.RemoteActorError: ('spawner0'", # prolly include all actor-IDs we expect to see in final
"src_uid=('spawner0'", # tb?
# "tractor._exceptions.RemoteActorError: ('spawner1'",
# propagation of errors up through nested subtrees
# "tractor._exceptions.RemoteActorError: ('spawn_until_0'",
# "tractor._exceptions.RemoteActorError: ('spawn_until_1'",
# "tractor._exceptions.RemoteActorError: ('spawn_until_2'",
# ^-NOTE-^ old RAE repr, new one is below with a field
# showing the src actor's uid.
"src_uid=('spawn_until_0'",
"relay_uid=('spawn_until_1'",
"src_uid=('spawn_until_2'",
] ]
) )

View File

@ -0,0 +1,98 @@
'''
Basic `ActorNursery` operations and closure semantics,
- basic remote error collection,
- basic multi-subactor cancellation.
'''
# import os
# import signal
# import platform
# import time
# from itertools import repeat
import pytest
import trio
import tractor
from tractor._exceptions import ActorCancelled
# from tractor._testing import (
# tractor_test,
# )
# from .conftest import no_windows
@pytest.mark.parametrize(
    'num_subs',
    [
        1,
        3,
    ]
)
def test_one_cancels_all(
    start_method: str,
    loglevel: str,
    debug_mode: bool,
    num_subs: int,
):
    '''
    Verify that if a single error bubbles to the an-scope the
    nursery will be cancelled (just like in `trio`); this is a
    one-cancels-all style strategy and is our only supervision
    policy at the moment.

    The parent task spawns `num_subs` daemon subactors, then raises
    a `RuntimeError` inside the nursery block. The nursery is
    expected to re-raise a `BaseExceptionGroup` holding that error
    plus one `ActorCancelled` per subactor.

    '''
    async def main():
        rte = RuntimeError('Uh oh something bad in parent')
        try:
            async with tractor.open_nursery(
                start_method=start_method,
                loglevel=loglevel,
                debug_mode=debug_mode,
            ) as an:

                # spawn `num_subs` daemon actors, all of which
                # should end up cancelled.
                dactor_portals = []
                for i in range(num_subs):
                    name: str = f'sub_{i}'
                    ptl: tractor.Portal = await an.start_actor(
                        name=name,
                        enable_modules=[__name__],
                    )
                    dactor_portals.append(ptl)

                    # wait for booted
                    async with tractor.wait_for_actor(name):
                        print(f'{name!r} is up.')

                # simulate uncaught exc
                raise rte

        # the nursery should error here with an exception group
        # wrapping the scope error and the per-subactor actorcs.
        except BaseExceptionGroup as _beg:
            beg = _beg

        else:
            # without this the assertions below would either be
            # skipped or crash on an unbound `beg`, hiding the
            # real reason for the failure.
            pytest.fail(
                'Actor nursery exited without raising '
                'a `BaseExceptionGroup`?'
            )

        # ?TODO? why can't we do `is` on beg?
        assert (
            beg.exceptions
            ==
            an.maybe_error.exceptions
        )

        assert len(beg.exceptions) == (
            num_subs
            +
            1  # rte from root
        )

        # all subactors should have been implicitly
        # `Portal.cancel_actor()`ed.
        excs = list(beg.exceptions)
        excs.remove(rte)
        for exc in excs:
            assert isinstance(exc, ActorCancelled)

        assert an._scope_error is rte
        assert not an._children
        assert an.cancelled is True

    trio.run(main)

View File

@ -11,6 +11,9 @@ from itertools import repeat
import pytest import pytest
import trio import trio
import tractor import tractor
from tractor._exceptions import (
ActorCancelled,
)
from tractor._testing import ( from tractor._testing import (
tractor_test, tractor_test,
) )
@ -124,7 +127,10 @@ def test_multierror(
) as nursery: ) as nursery:
await nursery.run_in_actor(assert_err, name='errorer1') await nursery.run_in_actor(assert_err, name='errorer1')
portal2 = await nursery.run_in_actor(assert_err, name='errorer2') portal2 = await nursery.run_in_actor(
assert_err,
name='errorer2',
)
# get result(s) from main task # get result(s) from main task
try: try:
@ -137,7 +143,15 @@ def test_multierror(
# here we should get a ``BaseExceptionGroup`` containing exceptions # here we should get a ``BaseExceptionGroup`` containing exceptions
# from both subactors # from both subactors
with pytest.raises(BaseExceptionGroup): with pytest.raises(
expected_exception=(
tractor.RemoteActorError,
# ?TODO, should it be this??
# like `trio`'s strict egs?
BaseExceptionGroup,
),
):
trio.run(main) trio.run(main)
@ -233,8 +247,9 @@ async def stream_forever():
@tractor_test @tractor_test
async def test_cancel_infinite_streamer(start_method): async def test_cancel_infinite_streamer(
start_method: str,
):
# stream for at most 1 seconds # stream for at most 1 seconds
with trio.move_on_after(1) as cancel_scope: with trio.move_on_after(1) as cancel_scope:
async with tractor.open_nursery() as n: async with tractor.open_nursery() as n:
@ -288,6 +303,7 @@ async def test_some_cancels_all(
num_actors_and_errs: tuple, num_actors_and_errs: tuple,
start_method: str, start_method: str,
loglevel: str, loglevel: str,
debug_mode: bool,
): ):
''' '''
Verify a subset of failed subactors causes all others in Verify a subset of failed subactors causes all others in
@ -303,6 +319,11 @@ async def test_some_cancels_all(
ria_func, ria_func,
da_func, da_func,
) = num_actors_and_errs ) = num_actors_and_errs
with trio.fail_after(
3
if not debug_mode
else 999
):
try: try:
async with tractor.open_nursery() as an: async with tractor.open_nursery() as an:
@ -351,13 +372,21 @@ async def test_some_cancels_all(
except first_err as _err: except first_err as _err:
err = _err err = _err
if isinstance(err, BaseExceptionGroup): if isinstance(err, BaseExceptionGroup):
assert len(err.exceptions) == num_actors assert len(err.exceptions) == num_actors
for exc in err.exceptions: for exc in err.exceptions:
# TODO, figure out why these aren't being set?
if isinstance(exc, ActorCancelled):
breakpoint()
if isinstance(exc, tractor.RemoteActorError): if isinstance(exc, tractor.RemoteActorError):
assert exc.boxed_type == err_type assert exc.boxed_type == err_type
else: else:
assert isinstance(exc, trio.Cancelled) assert isinstance(exc, trio.Cancelled)
elif isinstance(err, tractor.RemoteActorError): elif isinstance(err, tractor.RemoteActorError):
assert err.boxed_type == err_type assert err.boxed_type == err_type

View File

@ -27,7 +27,7 @@ from typing import (
) )
from contextlib import asynccontextmanager as acm from contextlib import asynccontextmanager as acm
from tractor.log import get_logger from .log import get_logger
from .trionics import ( from .trionics import (
gather_contexts, gather_contexts,
collapse_eg, collapse_eg,
@ -217,7 +217,7 @@ async def find_actor(
raise_on_none: bool = False, raise_on_none: bool = False,
) -> AsyncGenerator[ ) -> AsyncGenerator[
Portal | list[Portal] | None, Portal|list[Portal]|None,
None, None,
]: ]:
''' '''
@ -259,6 +259,7 @@ async def find_actor(
collapse_eg(), collapse_eg(),
gather_contexts( gather_contexts(
mngrs=maybe_portals, mngrs=maybe_portals,
# tn=tn, # ?TODO, helps to pass rent tn here?
) as portals, ) as portals,
): ):
# log.runtime( # log.runtime(

View File

@ -46,6 +46,7 @@ from msgspec import (
from tractor._state import current_actor from tractor._state import current_actor
from tractor.log import get_logger from tractor.log import get_logger
from tractor.msg import ( from tractor.msg import (
Aid,
Error, Error,
PayloadMsg, PayloadMsg,
MsgType, MsgType,
@ -479,8 +480,9 @@ class RemoteActorError(Exception):
@property @property
def relay_uid(self) -> tuple[str, str]|None: def relay_uid(self) -> tuple[str, str]|None:
if msg := self._ipc_msg:
return tuple( return tuple(
self._ipc_msg.relay_path[-1] msg.relay_path[-1]
) )
@property @property
@ -521,7 +523,8 @@ class RemoteActorError(Exception):
for key in fields: for key in fields:
if ( if (
key == 'relay_uid' key == 'relay_uid'
and not self.is_inception() and
not self.is_inception()
): ):
continue continue
@ -534,6 +537,13 @@ class RemoteActorError(Exception):
None, None,
) )
) )
if (
key == 'canceller'
and
isinstance(val, Aid)
):
val: str = val.reprol(sin_uuid=False)
# TODO: for `.relay_path` on multiline? # TODO: for `.relay_path` on multiline?
# if not isinstance(val, str): # if not isinstance(val, str):
# val_str = pformat(val) # val_str = pformat(val)
@ -623,12 +633,22 @@ class RemoteActorError(Exception):
# IFF there is an embedded traceback-str we always # IFF there is an embedded traceback-str we always
# draw the ascii-box around it. # draw the ascii-box around it.
body: str = '' body: str = ''
if tb_str := self.tb_str:
fields: str = self._mk_fields_str( fields: str = self._mk_fields_str(
_body_fields _body_fields
+ +
self.extra_body_fields, self.extra_body_fields,
) )
tb_str: str = (
self.tb_str
#
# ^TODO? what to use instead? if anything?
# -[ ] ensure the `.message` doesn't show up 2x in output ya?
# -[ ] ._message isn't really right?
# or
# self._message
)
if tb_str:
from tractor.devx import ( from tractor.devx import (
pformat_boxed_tb, pformat_boxed_tb,
) )
@ -640,7 +660,7 @@ class RemoteActorError(Exception):
# just after <Type( # just after <Type(
# |___ .. # |___ ..
tb_body_indent=1, tb_body_indent=1,
boxer_header=self.relay_uid, boxer_header=self.relay_uid or '-',
) )
# !TODO, it'd be nice to import these top level without # !TODO, it'd be nice to import these top level without
@ -713,6 +733,10 @@ class RemoteActorError(Exception):
class ContextCancelled(RemoteActorError): class ContextCancelled(RemoteActorError):
''' '''
IPC context cancellation signal/msg.
Often reffed with the short-hand: "ctxc".
Inter-actor task context was cancelled by either a call to Inter-actor task context was cancelled by either a call to
``Portal.cancel_actor()`` or ``Context.cancel()``. ``Portal.cancel_actor()`` or ``Context.cancel()``.
@ -737,8 +761,8 @@ class ContextCancelled(RemoteActorError):
- (simulating) an IPC transport network outage - (simulating) an IPC transport network outage
- a (malicious) pkt sent specifically to cancel an actor's - a (malicious) pkt sent specifically to cancel an actor's
runtime non-gracefully without ensuring ongoing RPC tasks are runtime non-gracefully without ensuring ongoing RPC tasks
incrementally cancelled as is done with: are incrementally cancelled as is done with:
`Actor` `Actor`
|_`.cancel()` |_`.cancel()`
|_`.cancel_soon()` |_`.cancel_soon()`
@ -759,6 +783,59 @@ class ContextCancelled(RemoteActorError):
# src_actor_uid = canceller # src_actor_uid = canceller
class ActorCancelled(ContextCancelled):
    '''
    Runtime-layer cancellation signal/msg.

    Indicates a "graceful interrupt" of the machinery scheduled by
    the py-proc's `trio.run()`.

    Often reffed with the short-hand: "actorc".

    Raised from within `an: ActorNursery` (via an `ExceptionGroup`)
    when an actor has been "process wide" cancel-called using any of,

    - `ActorNursery.cancel()`
    - `Portal.cancel_actor()`

    **and** that cancel request was part of a "non graceful" cancel
    condition.

    That is, whenever an exception is to be raised outside an `an`
    scope-block due to some error raised-in/relayed-to that scope.
    In such cases, for every subactor which was subsequently
    cancelled (according to the `an`'s supervision strat), this is
    normally raised once per subactor portal.

    '''
    @property
    def canceller(self) -> Aid|None:
        '''
        Return the (maybe) `Actor.aid: Aid` for the requesting-author
        of this actorc.

        The value is read from `._ipc_msg.canceller` when an IPC msg
        is set, otherwise from `._extra_msgdata['canceller']`; the
        latter is a plain subscript so a missing entry raises
        instead of silently yielding `None`.

        Emit a warning msg and return `None` when the looked-up
        `.canceller` value is falsy.

        See additional relevant notes in
        `ContextCancelled.canceller`.

        '''
        value: Aid|None
        if msg := self._ipc_msg:
            value = msg.canceller
        else:
            value = self._extra_msgdata['canceller']

        if value:
            return value

        log.warning(
            'Actor cancelled without a requesting actor?\n'
            'Maybe the IPC transport ended abruptly?\n\n'
            f'{self}'
        )
        # explicit so the declared `Aid|None` return is visible.
        return None
class MsgTypeError( class MsgTypeError(
RemoteActorError, RemoteActorError,
): ):

View File

@ -88,7 +88,8 @@ async def maybe_block_bp(
bp_blocked: bool bp_blocked: bool
if ( if (
debug_mode debug_mode
and maybe_enable_greenback and
maybe_enable_greenback
and ( and (
maybe_mod := await debug.maybe_init_greenback( maybe_mod := await debug.maybe_init_greenback(
raise_not_found=False, raise_not_found=False,
@ -478,7 +479,10 @@ async def open_root_actor(
# start runtime in a bg sub-task, yield to caller. # start runtime in a bg sub-task, yield to caller.
async with ( async with (
collapse_eg(), collapse_eg(
hide_tb=hide_tb,
# bp=True,
),
trio.open_nursery() as root_tn, trio.open_nursery() as root_tn,
# XXX, finally-footgun below? # XXX, finally-footgun below?

View File

@ -50,7 +50,11 @@ from tractor._addr import UnwrappedAddress
from tractor._portal import Portal from tractor._portal import Portal
from tractor._runtime import Actor from tractor._runtime import Actor
from tractor._entry import _mp_main from tractor._entry import _mp_main
from tractor._exceptions import ActorFailure from tractor._exceptions import (
ActorCancelled,
ActorFailure,
# NoResult,
)
from tractor.msg import ( from tractor.msg import (
types as msgtypes, types as msgtypes,
pretty_struct, pretty_struct,
@ -137,7 +141,6 @@ def try_set_start_method(
async def exhaust_portal( async def exhaust_portal(
portal: Portal, portal: Portal,
actor: Actor actor: Actor
@ -185,10 +188,12 @@ async def exhaust_portal(
async def cancel_on_completion( async def cancel_on_completion(
portal: Portal, portal: Portal,
actor: Actor, actor: Actor,
errors: dict[tuple[str, str], Exception], errors: dict[
msgtypes.Aid,
Exception,
],
) -> None: ) -> None:
''' '''
@ -209,24 +214,57 @@ async def cancel_on_completion(
portal, portal,
actor, actor,
) )
aid: msgtypes.Aid = actor.aid
repr_aid: str = aid.reprol(sin_uuid=False)
if isinstance(result, Exception): if isinstance(result, Exception):
errors[actor.uid]: Exception = result errors[aid]: Exception = result
log.cancel( log.cancel(
'Cancelling subactor runtime due to error:\n\n' 'Cancelling subactor {repr_aid!r} runtime due to error\n'
f'Portal.cancel_actor() => {portal.channel.uid}\n\n' f'\n'
f'error: {result}\n' f'Portal.cancel_actor() => {portal.channel.uid}\n'
f'\n'
f'{result!r}\n'
) )
else: else:
log.runtime( report: str = (
'Cancelling subactor gracefully:\n\n' f'Cancelling subactor {repr_aid!r} gracefully..\n'
f'Portal.cancel_actor() => {portal.channel.uid}\n\n' f'\n'
f'result: {result}\n' )
canc_info: str = (
f'Portal.cancel_actor() => {portal.chan.uid}\n'
f'\n'
f'final-result => {result!r}\n'
)
log.cancel(
report
+
canc_info
) )
# cancel the process now that we have a final result # cancel the process now that we have a final result
await portal.cancel_actor() await portal.cancel_actor()
if (
not errors.get(aid)
# and
# result is NoResult
):
pass
# await debug.pause(shield=True)
# errors[aid] = ActorCancelled(
# message=(
# f'Cancelled subactor {repr_aid!r}\n'
# f'{canc_info}\n'
# ),
# canceller=current_actor().aid,
# # TODO? should we have a ack-msg?
# # ipc_msg=??
# # boxed_type=trio.Cancelled,
# )
async def hard_kill( async def hard_kill(
proc: trio.Process, proc: trio.Process,
@ -314,6 +352,10 @@ async def soft_kill(
Awaitable, Awaitable,
], ],
portal: Portal, portal: Portal,
errors: dict[
msgtypes.Aid,
Exception,
],
) -> None: ) -> None:
''' '''
@ -357,8 +399,8 @@ async def soft_kill(
# below. This means we try to do a graceful teardown # below. This means we try to do a graceful teardown
# via sending a cancel message before getting out # via sending a cancel message before getting out
# zombie killing tools. # zombie killing tools.
async with trio.open_nursery() as n: async with trio.open_nursery() as tn:
n.cancel_scope.shield = True tn.cancel_scope.shield = True
async def cancel_on_proc_deth(): async def cancel_on_proc_deth():
''' '''
@ -368,24 +410,35 @@ async def soft_kill(
''' '''
await wait_func(proc) await wait_func(proc)
n.cancel_scope.cancel() tn.cancel_scope.cancel()
# start a task to wait on the termination of the # start a task to wait on the termination of the
# process by itself waiting on a (caller provided) wait # process by itself waiting on a (caller provided) wait
# function which should unblock when the target process # function which should unblock when the target process
# has terminated. # has terminated.
n.start_soon(cancel_on_proc_deth) tn.start_soon(cancel_on_proc_deth)
# send the actor-runtime a cancel request. # send the actor-runtime a cancel request.
await portal.cancel_actor() await portal.cancel_actor()
# if not errors.get(peer_aid):
# errors[peer_aid] = ActorCancelled(
# message=(
# 'Sub-actor cancelled gracefully by parent\n'
# ),
# canceller=current_actor().aid,
# # TODO? should we have a ack-msg?
# # ipc_msg=??
# # boxed_type=trio.Cancelled,
# )
if proc.poll() is None: # type: ignore if proc.poll() is None: # type: ignore
log.warning( log.warning(
'Subactor still alive after cancel request?\n\n' 'Subactor still alive after cancel request?\n\n'
f'uid: {peer_aid}\n' f'uid: {peer_aid}\n'
f'|_{proc}\n' f'|_{proc}\n'
) )
n.cancel_scope.cancel() tn.cancel_scope.cancel()
raise raise
@ -393,7 +446,10 @@ async def new_proc(
name: str, name: str,
actor_nursery: ActorNursery, actor_nursery: ActorNursery,
subactor: Actor, subactor: Actor,
errors: dict[tuple[str, str], Exception], errors: dict[
msgtypes.Aid,
Exception,
],
# passed through to actor main # passed through to actor main
bind_addrs: list[UnwrappedAddress], bind_addrs: list[UnwrappedAddress],
@ -432,7 +488,10 @@ async def trio_proc(
name: str, name: str,
actor_nursery: ActorNursery, actor_nursery: ActorNursery,
subactor: Actor, subactor: Actor,
errors: dict[tuple[str, str], Exception], errors: dict[
msgtypes.Aid,
Exception,
],
# passed through to actor main # passed through to actor main
bind_addrs: list[UnwrappedAddress], bind_addrs: list[UnwrappedAddress],
@ -555,9 +614,9 @@ async def trio_proc(
with trio.CancelScope(shield=True): with trio.CancelScope(shield=True):
await actor_nursery._join_procs.wait() await actor_nursery._join_procs.wait()
async with trio.open_nursery() as nursery: async with trio.open_nursery() as ptl_reaper_tn:
if portal in actor_nursery._cancel_after_result_on_exit: if portal in actor_nursery._cancel_after_result_on_exit:
nursery.start_soon( ptl_reaper_tn.start_soon(
cancel_on_completion, cancel_on_completion,
portal, portal,
subactor, subactor,
@ -570,7 +629,8 @@ async def trio_proc(
await soft_kill( await soft_kill(
proc, proc,
trio.Process.wait, # XXX, uses `pidfd_open()` below. trio.Process.wait, # XXX, uses `pidfd_open()` below.
portal portal,
errors,
) )
# cancel result waiter that may have been spawned in # cancel result waiter that may have been spawned in
@ -579,7 +639,7 @@ async def trio_proc(
'Cancelling portal result reaper task\n' 'Cancelling portal result reaper task\n'
f'c)> {subactor.aid.reprol()!r}\n' f'c)> {subactor.aid.reprol()!r}\n'
) )
nursery.cancel_scope.cancel() ptl_reaper_tn.cancel_scope.cancel()
finally: finally:
# XXX NOTE XXX: The "hard" reap since no actor zombies are # XXX NOTE XXX: The "hard" reap since no actor zombies are
@ -652,7 +712,10 @@ async def mp_proc(
name: str, name: str,
actor_nursery: ActorNursery, # type: ignore # noqa actor_nursery: ActorNursery, # type: ignore # noqa
subactor: Actor, subactor: Actor,
errors: dict[tuple[str, str], Exception], errors: dict[
msgtypes.Aid,
Exception,
],
# passed through to actor main # passed through to actor main
bind_addrs: list[UnwrappedAddress], bind_addrs: list[UnwrappedAddress],
parent_addr: UnwrappedAddress, parent_addr: UnwrappedAddress,
@ -777,7 +840,7 @@ async def mp_proc(
cancel_on_completion, cancel_on_completion,
portal, portal,
subactor, subactor,
errors errors,
) )
# This is a "soft" (cancellable) join/reap which # This is a "soft" (cancellable) join/reap which
@ -786,7 +849,8 @@ async def mp_proc(
await soft_kill( await soft_kill(
proc, proc,
proc_waiter, proc_waiter,
portal portal,
errors,
) )
# cancel result waiter that may have been spawned in # cancel result waiter that may have been spawned in

View File

@ -30,6 +30,9 @@ import warnings
import trio import trio
from .msg import (
types as msgtypes,
)
from .devx import ( from .devx import (
debug, debug,
pformat as _pformat, pformat as _pformat,
@ -48,6 +51,7 @@ from .trionics import (
) )
from ._exceptions import ( from ._exceptions import (
ContextCancelled, ContextCancelled,
ActorCancelled,
) )
from ._root import ( from ._root import (
open_root_actor, open_root_actor,
@ -99,7 +103,10 @@ class ActorNursery:
actor: Actor, actor: Actor,
ria_nursery: trio.Nursery, ria_nursery: trio.Nursery,
da_nursery: trio.Nursery, da_nursery: trio.Nursery,
errors: dict[tuple[str, str], BaseException], errors: dict[
msgtypes.Aid,
BaseException,
],
) -> None: ) -> None:
# self.supervisor = supervisor # TODO # self.supervisor = supervisor # TODO
@ -117,9 +124,11 @@ class ActorNursery:
] ]
] = {} ] = {}
# signals when it is ok to start waiting o subactor procs
# for termination.
self._join_procs = trio.Event() self._join_procs = trio.Event()
self._at_least_one_child_in_debug: bool = False self._at_least_one_child_in_debug: bool = False
self.errors = errors self._errors = errors
self._scope_error: BaseException|None = None self._scope_error: BaseException|None = None
self.exited = trio.Event() self.exited = trio.Event()
@ -260,7 +269,7 @@ class ActorNursery:
name, name,
self, self,
subactor, subactor,
self.errors, self._errors,
bind_addrs, bind_addrs,
parent_addr, parent_addr,
_rtv, # run time vars _rtv, # run time vars
@ -364,7 +373,9 @@ class ActorNursery:
# then `._children`.. # then `._children`..
children: dict = self._children children: dict = self._children
child_count: int = len(children) child_count: int = len(children)
msg: str = f'Cancelling actor nursery with {child_count} children\n' msg: str = (
f'Cancelling actor-nursery with {child_count} children\n'
)
server: IPCServer = self._actor.ipc_server server: IPCServer = self._actor.ipc_server
@ -391,7 +402,9 @@ class ActorNursery:
else: else:
if portal is None: # actor hasn't fully spawned yet if portal is None: # actor hasn't fully spawned yet
event: trio.Event = server._peer_connected[subactor.uid] event: trio.Event = server._peer_connected[
subactor.uid
]
log.warning( log.warning(
f"{subactor.uid} never 't finished spawning?" f"{subactor.uid} never 't finished spawning?"
) )
@ -416,7 +429,20 @@ class ActorNursery:
# spawn cancel tasks for each sub-actor # spawn cancel tasks for each sub-actor
assert portal assert portal
if portal.channel.connected(): if portal.channel.connected():
tn.start_soon(portal.cancel_actor)
async def canc_subactor():
await portal.cancel_actor()
# aid: msgtypes.Aid = subactor.aid
# reprol: str = aid.reprol(sin_uuid=False)
# if not self._errors.get(aid):
# self._errors[aid] = ActorCancelled(
# message=(
# f'Sub-actor {reprol!r} cancelled gracefully by parent nursery\n'
# ),
# canceller=self._actor.aid,
# )
tn.start_soon(canc_subactor)
log.cancel(msg) log.cancel(msg)
# if we cancelled the cancel (we hung cancelling remote actors) # if we cancelled the cancel (we hung cancelling remote actors)
@ -442,6 +468,47 @@ class ActorNursery:
# mark ourselves as having (tried to have) cancelled all subactors # mark ourselves as having (tried to have) cancelled all subactors
self._join_procs.set() self._join_procs.set()
@property
def maybe_error(self) -> (
    BaseException|
    BaseExceptionGroup|
    None
):
    '''
    Deliver any captured scope errors including those relayed
    from subactors such as `ActorCancelled` during a non-graceful
    cancellation scenario.

    When more than a "graceful cancel" occurs, wrap all collected
    sub-exceptions in an exception group. Resolution order,

    - no entries in `._errors`: return `._scope_error` verbatim
      (possibly `None`).
    - more than one entry and at least one whose exact type is
      not `ActorCancelled`: return a group of all entries.
    - otherwise: return the first collected entry.

    NOTE, `._scope_error` is not itself packed into the group;
    only the values of `._errors` are.

    '''
    scope_exc: BaseException|None = self._scope_error

    # XXX NOTE, only pack an eg if there is at least one
    # non-actorc exception received from a subactor, OR
    # return `._scope_error` verbatim.
    errors = self._errors
    if not errors:
        return scope_exc

    excs: list[BaseException] = list(errors.values())
    if (
        len(excs) > 1
        and
        any(
            type(exc) is not ActorCancelled
            for exc in excs
        )
    ):
        # `._errors` values are typed as `BaseException`, and an
        # `ExceptionGroup` refuses non-`Exception` members with
        # a `TypeError`. The `BaseExceptionGroup` constructor
        # still hands back an `ExceptionGroup` whenever every
        # member is an `Exception`, so prior results are kept.
        return BaseExceptionGroup(
            'ActorNursery multi-errored with',
            tuple(excs),
        )

    # deliver the first (often the lone) subactor exc
    return excs[0]
@acm @acm
async def _open_and_supervise_one_cancels_all_nursery( async def _open_and_supervise_one_cancels_all_nursery(
@ -457,7 +524,10 @@ async def _open_and_supervise_one_cancels_all_nursery(
inner_err: BaseException|None = None inner_err: BaseException|None = None
# the collection of errors retreived from spawned sub-actors # the collection of errors retreived from spawned sub-actors
errors: dict[tuple[str, str], BaseException] = {} errors: dict[
msgtypes.Aid,
BaseException,
] = {}
# This is the outermost level "deamon actor" nursery. It is awaited # This is the outermost level "deamon actor" nursery. It is awaited
# **after** the below inner "run in actor nursery". This allows for # **after** the below inner "run in actor nursery". This allows for
@ -467,6 +537,7 @@ async def _open_and_supervise_one_cancels_all_nursery(
# `ActorNursery.start_actor()`). # `ActorNursery.start_actor()`).
# errors from this daemon actor nursery bubble up to caller # errors from this daemon actor nursery bubble up to caller
try:
async with ( async with (
collapse_eg(), collapse_eg(),
trio.open_nursery() as da_nursery, trio.open_nursery() as da_nursery,
@ -506,7 +577,7 @@ async def _open_and_supervise_one_cancels_all_nursery(
except BaseException as _inner_err: except BaseException as _inner_err:
inner_err = _inner_err inner_err = _inner_err
errors[actor.uid] = inner_err # errors[actor.aid] = inner_err
# If we error in the root but the debugger is # If we error in the root but the debugger is
# engaged we don't want to prematurely kill (and # engaged we don't want to prematurely kill (and
@ -586,8 +657,6 @@ async def _open_and_supervise_one_cancels_all_nursery(
) as _outer_err: ) as _outer_err:
outer_err = _outer_err outer_err = _outer_err
an._scope_error = outer_err or inner_err
# XXX: yet another guard before allowing the cancel # XXX: yet another guard before allowing the cancel
# sequence in case a (single) child is in debug. # sequence in case a (single) child is in debug.
await debug.maybe_wait_for_debugger( await debug.maybe_wait_for_debugger(
@ -605,38 +674,75 @@ async def _open_and_supervise_one_cancels_all_nursery(
) )
with trio.CancelScope(shield=True): with trio.CancelScope(shield=True):
await an.cancel() await an.cancel()
raise raise
finally: finally:
# No errors were raised while awaiting ".run_in_actor()" scope_exc = an._scope_error = outer_err or inner_err
# actors but those actors may have returned remote errors as # await debug.pause(shield=True)
# results (meaning they errored remotely and have relayed # if scope_exc:
# those errors back to this parent actor). The errors are # errors[actor.aid] = scope_exc
# collected in ``errors`` so cancel all actors, summarize
# all errors and re-raise. # show this frame on any internal error
if errors: if (
not an.cancelled
and
scope_exc
):
__tracebackhide__: bool = False
# NOTE, it's possible no errors were raised while
# awaiting ".run_in_actor()" actors but those
# sub-actors may have delivered remote errors as
# results, normally captured via machinery in
# `._spawn.cancel_on_completion()`.
#
# Any such remote errors are collected in `an._errors`
# which is summarized via `ActorNursery.maybe_error`
# which is maybe re-raised in an outer block (below).
#
# So here we first cancel all subactors, then summarize
# all errors and then later (in that outer block)
# maybe-raise on a "non-graceful" cancellation
# outcome, normally as a summary EG.
if (
scope_exc
or
errors
):
if an._children: if an._children:
with trio.CancelScope(shield=True): with trio.CancelScope(shield=True):
await an.cancel() await an.cancel()
# use `BaseExceptionGroup` as needed # cancel outer tn so we unblock outside this
if len(errors) > 1: # finally!
raise BaseExceptionGroup( da_nursery.cance_scope.cancel()
'tractor.ActorNursery errored with', #
tuple(errors.values()), # ^TODO? still don't get why needed?
) # - an.cancel() should cause all spawn-subtasks
else: # to eventually exit?
raise list(errors.values())[0] # - also, could (instead) we sync to an event here before
# (ever) calling `an.cancel()`??
# show frame on any (likely) internal error # `da_nursery` scope end, thus a checkpoint.
if ( finally:
not an.cancelled
and an._scope_error
):
__tracebackhide__: bool = False
# da_nursery scope end - nursery checkpoint # raise any eg compiled from all subs
# final exit # ??TODO should we also adopt strict-egs here like
# `trio.Nursery`??
#
# XXX justification notes,
# docs: https://trio.readthedocs.io/en/stable/reference-core.html#historical-note-non-strict-exceptiongroups
# anthropic: https://discuss.python.org/t/using-exceptiongroup-at-anthropic-experience-report/20888
# gh: https://github.com/python-trio/trio/issues/611
if an_exc := an.maybe_error:
raise an_exc
if scope_exc := an._scope_error:
raise scope_exc
# @acm-fn scope exit
_shutdown_msg: str = ( _shutdown_msg: str = (
@ -647,7 +753,7 @@ _shutdown_msg: str = (
@acm @acm
async def open_nursery( async def open_nursery(
*, # named params only! *, # named params only!
hide_tb: bool = True, hide_tb: bool = False,
**kwargs, **kwargs,
# ^TODO, paramspec for `open_root_actor()` # ^TODO, paramspec for `open_root_actor()`
@ -683,16 +789,21 @@ async def open_nursery(
# mark us for teardown on exit # mark us for teardown on exit
implicit_runtime: bool = True implicit_runtime: bool = True
async with open_root_actor( async with (
# collapse_eg(hide_tb=hide_tb),
open_root_actor(
hide_tb=hide_tb, hide_tb=hide_tb,
**kwargs, **kwargs,
) as actor: ) as actor,
):
assert actor is current_actor() assert actor is current_actor()
try: try:
async with _open_and_supervise_one_cancels_all_nursery( async with (
_open_and_supervise_one_cancels_all_nursery(
actor actor
) as an: ) as an
):
# NOTE: mark this nursery as having # NOTE: mark this nursery as having
# implicitly started the root actor so # implicitly started the root actor so

View File

@ -78,7 +78,6 @@ def collapse_exception_group(
def get_collapsed_eg( def get_collapsed_eg(
beg: BaseExceptionGroup, beg: BaseExceptionGroup,
bp: bool = False,
) -> BaseException|None: ) -> BaseException|None:
''' '''
If the input beg can collapse to a single sub-exception which is If the input beg can collapse to a single sub-exception which is
@ -92,7 +91,6 @@ def get_collapsed_eg(
return maybe_exc return maybe_exc
@acm @acm
async def collapse_eg( async def collapse_eg(
hide_tb: bool = True, hide_tb: bool = True,
@ -102,6 +100,8 @@ async def collapse_eg(
# trio.Cancelled, # trio.Cancelled,
}, },
add_notes: bool = True, add_notes: bool = True,
bp: bool = False,
): ):
''' '''
If `BaseExceptionGroup` raised in the body scope is If `BaseExceptionGroup` raised in the body scope is
@ -115,6 +115,11 @@ async def collapse_eg(
yield yield
except BaseExceptionGroup as _beg: except BaseExceptionGroup as _beg:
beg = _beg beg = _beg
if bp:
import tractor
await tractor.pause(shield=True)
if ( if (
(exc := get_collapsed_eg(beg)) (exc := get_collapsed_eg(beg))
and and