From 4e71b57bf5d92563b8dc003f4cd5b51a550a7694 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 25 Mar 2024 16:31:16 -0400 Subject: [PATCH 001/305] Prepare to offer (dynamic) `.msg.Codec` overrides By simply allowing an input `codec: tuple` of funcs for now to the `MsgpackTCPStream` transport but, ideally wrapping this in a `Codec` type with an API for dynamic extension of the interchange lib's msg processing settings. Right now we're tied to `msgspec.msgpack` for this transport but with the right design this can likely extend to other libs in the future. Relates to starting feature work toward #36, #196, #365. --- tractor/_ipc.py | 43 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/tractor/_ipc.py b/tractor/_ipc.py index f57d3bd8..2b5df698 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -30,6 +30,7 @@ import struct import typing from typing import ( Any, + Callable, runtime_checkable, Protocol, Type, @@ -123,6 +124,16 @@ class MsgpackTCPStream(MsgTransport): stream: trio.SocketStream, prefix_size: int = 4, + # XXX optionally provided codec pair for `msgspec`: + # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types + # + # TODO: define this as a `Codec` struct which can be + # overriden dynamically by the application/runtime. + codec: tuple[ + Callable[[Any], Any]|None, # coder + Callable[[type, Any], Any]|None, # decoder + ]|None = None, + ) -> None: self.stream = stream @@ -138,12 +149,18 @@ class MsgpackTCPStream(MsgTransport): # public i guess? self.drained: list[dict] = [] - self.recv_stream = BufferedReceiveStream(transport_stream=stream) + self.recv_stream = BufferedReceiveStream( + transport_stream=stream + ) self.prefix_size = prefix_size # TODO: struct aware messaging coders - self.encode = msgspec.msgpack.Encoder().encode - self.decode = msgspec.msgpack.Decoder().decode # dict[str, Any]) + self.encode = msgspec.msgpack.Encoder( + enc_hook=codec[0] if codec else None, + ).encode + self.decode = msgspec.msgpack.Decoder( + dec_hook=codec[1] if codec else None, + ).decode async def _iter_packets(self) -> AsyncGenerator[dict, None]: '''Yield packets from the underlying stream. @@ -349,9 +366,25 @@ class Channel: stream: trio.SocketStream, type_key: tuple[str, str]|None = None, + # XXX optionally provided codec pair for `msgspec`: + # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types + codec: tuple[ + Callable[[Any], Any], # coder + Callable[[type, Any], Any], # decoder + ]|None = None, + ) -> MsgTransport: - type_key = type_key or self._transport_key - self._transport = get_msg_transport(type_key)(stream) + type_key = ( + type_key + or + self._transport_key + ) + self._transport = get_msg_transport( + type_key + )( + stream, + codec=codec, + ) return self._transport def __repr__(self) -> str: -- 2.34.1 From 65192e80c15a5df98214a3bf0c6ebc2ca8089115 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 26 Mar 2024 15:50:47 -0400 Subject: [PATCH 002/305] Proto `MsgCodec`, an interchange fmt modify API Fitting in line with the issues outstanding: - #36: (msg)spec-ing out our SCIPP (structured-con-inter-proc-prot). (https://github.com/goodboy/tractor/issues/36) - #196: adding strictly typed IPC msg dialog schemas, more or less better described as "dialog/transaction scoped message specs" using `msgspec`'s tagged unions and custom codecs. (https://github.com/goodboy/tractor/issues/196) - #365: using modern static type-annots to drive capability based messaging and RPC. 
(statically https://github.com/goodboy/tractor/issues/365) This is a first draft of a new API for dynamically overriding IPC msg codecs for a given interchange lib from any task in the runtime. Right now we obviously only support `msgspec` but ideally this API holds general enough to be used for other backends eventually (like `capnproto`, and apache arrow). Impl is in a new `tractor.msg._codec` with: - a new `MsgCodec` type for encapsing `msgspec.msgpack.Encoder/Decoder` pairs and configuring any custom enc/dec_hooks or typed decoding. - factory `mk_codec()` for creating new codecs ad-hoc from a task. - `contextvars` support for a new `trio.Task` scoped `_ctxvar_MsgCodec: ContextVar[MsgCodec]` named 'msgspec_codec'. - `apply_codec()` for temporarily modifying the above per task as needed around `.open_context()` / `.open_stream()` operation. A new test (suite) in `test_caps_msging.py`: - verify a parent and its child can enable the same custom codec (in this case to transmit `NamespacePath`s) with tons of pedantic ctx-vars checks. - ToDo: still need to implement #36 msg types in order to be able to get decodes working (as in `MsgStream.receive()` will deliver an already created `NamespacePath` obj) since currently all msgs come packed in `dict`-msg wrapper packets.. -> use the proto from PR #35 to get nested `msgspec.Raw` processing up and running Bo --- tests/test_caps_msging.py | 198 +++++++++++++++++++++++++++++ tractor/_ipc.py | 74 ++++++++--- tractor/msg/__init__.py | 10 ++ tractor/msg/_codec.py | 253 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 519 insertions(+), 16 deletions(-) create mode 100644 tests/test_caps_msging.py create mode 100644 tractor/msg/_codec.py diff --git a/tests/test_caps_msging.py b/tests/test_caps_msging.py new file mode 100644 index 00000000..f659cb13 --- /dev/null +++ b/tests/test_caps_msging.py @@ -0,0 +1,198 @@ +''' +Functional audits for our "capability based messaging (schema)" feats. + +B~) + +''' +from typing import ( + Any, + Type, +) +from contextvars import ( + Context, +) + +import tractor +from tractor.msg import ( + _def_msgspec_codec, + _ctxvar_MsgCodec, + + NamespacePath, + MsgCodec, + mk_codec, + apply_codec, + current_msgspec_codec, +) +import trio + +# TODO: wrap these into `._codec` such that user can just pass +# a type table of some sort? +def enc_hook(obj: Any) -> Any: + if isinstance(obj, NamespacePath): + return str(obj) + else: + raise NotImplementedError( + f'Objects of type {type(obj)} are not supported' + ) + + +def dec_hook(type: Type, obj: Any) -> Any: + print(f'type is: {type}') + if type is NamespacePath: + return NamespacePath(obj) + else: + raise NotImplementedError( + f'Objects of type {type(obj)} are not supported' + ) + + +def ex_func(*args): + print(f'ex_func({args})') + + +def mk_custom_codec() -> MsgCodec: + # apply custom hooks and set a `Decoder` which only + # loads `NamespacePath` types. + nsp_codec: MsgCodec = mk_codec( + dec_types=NamespacePath, + enc_hook=enc_hook, + dec_hook=dec_hook, + ) + + # TODO: validate `MsgCodec` interface/semantics? + # -[ ] simple field tests to ensure caching + reset is workin? + # -[ ] custom / changing `.decoder()` calls? + # + # dec = nsp_codec.decoder( + # types=NamespacePath, + # ) + # assert nsp_codec.dec is dec + return nsp_codec + + +@tractor.context +async def send_back_nsp( + ctx: tractor.Context, + +) -> None: + ''' + Setup up a custom codec to load instances of `NamespacePath` + and ensure we can round trip a func ref with our parent. 
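+
+    (Decoding to a native `NamespacePath` on the receive side is
+    still a ToDo pending the #36 msg types; for now the value round
+    trips as a plain `str`, hence the `isinstance(msg, str)` asserts
+    below.)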
+ + ''' + task: trio.Task = trio.lowlevel.current_task() + task_ctx: Context = task.context + assert _ctxvar_MsgCodec not in task_ctx + + nsp_codec: MsgCodec = mk_custom_codec() + with apply_codec(nsp_codec) as codec: + chk_codec_applied( + custom_codec=nsp_codec, + enter_value=codec, + ) + + nsp = NamespacePath.from_ref(ex_func) + await ctx.started(nsp) + + async with ctx.open_stream() as ipc: + async for msg in ipc: + + assert msg == f'{__name__}:ex_func' + + # TODO: as per below + # assert isinstance(msg, NamespacePath) + assert isinstance(msg, str) + + +def chk_codec_applied( + custom_codec: MsgCodec, + enter_value: MsgCodec, +) -> MsgCodec: + + task: trio.Task = trio.lowlevel.current_task() + task_ctx: Context = task.context + + assert _ctxvar_MsgCodec in task_ctx + curr_codec: MsgCodec = task.context[_ctxvar_MsgCodec] + + assert ( + # returned from `mk_codec()` + custom_codec is + + # yielded value from `apply_codec()` + enter_value is + + # read from current task's `contextvars.Context` + curr_codec is + + # public API for all of the above + current_msgspec_codec() + + # the default `msgspec` settings + is not _def_msgspec_codec + ) + + +def test_codec_hooks_mod(): + ''' + Audit the `.msg.MsgCodec` override apis details given our impl + uses `contextvars` to accomplish per `trio` task codec + application around an inter-proc-task-comms context. + + ''' + async def main(): + task: trio.Task = trio.lowlevel.current_task() + task_ctx: Context = task.context + assert _ctxvar_MsgCodec not in task_ctx + + async with tractor.open_nursery() as an: + p: tractor.Portal = await an.start_actor( + 'sub', + enable_modules=[__name__], + ) + + # TODO: 2 cases: + # - codec not modified -> decode nsp as `str` + # - codec modified with hooks -> decode nsp as + # `NamespacePath` + nsp_codec: MsgCodec = mk_custom_codec() + with apply_codec(nsp_codec) as codec: + chk_codec_applied( + custom_codec=nsp_codec, + enter_value=codec, + ) + + async with ( + p.open_context( + send_back_nsp, + ) as (ctx, first), + ctx.open_stream() as ipc, + ): + # ensure codec is still applied across + # `tractor.Context` + its embedded nursery. 
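+                    # (`apply_codec()` set a `ContextVar` in this
+                    # same task, and neither `open_context()` nor
+                    # its embedded nursery swap out this task's
+                    # `contextvars.Context`, so the override is
+                    # expected to remain in effect here.)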
+ chk_codec_applied( + custom_codec=nsp_codec, + enter_value=codec, + ) + + assert first == f'{__name__}:ex_func' + # TODO: actually get the decoder loading + # to native once we spec our SCIPP msgspec + # (structurred-conc-inter-proc-protocol) + # implemented as per, + # https://github.com/goodboy/tractor/issues/36 + # + # assert isinstance(first, NamespacePath) + assert isinstance(first, str) + await ipc.send(first) + + with trio.move_on_after(1): + async for msg in ipc: + + # TODO: as per above + # assert isinstance(msg, NamespacePath) + assert isinstance(msg, str) + + await p.cancel_actor() + + trio.run(main) diff --git a/tractor/_ipc.py b/tractor/_ipc.py index 2b5df698..5aafda3f 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -23,7 +23,10 @@ from collections.abc import ( AsyncGenerator, AsyncIterator, ) -from contextlib import asynccontextmanager as acm +from contextlib import ( + asynccontextmanager as acm, + contextmanager as cm, +) import platform from pprint import pformat import struct @@ -37,12 +40,15 @@ from typing import ( TypeVar, ) -import msgspec from tricycle import BufferedReceiveStream import trio from tractor.log import get_logger from tractor._exceptions import TransportClosed +from tractor.msg import ( + _ctxvar_MsgCodec, + MsgCodec, +) log = get_logger(__name__) @@ -154,13 +160,9 @@ class MsgpackTCPStream(MsgTransport): ) self.prefix_size = prefix_size - # TODO: struct aware messaging coders - self.encode = msgspec.msgpack.Encoder( - enc_hook=codec[0] if codec else None, - ).encode - self.decode = msgspec.msgpack.Decoder( - dec_hook=codec[1] if codec else None, - ).decode + # allow for custom IPC msg interchange format + # dynamic override Bo + self.codec: MsgCodec = codec or MsgCodec() async def _iter_packets(self) -> AsyncGenerator[dict, None]: '''Yield packets from the underlying stream. @@ -199,7 +201,23 @@ class MsgpackTCPStream(MsgTransport): log.transport(f"received {msg_bytes}") # type: ignore try: - yield self.decode(msg_bytes) + # NOTE: lookup the `trio.Task.context`'s var for + # the current `MsgCodec`. + yield _ctxvar_MsgCodec.get().decode(msg_bytes) + + # TODO: remove, was only for orig draft impl + # testing. + # + # curr_codec: MsgCodec = _ctxvar_MsgCodec.get() + # obj = curr_codec.decode(msg_bytes) + # if ( + # curr_codec is not + # _codec._def_msgspec_codec + # ): + # print(f'OBJ: {obj}\n') + # + # yield obj + except ( msgspec.DecodeError, UnicodeDecodeError, @@ -235,7 +253,10 @@ class MsgpackTCPStream(MsgTransport): # __tracebackhide__: bool = hide_tb async with self._send_lock: - bytes_data: bytes = self.encode(msg) + # NOTE: lookup the `trio.Task.context`'s var for + # the current `MsgCodec`. 
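+            # (i.e. the codec applied via `apply_codec()` in the
+            # caller's task, or else the default `msgspec` codec)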
+ bytes_data: bytes = _ctxvar_MsgCodec.get().encode(msg) + # bytes_data: bytes = self.codec.encode(msg) # supposedly the fastest says, # https://stackoverflow.com/a/54027962 @@ -335,7 +356,9 @@ class Channel: @property def msgstream(self) -> MsgTransport: - log.info('`Channel.msgstream` is an old name, use `._transport`') + log.info( + '`Channel.msgstream` is an old name, use `._transport`' + ) return self._transport @property @@ -368,10 +391,7 @@ class Channel: # XXX optionally provided codec pair for `msgspec`: # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types - codec: tuple[ - Callable[[Any], Any], # coder - Callable[[type, Any], Any], # decoder - ]|None = None, + codec: MsgCodec|None = None, ) -> MsgTransport: type_key = ( @@ -379,14 +399,36 @@ class Channel: or self._transport_key ) + # get transport type, then self._transport = get_msg_transport( type_key + # instantiate an instance of the msg-transport )( stream, codec=codec, ) return self._transport + # TODO: something simliar at the IPC-`Context` + # level so as to support + @cm + def apply_codec( + self, + codec: MsgCodec, + + ) -> None: + ''' + Temporarily override the underlying IPC msg codec for + dynamic enforcement of messaging schema. + + ''' + orig: MsgCodec = self._transport.codec + try: + self._transport.codec = codec + yield + finally: + self._transport.codec = orig + def __repr__(self) -> str: if not self._transport: return '' diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py index 906627cf..e2296788 100644 --- a/tractor/msg/__init__.py +++ b/tractor/msg/__init__.py @@ -24,3 +24,13 @@ from .ptr import ( from .types import ( Struct as Struct, ) +from ._codec import ( + + _def_msgspec_codec as _def_msgspec_codec, + _ctxvar_MsgCodec as _ctxvar_MsgCodec, + + apply_codec as apply_codec, + mk_codec as mk_codec, + MsgCodec as MsgCodec, + current_msgspec_codec as current_msgspec_codec, +) diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py new file mode 100644 index 00000000..0da454ad --- /dev/null +++ b/tractor/msg/_codec.py @@ -0,0 +1,253 @@ +# tractor: structured concurrent "actors". +# Copyright 2018-eternity Tyler Goodlet. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +IPC msg interchange codec management. + +Supported backend libs: +- `msgspec.msgpack` + +ToDo: backends we prolly should offer: + +- see project/lib list throughout GH issue discussion comments: + https://github.com/goodboy/tractor/issues/196 + +- `capnproto`: https://capnproto.org/rpc.html + - https://capnproto.org/language.html#language-reference + +''' +from contextvars import ( + ContextVar, + Token, +) +from contextlib import ( + contextmanager as cm, +) +from typing import ( + Any, + Callable, + Type, + Union, +) +from types import ModuleType + +import msgspec +from msgspec import msgpack + +from .types import Struct + + +# TODO: API changes towards being interchange lib agnostic! 
+# -[ ] capnproto has pre-compiled schema for eg.. +# * https://capnproto.org/language.html +# * http://capnproto.github.io/pycapnp/quickstart.html +# * https://github.com/capnproto/pycapnp/blob/master/examples/addressbook.capnp +class MsgCodec(Struct): + ''' + A IPC msg interchange format lib's encoder + decoder pair. + + ''' + + lib: ModuleType = msgspec + + # ad-hoc type extensions + # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types + enc_hook: Callable[[Any], Any]|None = None # coder + dec_hook: Callable[[type, Any], Any]|None = None # decoder + + # struct type unions + # https://jcristharif.com/msgspec/structs.html#tagged-unions + types: Union[Type[Struct]]|Any = Any + + # post-configure cached props + _enc: msgpack.Encoder|None = None + _dec: msgpack.Decoder|None = None + + + # TODO: use `functools.cached_property` for these ? + # https://docs.python.org/3/library/functools.html#functools.cached_property + @property + def enc(self) -> msgpack.Encoder: + return self._enc or self.encoder() + + def encoder( + self, + enc_hook: Callable|None = None, + reset: bool = False, + + # TODO: what's the default for this? + # write_buffer_size: int + **kwargs, + + ) -> msgpack.Encoder: + ''' + Set or get the maybe-cached `msgspec.msgpack.Encoder` + instance configured for this codec. + + When `reset=True` any previously configured encoder will + be recreated and then cached with the new settings passed + as input. + + ''' + if ( + self._enc is None + or reset + ): + self._enc = self.lib.msgpack.Encoder( + enc_hook=enc_hook or self.enc_hook, + # write_buffer_size=write_buffer_size, + ) + + return self._enc + + def encode( + self, + py_obj: Any, + + ) -> bytes: + ''' + Encode input python objects to `msgpack` bytes for transfer + on a tranport protocol connection. + + ''' + return self.enc.encode(py_obj) + + @property + def dec(self) -> msgpack.Decoder: + return self._dec or self.decoder() + + def decoder( + self, + types: Union[Type[Struct]]|None = None, + dec_hook: Callable|None = None, + reset: bool = False, + **kwargs, + # ext_hook: ext_hook_sig + + ) -> msgpack.Decoder: + ''' + Set or get the maybe-cached `msgspec.msgpack.Decoder` + instance configured for this codec. + + When `reset=True` any previously configured decoder will + be recreated and then cached with the new settings passed + as input. + + ''' + if ( + self._dec is None + or reset + ): + self._dec = self.lib.msgpack.Decoder( + types or self.types, + dec_hook=dec_hook or self.dec_hook, + **kwargs, + ) + + return self._dec + + def decode( + self, + msg: bytes, + ) -> Any: + ''' + Decode received `msgpack` bytes into a local python object + with special `msgspec.Struct` (or other type) handling + determined by the + + ''' + + return self.dec.decode(msg) + + +# TODO: struct aware messaging coders as per: +# - https://github.com/goodboy/tractor/issues/36 +# - https://github.com/goodboy/tractor/issues/196 +# - https://github.com/goodboy/tractor/issues/365 + +def mk_codec( + libname: str = 'msgspec', + + # struct type unions set for `Decoder` + # https://jcristharif.com/msgspec/structs.html#tagged-unions + dec_types: Union[Type[Struct]]|Any = Any, + + cache_now: bool = True, + + # proxy to the `Struct.__init__()` + **kwargs, + +) -> MsgCodec: + ''' + Convenience factory for creating codecs eventually meant + to be interchange lib agnostic (i.e. once we support more then just + `msgspec` ;). 
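+
+    Usage sketch (the hook funcs are app-defined, eg. the
+    `NamespacePath` pair in the test suite; `my_enc_hook` and
+    `my_dec_hook` here are hypothetical names)::
+
+        nsp_codec: MsgCodec = mk_codec(
+            dec_types=NamespacePath,
+            enc_hook=my_enc_hook,  # `Callable[[Any], Any]`
+            dec_hook=my_dec_hook,  # `Callable[[type, Any], Any]`
+        )
+        wire: bytes = nsp_codec.encode(obj)  # any spec-valid object
+        obj = nsp_codec.decode(wire)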
+ + ''' + codec = MsgCodec( + types=dec_types, + **kwargs, + ) + assert codec.lib.__name__ == libname + + # by default config and cache the codec pair for given + # input settings. + if cache_now: + assert codec.enc + assert codec.dec + + return codec + + +# instance of the default `msgspec.msgpack` codec settings, i.e. +# no custom structs, hooks or other special types. +_def_msgspec_codec: MsgCodec = mk_codec() + +# NOTE: provides for per-`trio.Task` specificity of the +# IPC msging codec used by the transport layer when doing +# `Channel.send()/.recv()` of wire data. +_ctxvar_MsgCodec: ContextVar[MsgCodec] = ContextVar( + 'msgspec_codec', + default=_def_msgspec_codec, +) + + +@cm +def apply_codec( + codec: MsgCodec, + +) -> MsgCodec: + ''' + Dynamically apply a `MsgCodec` to the current task's + runtime context such that all IPC msgs are processed + with it for that task. + + ''' + token: Token = _ctxvar_MsgCodec.set(codec) + try: + yield _ctxvar_MsgCodec.get() + finally: + _ctxvar_MsgCodec.reset(token) + + +def current_msgspec_codec() -> MsgCodec: + ''' + Return the current `trio.Task.context`'s value + for `msgspec_codec` used by `Channel.send/.recv()` + for wire serialization. + + ''' + return _ctxvar_MsgCodec.get() -- 2.34.1 From 10b52ba98a310b571343036d74b910321c884933 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 6 Jul 2022 17:35:09 -0400 Subject: [PATCH 003/305] WIP tagged union message type API XXX NOTE XXX: this is a heavily modified commit from the original (ec226463) which was super out of date when rebased onto the current branch. I went through a manual conflict rework and removed all the legacy segments as well as rename-moved this original mod `tractor.msg.py` -> `tractor.msg/_old_msg.py`. Further the `NamespacePath` type def was discarded from this mod since it was from a super old version which was already moved to a `.msg.ptr` submod. As per original questions and discussion with `msgspec` author: - https://github.com/jcrist/msgspec/issues/25 - https://github.com/jcrist/msgspec/issues/140 this prototypes a new (but very naive) `msgspec.Struct` codec implementation which will be more filled out in the next commit. --- tractor/_ipc.py | 4 +- tractor/msg/_old_msg.py | 121 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 tractor/msg/_old_msg.py diff --git a/tractor/_ipc.py b/tractor/_ipc.py index 5aafda3f..b1c2ccd2 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -165,7 +165,9 @@ class MsgpackTCPStream(MsgTransport): self.codec: MsgCodec = codec or MsgCodec() async def _iter_packets(self) -> AsyncGenerator[dict, None]: - '''Yield packets from the underlying stream. + ''' + Yield `bytes`-blob decoded packets from the underlying TCP + stream using the current task's `MsgCodec`. ''' import msgspec # noqa diff --git a/tractor/msg/_old_msg.py b/tractor/msg/_old_msg.py new file mode 100644 index 00000000..823228a3 --- /dev/null +++ b/tractor/msg/_old_msg.py @@ -0,0 +1,121 @@ +# tractor: structured concurrent "actors". +# Copyright 2018-eternity Tyler Goodlet. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +Capability-based messaging specifications: or colloquially as "msgspecs". + +Includes our SCIPP (structured-con-inter-process-protocol) message type defs +and APIs for applying custom msgspec-sets for implementing un-protocol state machines. + +''' + +# TODO: integration with our ``enable_modules: list[str]`` caps sys. + +# ``pkgutil.resolve_name()`` internally uses +# ``importlib.import_module()`` which can be filtered by inserting +# a ``MetaPathFinder`` into ``sys.meta_path`` (which we could do before +# entering the ``Actor._process_messages()`` loop). +# https://github.com/python/cpython/blob/main/Lib/pkgutil.py#L645 +# https://stackoverflow.com/questions/1350466/preventing-python-code-from-importing-certain-modules +# - https://stackoverflow.com/a/63320902 +# - https://docs.python.org/3/library/sys.html#sys.meta_path + +# the new "Implicit Namespace Packages" might be relevant? +# - https://www.python.org/dev/peps/pep-0420/ + +# add implicit serialized message type support so that paths can be +# handed directly to IPC primitives such as streams and `Portal.run()` +# calls: +# - via ``msgspec``: +# - https://jcristharif.com/msgspec/api.html#struct +# - https://jcristharif.com/msgspec/extending.html +# via ``msgpack-python``: +# https://github.com/msgpack/msgpack-python#packingunpacking-of-custom-data-type + +from __future__ import annotations +from contextlib import contextmanager as cm +from typing import ( + Union, + Any, +) + +from msgspec import Struct +from msgspec.msgpack import ( + Encoder, + Decoder, +) + + +# LIFO codec stack that is appended when the user opens the +# ``configure_native_msgs()`` cm below to configure a new codec set +# which will be applied to all new (msgspec relevant) IPC transports +# that are spawned **after** the configure call is made. +_lifo_codecs: list[ + tuple[ + Encoder, + Decoder, + ], +] = [(Encoder(), Decoder())] + + +def get_msg_codecs() -> tuple[ + Encoder, + Decoder, +]: + ''' + Return the currently configured ``msgspec`` codec set. + + The defaults are defined above. + + ''' + global _lifo_codecs + return _lifo_codecs[-1] + + +@cm +def configure_native_msgs( + tagged_structs: list[Struct], +): + ''' + Push a codec set that will natively decode + tagged structs provied in ``tagged_structs`` + in all IPC transports and pop the codec on exit. + + ''' + global _lifo_codecs + + # See "tagged unions" docs: + # https://jcristharif.com/msgspec/structs.html#tagged-unions + + # "The quickest way to enable tagged unions is to set tag=True when + # defining every struct type in the union. In this case tag_field + # defaults to "type", and tag defaults to the struct class name + # (e.g. "Get")." 
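+    #
+    # e.g. a (hypothetical) tagged struct for such a union:
+    #
+    #   class Get(Struct, tag=True):
+    #       key: str
+    #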
+ enc = Encoder() + + types_union = Union[tagged_structs[0]] | Any + for struct in tagged_structs[1:]: + types_union |= struct + + dec = Decoder(types_union) + + _lifo_codecs.append((enc, dec)) + try: + print("YOYOYOOYOYOYOY") + yield enc, dec + finally: + print("NONONONONON") + _lifo_codecs.pop() -- 2.34.1 From 68f170fde105d5a401dc9e955d3efd3409a575fa Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 7 Jul 2022 15:48:16 -0400 Subject: [PATCH 004/305] Re-think, `msgspec`-multi-typed msg dialogs The greasy details are strewn throughout a `msgspec` issue: https://github.com/jcrist/msgspec/issues/140 and specifically this code was mostly written as part of POC example in this comment: https://github.com/jcrist/msgspec/issues/140#issuecomment-1177850792 This work obviously pertains to our desire and prep for typed messaging and capabilities aware msg-oriented-protocols in #196. I added a "wants to have" method to `Context` showing how I think we could offer a pretty neat msg-type-set-as-capability-for-protocol system. XXX NOTE XXX: this commit was rewritten during a rebase from a very old version as per the prior commit. --- tractor/_streaming.py | 11 +++++ tractor/msg/_old_msg.py | 95 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 102 insertions(+), 4 deletions(-) diff --git a/tractor/_streaming.py b/tractor/_streaming.py index e0015fe4..90c33d31 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -533,6 +533,17 @@ class MsgStream(trio.abc.Channel): else: raise + # TODO: msg capability context api1 + # @acm + # async def enable_msg_caps( + # self, + # msg_subtypes: Union[ + # list[list[Struct]], + # Protocol, # hypothetical type that wraps a msg set + # ], + # ) -> tuple[Callable, Callable]: # payload enc, dec pair + # ... + def stream(func: Callable) -> Callable: ''' diff --git a/tractor/msg/_old_msg.py b/tractor/msg/_old_msg.py index 823228a3..240b2eca 100644 --- a/tractor/msg/_old_msg.py +++ b/tractor/msg/_old_msg.py @@ -48,11 +48,12 @@ and APIs for applying custom msgspec-sets for implementing un-protocol state mac from __future__ import annotations from contextlib import contextmanager as cm from typing import ( - Union, Any, + Optional, + Union, ) -from msgspec import Struct +from msgspec import Struct, Raw from msgspec.msgpack import ( Encoder, Decoder, @@ -95,8 +96,6 @@ def configure_native_msgs( in all IPC transports and pop the codec on exit. ''' - global _lifo_codecs - # See "tagged unions" docs: # https://jcristharif.com/msgspec/structs.html#tagged-unions @@ -119,3 +118,91 @@ def configure_native_msgs( finally: print("NONONONONON") _lifo_codecs.pop() + + +class Header(Struct, tag=True): + ''' + A msg header which defines payload properties + + ''' + uid: str + msgtype: Optional[str] = None + + +class Msg(Struct, tag=True): + ''' + The "god" msg type, a box for task level msg types. + + ''' + header: Header + payload: Raw + + +_root_dec = Decoder(Msg) +_root_enc = Encoder() + +# sub-decoders for retreiving embedded +# payload data and decoding to a sender +# side defined (struct) type. +_subdecs: dict[ + Optional[str], + Decoder] = { + None: Decoder(Any), +} + + +@cm +def enable_context( + msg_subtypes: list[list[Struct]] +) -> Decoder: + + for types in msg_subtypes: + first = types[0] + + # register using the default tag_field of "type" + # which seems to map to the class "name". 
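+        # (i.e. for `class Get(Struct, tag=True)` the wire-level
+        # tag value is the string 'Get')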
+ tags = [first.__name__] + + # create a tagged union decoder for this type set + type_union = Union[first] + for typ in types[1:]: + type_union |= typ + tags.append(typ.__name__) + + dec = Decoder(type_union) + + # register all tags for this union sub-decoder + for tag in tags: + _subdecs[tag] = dec + try: + yield dec + finally: + for tag in tags: + _subdecs.pop(tag) + + +def decmsg(msg: Msg) -> Any: + msg = _root_dec.decode(msg) + tag_field = msg.header.msgtype + dec = _subdecs[tag_field] + return dec.decode(msg.payload) + + +def encmsg( + dialog_id: str | int, + payload: Any, +) -> Msg: + + tag_field = None + + plbytes = _root_enc.encode(payload) + if b'type' in plbytes: + assert isinstance(payload, Struct) + tag_field = type(payload).__name__ + payload = Raw(plbytes) + + msg = Msg( + Header(dialog_id, tag_field), + payload, + ) + return _root_enc.encode(msg) -- 2.34.1 From 79c71bfbaf20751c2cf399d3a95021786b2d2686 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 26 Mar 2024 17:47:55 -0400 Subject: [PATCH 005/305] Merge original content from PR #311 into `.msg.types` for now --- tractor/msg/_old_msg.py | 208 ---------------------------------------- tractor/msg/types.py | 185 ++++++++++++++++++++++++++++++++++- 2 files changed, 182 insertions(+), 211 deletions(-) delete mode 100644 tractor/msg/_old_msg.py diff --git a/tractor/msg/_old_msg.py b/tractor/msg/_old_msg.py deleted file mode 100644 index 240b2eca..00000000 --- a/tractor/msg/_old_msg.py +++ /dev/null @@ -1,208 +0,0 @@ -# tractor: structured concurrent "actors". -# Copyright 2018-eternity Tyler Goodlet. - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. - -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -''' -Capability-based messaging specifications: or colloquially as "msgspecs". - -Includes our SCIPP (structured-con-inter-process-protocol) message type defs -and APIs for applying custom msgspec-sets for implementing un-protocol state machines. - -''' - -# TODO: integration with our ``enable_modules: list[str]`` caps sys. - -# ``pkgutil.resolve_name()`` internally uses -# ``importlib.import_module()`` which can be filtered by inserting -# a ``MetaPathFinder`` into ``sys.meta_path`` (which we could do before -# entering the ``Actor._process_messages()`` loop). -# https://github.com/python/cpython/blob/main/Lib/pkgutil.py#L645 -# https://stackoverflow.com/questions/1350466/preventing-python-code-from-importing-certain-modules -# - https://stackoverflow.com/a/63320902 -# - https://docs.python.org/3/library/sys.html#sys.meta_path - -# the new "Implicit Namespace Packages" might be relevant? 
-# - https://www.python.org/dev/peps/pep-0420/ - -# add implicit serialized message type support so that paths can be -# handed directly to IPC primitives such as streams and `Portal.run()` -# calls: -# - via ``msgspec``: -# - https://jcristharif.com/msgspec/api.html#struct -# - https://jcristharif.com/msgspec/extending.html -# via ``msgpack-python``: -# https://github.com/msgpack/msgpack-python#packingunpacking-of-custom-data-type - -from __future__ import annotations -from contextlib import contextmanager as cm -from typing import ( - Any, - Optional, - Union, -) - -from msgspec import Struct, Raw -from msgspec.msgpack import ( - Encoder, - Decoder, -) - - -# LIFO codec stack that is appended when the user opens the -# ``configure_native_msgs()`` cm below to configure a new codec set -# which will be applied to all new (msgspec relevant) IPC transports -# that are spawned **after** the configure call is made. -_lifo_codecs: list[ - tuple[ - Encoder, - Decoder, - ], -] = [(Encoder(), Decoder())] - - -def get_msg_codecs() -> tuple[ - Encoder, - Decoder, -]: - ''' - Return the currently configured ``msgspec`` codec set. - - The defaults are defined above. - - ''' - global _lifo_codecs - return _lifo_codecs[-1] - - -@cm -def configure_native_msgs( - tagged_structs: list[Struct], -): - ''' - Push a codec set that will natively decode - tagged structs provied in ``tagged_structs`` - in all IPC transports and pop the codec on exit. - - ''' - # See "tagged unions" docs: - # https://jcristharif.com/msgspec/structs.html#tagged-unions - - # "The quickest way to enable tagged unions is to set tag=True when - # defining every struct type in the union. In this case tag_field - # defaults to "type", and tag defaults to the struct class name - # (e.g. "Get")." - enc = Encoder() - - types_union = Union[tagged_structs[0]] | Any - for struct in tagged_structs[1:]: - types_union |= struct - - dec = Decoder(types_union) - - _lifo_codecs.append((enc, dec)) - try: - print("YOYOYOOYOYOYOY") - yield enc, dec - finally: - print("NONONONONON") - _lifo_codecs.pop() - - -class Header(Struct, tag=True): - ''' - A msg header which defines payload properties - - ''' - uid: str - msgtype: Optional[str] = None - - -class Msg(Struct, tag=True): - ''' - The "god" msg type, a box for task level msg types. - - ''' - header: Header - payload: Raw - - -_root_dec = Decoder(Msg) -_root_enc = Encoder() - -# sub-decoders for retreiving embedded -# payload data and decoding to a sender -# side defined (struct) type. -_subdecs: dict[ - Optional[str], - Decoder] = { - None: Decoder(Any), -} - - -@cm -def enable_context( - msg_subtypes: list[list[Struct]] -) -> Decoder: - - for types in msg_subtypes: - first = types[0] - - # register using the default tag_field of "type" - # which seems to map to the class "name". 
- tags = [first.__name__] - - # create a tagged union decoder for this type set - type_union = Union[first] - for typ in types[1:]: - type_union |= typ - tags.append(typ.__name__) - - dec = Decoder(type_union) - - # register all tags for this union sub-decoder - for tag in tags: - _subdecs[tag] = dec - try: - yield dec - finally: - for tag in tags: - _subdecs.pop(tag) - - -def decmsg(msg: Msg) -> Any: - msg = _root_dec.decode(msg) - tag_field = msg.header.msgtype - dec = _subdecs[tag_field] - return dec.decode(msg.payload) - - -def encmsg( - dialog_id: str | int, - payload: Any, -) -> Msg: - - tag_field = None - - plbytes = _root_enc.encode(payload) - if b'type' in plbytes: - assert isinstance(payload, Struct) - tag_field = type(payload).__name__ - payload = Raw(plbytes) - - msg = Msg( - Header(dialog_id, tag_field), - payload, - ) - return _root_enc.encode(msg) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 3ceff845..e457370e 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -21,19 +21,27 @@ types. ''' from __future__ import annotations from collections import UserList -from pprint import ( - saferepr, -) +from contextlib import contextmanager as cm from typing import ( Any, Iterator, + Optional, + Union, ) from msgspec import ( msgpack, + Raw, Struct as _Struct, structs, ) +from msgspec.msgpack import ( + Encoder, + Decoder, +) +from pprint import ( + saferepr, +) # TODO: auto-gen type sig for input func both for # type-msgs and logging of RPC tasks? @@ -268,3 +276,174 @@ class Struct( )) return diffs + +# ------ - ------ +# +# TODO: integration with our ``enable_modules: list[str]`` caps sys. +# +# ``pkgutil.resolve_name()`` internally uses +# ``importlib.import_module()`` which can be filtered by inserting +# a ``MetaPathFinder`` into ``sys.meta_path`` (which we could do before +# entering the ``Actor._process_messages()`` loop). +# https://github.com/python/cpython/blob/main/Lib/pkgutil.py#L645 +# https://stackoverflow.com/questions/1350466/preventing-python-code-from-importing-certain-modules +# - https://stackoverflow.com/a/63320902 +# - https://docs.python.org/3/library/sys.html#sys.meta_path + +# the new "Implicit Namespace Packages" might be relevant? +# - https://www.python.org/dev/peps/pep-0420/ + +# add implicit serialized message type support so that paths can be +# handed directly to IPC primitives such as streams and `Portal.run()` +# calls: +# - via ``msgspec``: +# - https://jcristharif.com/msgspec/api.html#struct +# - https://jcristharif.com/msgspec/extending.html +# via ``msgpack-python``: +# https://github.com/msgpack/msgpack-python#packingunpacking-of-custom-data-type +# LIFO codec stack that is appended when the user opens the +# ``configure_native_msgs()`` cm below to configure a new codec set +# which will be applied to all new (msgspec relevant) IPC transports +# that are spawned **after** the configure call is made. +_lifo_codecs: list[ + tuple[ + Encoder, + Decoder, + ], +] = [(Encoder(), Decoder())] + + +def get_msg_codecs() -> tuple[ + Encoder, + Decoder, +]: + ''' + Return the currently configured ``msgspec`` codec set. + + The defaults are defined above. + + ''' + global _lifo_codecs + return _lifo_codecs[-1] + + +@cm +def configure_native_msgs( + tagged_structs: list[_Struct], +): + ''' + Push a codec set that will natively decode + tagged structs provied in ``tagged_structs`` + in all IPC transports and pop the codec on exit. 
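+
+    Usage sketch (the tagged struct types are app-defined; `Get` and
+    `Put` here are hypothetical msgs)::
+
+        with configure_native_msgs([Get, Put]) as (enc, dec):
+            wire: bytes = enc.encode(Get(key='foo'))
+            obj = dec.decode(wire)  # decoded via the tagged union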
+ + ''' + # See "tagged unions" docs: + # https://jcristharif.com/msgspec/structs.html#tagged-unions + + # "The quickest way to enable tagged unions is to set tag=True when + # defining every struct type in the union. In this case tag_field + # defaults to "type", and tag defaults to the struct class name + # (e.g. "Get")." + enc = Encoder() + + types_union = Union[tagged_structs[0]] | Any + for struct in tagged_structs[1:]: + types_union |= struct + + dec = Decoder(types_union) + + _lifo_codecs.append((enc, dec)) + try: + print("YOYOYOOYOYOYOY") + yield enc, dec + finally: + print("NONONONONON") + _lifo_codecs.pop() + + +class Header(_Struct, tag=True): + ''' + A msg header which defines payload properties + + ''' + uid: str + msgtype: Optional[str] = None + + +class Msg(_Struct, tag=True): + ''' + The "god" msg type, a box for task level msg types. + + ''' + header: Header + payload: Raw + + +_root_dec = Decoder(Msg) +_root_enc = Encoder() + +# sub-decoders for retreiving embedded +# payload data and decoding to a sender +# side defined (struct) type. +_subdecs: dict[ + Optional[str], + Decoder] = { + None: Decoder(Any), +} + + +@cm +def enable_context( + msg_subtypes: list[list[_Struct]] +) -> Decoder: + + for types in msg_subtypes: + first = types[0] + + # register using the default tag_field of "type" + # which seems to map to the class "name". + tags = [first.__name__] + + # create a tagged union decoder for this type set + type_union = Union[first] + for typ in types[1:]: + type_union |= typ + tags.append(typ.__name__) + + dec = Decoder(type_union) + + # register all tags for this union sub-decoder + for tag in tags: + _subdecs[tag] = dec + try: + yield dec + finally: + for tag in tags: + _subdecs.pop(tag) + + +def decmsg(msg: Msg) -> Any: + msg = _root_dec.decode(msg) + tag_field = msg.header.msgtype + dec = _subdecs[tag_field] + return dec.decode(msg.payload) + + +def encmsg( + dialog_id: str | int, + payload: Any, +) -> Msg: + + tag_field = None + + plbytes = _root_enc.encode(payload) + if b'type' in plbytes: + assert isinstance(payload, _Struct) + tag_field = type(payload).__name__ + payload = Raw(plbytes) + + msg = Msg( + Header(dialog_id, tag_field), + payload, + ) + return _root_enc.encode(msg) -- 2.34.1 From b589bef1b6bb67ff678e0c47c07fef83956977b8 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 26 Mar 2024 18:27:55 -0400 Subject: [PATCH 006/305] Move the pretty-`Struct` stuff to a `.pretty_struct` Leave all the proto native struct-msg stuff in `.types` since i'm thinking it's the right name for the mod that will hold all the built-in SCIPP msgspecs longer run. Obvi the naive codec stack stuff needs to be cleaned out/up and anything useful moved into `._codec` ;) --- tractor/msg/__init__.py | 3 +- tractor/msg/_codec.py | 2 +- tractor/msg/pretty_struct.py | 269 ++++++++++++++++++++++++++++++++++ tractor/msg/types.py | 277 ++--------------------------------- 4 files changed, 286 insertions(+), 265 deletions(-) create mode 100644 tractor/msg/pretty_struct.py diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py index e2296788..b5c261cc 100644 --- a/tractor/msg/__init__.py +++ b/tractor/msg/__init__.py @@ -21,11 +21,10 @@ Built-in messaging patterns, types, APIs and helpers. 
from .ptr import ( NamespacePath as NamespacePath, ) -from .types import ( +from .pretty_struct import ( Struct as Struct, ) from ._codec import ( - _def_msgspec_codec as _def_msgspec_codec, _ctxvar_MsgCodec as _ctxvar_MsgCodec, diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index 0da454ad..c26de8d4 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -47,7 +47,7 @@ from types import ModuleType import msgspec from msgspec import msgpack -from .types import Struct +from .pretty_struct import Struct # TODO: API changes towards being interchange lib agnostic! diff --git a/tractor/msg/pretty_struct.py b/tractor/msg/pretty_struct.py new file mode 100644 index 00000000..143fc7a4 --- /dev/null +++ b/tractor/msg/pretty_struct.py @@ -0,0 +1,269 @@ +# tractor: structured concurrent "actors". +# Copyright 2018-eternity Tyler Goodlet. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +Prettified version of `msgspec.Struct` for easier console grokin. + +''' +from __future__ import annotations +from collections import UserList +from typing import ( + Any, + Iterator, +) + +from msgspec import ( + msgpack, + Struct as _Struct, + structs, +) +from pprint import ( + saferepr, +) + +# TODO: auto-gen type sig for input func both for +# type-msgs and logging of RPC tasks? +# taken and modified from: +# https://stackoverflow.com/a/57110117 +# import inspect +# from typing import List + +# def my_function(input_1: str, input_2: int) -> list[int]: +# pass + +# def types_of(func): +# specs = inspect.getfullargspec(func) +# return_type = specs.annotations['return'] +# input_types = [t.__name__ for s, t in specs.annotations.items() if s != 'return'] +# return f'{func.__name__}({": ".join(input_types)}) -> {return_type}' + +# types_of(my_function) + + +class DiffDump(UserList): + ''' + Very simple list delegator that repr() dumps (presumed) tuple + elements of the form `tuple[str, Any, Any]` in a nice + multi-line readable form for analyzing `Struct` diffs. + + ''' + def __repr__(self) -> str: + if not len(self): + return super().__repr__() + + # format by displaying item pair's ``repr()`` on multiple, + # indented lines such that they are more easily visually + # comparable when printed to console when printed to + # console. + repstr: str = '[\n' + for k, left, right in self: + repstr += ( + f'({k},\n' + f'\t{repr(left)},\n' + f'\t{repr(right)},\n' + ')\n' + ) + repstr += ']\n' + return repstr + + +class Struct( + _Struct, + + # https://jcristharif.com/msgspec/structs.html#tagged-unions + # tag='pikerstruct', + # tag=True, +): + ''' + A "human friendlier" (aka repl buddy) struct subtype. + + ''' + def _sin_props(self) -> Iterator[ + tuple[ + structs.FieldIinfo, + str, + Any, + ] + ]: + ''' + Iterate over all non-@property fields of this struct. 
+ + ''' + fi: structs.FieldInfo + for fi in structs.fields(self): + key: str = fi.name + val: Any = getattr(self, key) + yield fi, key, val + + def to_dict( + self, + include_non_members: bool = True, + + ) -> dict: + ''' + Like it sounds.. direct delegation to: + https://jcristharif.com/msgspec/api.html#msgspec.structs.asdict + + BUT, by default we pop all non-member (aka not defined as + struct fields) fields by default. + + ''' + asdict: dict = structs.asdict(self) + if include_non_members: + return asdict + + # only return a dict of the struct members + # which were provided as input, NOT anything + # added as type-defined `@property` methods! + sin_props: dict = {} + fi: structs.FieldInfo + for fi, k, v in self._sin_props(): + sin_props[k] = asdict[k] + + return sin_props + + def pformat( + self, + field_indent: int = 2, + indent: int = 0, + + ) -> str: + ''' + Recursion-safe `pprint.pformat()` style formatting of + a `msgspec.Struct` for sane reading by a human using a REPL. + + ''' + # global whitespace indent + ws: str = ' '*indent + + # field whitespace indent + field_ws: str = ' '*(field_indent + indent) + + # qtn: str = ws + self.__class__.__qualname__ + qtn: str = self.__class__.__qualname__ + + obj_str: str = '' # accumulator + fi: structs.FieldInfo + k: str + v: Any + for fi, k, v in self._sin_props(): + + # TODO: how can we prefer `Literal['option1', 'option2, + # ..]` over .__name__ == `Literal` but still get only the + # latter for simple types like `str | int | None` etc..? + ft: type = fi.type + typ_name: str = getattr(ft, '__name__', str(ft)) + + # recurse to get sub-struct's `.pformat()` output Bo + if isinstance(v, Struct): + val_str: str = v.pformat( + indent=field_indent + indent, + field_indent=indent + field_indent, + ) + + else: # the `pprint` recursion-safe format: + # https://docs.python.org/3.11/library/pprint.html#pprint.saferepr + val_str: str = saferepr(v) + + # TODO: LOLOL use `textwrap.indent()` instead dawwwwwg! + obj_str += (field_ws + f'{k}: {typ_name} = {val_str},\n') + + return ( + f'{qtn}(\n' + f'{obj_str}' + f'{ws})' + ) + + # TODO: use a pprint.PrettyPrinter instance around ONLY rendering + # inside a known tty? + # def __repr__(self) -> str: + # ... + + # __str__ = __repr__ = pformat + __repr__ = pformat + + def copy( + self, + update: dict | None = None, + + ) -> Struct: + ''' + Validate-typecast all self defined fields, return a copy of + us with all such fields. + + NOTE: This is kinda like the default behaviour in + `pydantic.BaseModel` except a copy of the object is + returned making it compat with `frozen=True`. + + ''' + if update: + for k, v in update.items(): + setattr(self, k, v) + + # NOTE: roundtrip serialize to validate + # - enode to msgpack binary format, + # - decode that back to a struct. + return msgpack.Decoder(type=type(self)).decode( + msgpack.Encoder().encode(self) + ) + + def typecast( + self, + + # TODO: allow only casting a named subset? + # fields: set[str] | None = None, + + ) -> None: + ''' + Cast all fields using their declared type annotations + (kinda like what `pydantic` does by default). + + NOTE: this of course won't work on frozen types, use + ``.copy()`` above in such cases. 
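+
+        E.g. a field annotated `x: float` but currently holding
+        `'1.5'` becomes `1.5` after `.typecast()` (since
+        `float('1.5')` is applied).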
+ + ''' + # https://jcristharif.com/msgspec/api.html#msgspec.structs.fields + fi: structs.FieldInfo + for fi in structs.fields(self): + setattr( + self, + fi.name, + fi.type(getattr(self, fi.name)), + ) + + def __sub__( + self, + other: Struct, + + ) -> DiffDump[tuple[str, Any, Any]]: + ''' + Compare fields/items key-wise and return a ``DiffDump`` + for easy visual REPL comparison B) + + ''' + diffs: DiffDump[tuple[str, Any, Any]] = DiffDump() + for fi in structs.fields(self): + attr_name: str = fi.name + ours: Any = getattr(self, attr_name) + theirs: Any = getattr(other, attr_name) + if ours != theirs: + diffs.append(( + attr_name, + ours, + theirs, + )) + + return diffs diff --git a/tractor/msg/types.py b/tractor/msg/types.py index e457370e..d2fb0877 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -20,12 +20,9 @@ types. ''' from __future__ import annotations -from collections import UserList from contextlib import contextmanager as cm from typing import ( Any, - Iterator, - Optional, Union, ) @@ -33,252 +30,8 @@ from msgspec import ( msgpack, Raw, Struct as _Struct, - structs, -) -from msgspec.msgpack import ( - Encoder, - Decoder, -) -from pprint import ( - saferepr, ) -# TODO: auto-gen type sig for input func both for -# type-msgs and logging of RPC tasks? -# taken and modified from: -# https://stackoverflow.com/a/57110117 -# import inspect -# from typing import List - -# def my_function(input_1: str, input_2: int) -> list[int]: -# pass - -# def types_of(func): -# specs = inspect.getfullargspec(func) -# return_type = specs.annotations['return'] -# input_types = [t.__name__ for s, t in specs.annotations.items() if s != 'return'] -# return f'{func.__name__}({": ".join(input_types)}) -> {return_type}' - -# types_of(my_function) - - -class DiffDump(UserList): - ''' - Very simple list delegator that repr() dumps (presumed) tuple - elements of the form `tuple[str, Any, Any]` in a nice - multi-line readable form for analyzing `Struct` diffs. - - ''' - def __repr__(self) -> str: - if not len(self): - return super().__repr__() - - # format by displaying item pair's ``repr()`` on multiple, - # indented lines such that they are more easily visually - # comparable when printed to console when printed to - # console. - repstr: str = '[\n' - for k, left, right in self: - repstr += ( - f'({k},\n' - f'\t{repr(left)},\n' - f'\t{repr(right)},\n' - ')\n' - ) - repstr += ']\n' - return repstr - - -class Struct( - _Struct, - - # https://jcristharif.com/msgspec/structs.html#tagged-unions - # tag='pikerstruct', - # tag=True, -): - ''' - A "human friendlier" (aka repl buddy) struct subtype. - - ''' - def _sin_props(self) -> Iterator[ - tuple[ - structs.FieldIinfo, - str, - Any, - ] - ]: - ''' - Iterate over all non-@property fields of this struct. - - ''' - fi: structs.FieldInfo - for fi in structs.fields(self): - key: str = fi.name - val: Any = getattr(self, key) - yield fi, key, val - - def to_dict( - self, - include_non_members: bool = True, - - ) -> dict: - ''' - Like it sounds.. direct delegation to: - https://jcristharif.com/msgspec/api.html#msgspec.structs.asdict - - BUT, by default we pop all non-member (aka not defined as - struct fields) fields by default. - - ''' - asdict: dict = structs.asdict(self) - if include_non_members: - return asdict - - # only return a dict of the struct members - # which were provided as input, NOT anything - # added as type-defined `@property` methods! 
- sin_props: dict = {} - fi: structs.FieldInfo - for fi, k, v in self._sin_props(): - sin_props[k] = asdict[k] - - return sin_props - - def pformat( - self, - field_indent: int = 2, - indent: int = 0, - - ) -> str: - ''' - Recursion-safe `pprint.pformat()` style formatting of - a `msgspec.Struct` for sane reading by a human using a REPL. - - ''' - # global whitespace indent - ws: str = ' '*indent - - # field whitespace indent - field_ws: str = ' '*(field_indent + indent) - - # qtn: str = ws + self.__class__.__qualname__ - qtn: str = self.__class__.__qualname__ - - obj_str: str = '' # accumulator - fi: structs.FieldInfo - k: str - v: Any - for fi, k, v in self._sin_props(): - - # TODO: how can we prefer `Literal['option1', 'option2, - # ..]` over .__name__ == `Literal` but still get only the - # latter for simple types like `str | int | None` etc..? - ft: type = fi.type - typ_name: str = getattr(ft, '__name__', str(ft)) - - # recurse to get sub-struct's `.pformat()` output Bo - if isinstance(v, Struct): - val_str: str = v.pformat( - indent=field_indent + indent, - field_indent=indent + field_indent, - ) - - else: # the `pprint` recursion-safe format: - # https://docs.python.org/3.11/library/pprint.html#pprint.saferepr - val_str: str = saferepr(v) - - # TODO: LOLOL use `textwrap.indent()` instead dawwwwwg! - obj_str += (field_ws + f'{k}: {typ_name} = {val_str},\n') - - return ( - f'{qtn}(\n' - f'{obj_str}' - f'{ws})' - ) - - # TODO: use a pprint.PrettyPrinter instance around ONLY rendering - # inside a known tty? - # def __repr__(self) -> str: - # ... - - # __str__ = __repr__ = pformat - __repr__ = pformat - - def copy( - self, - update: dict | None = None, - - ) -> Struct: - ''' - Validate-typecast all self defined fields, return a copy of - us with all such fields. - - NOTE: This is kinda like the default behaviour in - `pydantic.BaseModel` except a copy of the object is - returned making it compat with `frozen=True`. - - ''' - if update: - for k, v in update.items(): - setattr(self, k, v) - - # NOTE: roundtrip serialize to validate - # - enode to msgpack binary format, - # - decode that back to a struct. - return msgpack.Decoder(type=type(self)).decode( - msgpack.Encoder().encode(self) - ) - - def typecast( - self, - - # TODO: allow only casting a named subset? - # fields: set[str] | None = None, - - ) -> None: - ''' - Cast all fields using their declared type annotations - (kinda like what `pydantic` does by default). - - NOTE: this of course won't work on frozen types, use - ``.copy()`` above in such cases. - - ''' - # https://jcristharif.com/msgspec/api.html#msgspec.structs.fields - fi: structs.FieldInfo - for fi in structs.fields(self): - setattr( - self, - fi.name, - fi.type(getattr(self, fi.name)), - ) - - def __sub__( - self, - other: Struct, - - ) -> DiffDump[tuple[str, Any, Any]]: - ''' - Compare fields/items key-wise and return a ``DiffDump`` - for easy visual REPL comparison B) - - ''' - diffs: DiffDump[tuple[str, Any, Any]] = DiffDump() - for fi in structs.fields(self): - attr_name: str = fi.name - ours: Any = getattr(self, attr_name) - theirs: Any = getattr(other, attr_name) - if ours != theirs: - diffs.append(( - attr_name, - ours, - theirs, - )) - - return diffs - -# ------ - ------ -# # TODO: integration with our ``enable_modules: list[str]`` caps sys. # # ``pkgutil.resolve_name()`` internally uses @@ -307,15 +60,15 @@ class Struct( # that are spawned **after** the configure call is made. 
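+# (a LIFO so that nested `configure_native_msgs()` scopes override
+#  outer ones until exited)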
_lifo_codecs: list[ tuple[ - Encoder, - Decoder, + msgpack.Encoder, + msgpack.Decoder, ], -] = [(Encoder(), Decoder())] +] = [(msgpack.Encoder(), msgpack.Decoder())] def get_msg_codecs() -> tuple[ - Encoder, - Decoder, + msgpack.Encoder, + msgpack.Decoder, ]: ''' Return the currently configured ``msgspec`` codec set. @@ -344,13 +97,13 @@ def configure_native_msgs( # defining every struct type in the union. In this case tag_field # defaults to "type", and tag defaults to the struct class name # (e.g. "Get")." - enc = Encoder() + enc = msgpack.Encoder() types_union = Union[tagged_structs[0]] | Any for struct in tagged_structs[1:]: types_union |= struct - dec = Decoder(types_union) + dec = msgpack.Decoder(types_union) _lifo_codecs.append((enc, dec)) try: @@ -367,7 +120,7 @@ class Header(_Struct, tag=True): ''' uid: str - msgtype: Optional[str] = None + msgtype: str|None = None class Msg(_Struct, tag=True): @@ -379,23 +132,23 @@ class Msg(_Struct, tag=True): payload: Raw -_root_dec = Decoder(Msg) -_root_enc = Encoder() +_root_dec = msgpack.Decoder(Msg) +_root_enc = msgpack.Encoder() # sub-decoders for retreiving embedded # payload data and decoding to a sender # side defined (struct) type. _subdecs: dict[ - Optional[str], - Decoder] = { - None: Decoder(Any), + str|None, + msgpack.Decoder] = { + None: msgpack.Decoder(Any), } @cm def enable_context( msg_subtypes: list[list[_Struct]] -) -> Decoder: +) -> msgpack.Decoder: for types in msg_subtypes: first = types[0] @@ -410,7 +163,7 @@ def enable_context( type_union |= typ tags.append(typ.__name__) - dec = Decoder(type_union) + dec = msgpack.Decoder(type_union) # register all tags for this union sub-decoder for tag in tags: -- 2.34.1 From fe9406be9ba07865f6711e8dcec10d787126b7b7 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 28 Mar 2024 10:45:01 -0400 Subject: [PATCH 007/305] Init def of "SC shuttle prot" with "msg-spec-limiting" As per the long outstanding GH issue this starts our rigorous journey into an attempt at a type-safe, cross-actor SC, IPC protocol Bo boop -> https://github.com/goodboy/tractor/issues/36 The idea is to "formally" define our SC "shuttle (dialog) protocol" by specifying a new `.msg.types.Msg` subtype-set which can fully encapsulate all IPC msg schemas needed in order to accomplish cross-process SC! The msg set deviated a little in terms of (type) names from the existing `dict`-msgs currently used in the runtime impl but, I think the name changes are much better in terms of explicitly representing the internal semantics of the actor runtime machinery/subsystems and the IPC-msg-dialog required for SC enforced RPC. ------ - ------ In cursory, the new formal msgs-spec includes the following msg-subtypes of a new top-level `Msg` boxing type (that holds the base field schema for all msgs): - `Start` to request RPC task scheduling by passing a `FuncSpec` payload (to replace the currently used `{'cmd': ... }` dict msg impl) - `StartAck` to allow the RPC task callee-side to report a `IpcCtxSpec` payload immediately back to the caller (currently responded naively via a `{'functype': ... }` msg) - `Started` to deliver the first value from `Context.started()` (instead of the existing `{'started': ... }`) - `Yield` to shuttle `MsgStream.send()`-ed values (instead of our `{'yield': ... }`) - `Stop` to terminate a `Context.open_stream()` session/block (over `{'stop': True }`) - `Return` to deliver the final value from the `Actor.start_remote_task()` (which is a `{'return': ... 
}`)
- `Error` to box `RemoteActorError` exceptions via a `.pld: ErrorData`
  payload, planned to replace/extend the current
  `RemoteActorError.msgdata` mechanism internal to
  `._exceptions.pack/unpack_error()`

The new `tractor.msg.types` includes all the above msg defs as well as
an API for rendering a "payload type specification" using a
`payload_type_spec: Union[Type]` that can be passed to
`msgspec.msgpack.Decoder(type=payload_type_spec)`. This ensures that
(for a subset of the above msg set) `Msg.pld: PayloadT` data is
type-parameterized using `msgspec`'s new `Generic[PayloadT]` field
support and thus enables an API where IPC `Context` dialogs can
strictly define the allowed payload-datatype-set via type union!

Iow, this is the foundation for supporting `Channel`/`Context`/`MsgStream`
IPC primitives which are type-checked/safe as desired in GH issue:
- https://github.com/goodboy/tractor/issues/365

Misc notes on current impl(s) status:
------ - ------
- add a `.msg.types.mk_msg_spec()` which uses the new `msgspec` support
  for `class MyStruct[Struct, Generic[T]]` parameterize-able fields and
  delivers our boxing SC-msg-(sub)set with the desired `payload_types`
  applied to `.pld`:
  - https://jcristharif.com/msgspec/supported-types.html#generic-types
  - as a note this impl seems to need to use `types.new_class()`
    dynamic subtype generation, though i don't really get *why* still..
    but without that the `msgspec.msgpack.Decoder` doesn't seem to
    reject `.pld`-limited `Msg` subtypes as demonstrated in the new
    test.
- around this ^ add a `.msg._codec.limit_msg_spec()` cm which exposes
  this payload-type-limiting API such that it can be applied per task
  via a `MsgCodec` in app code.
- the orig approach in https://github.com/goodboy/tractor/pull/311 was
  the idea of making payload fields `.pld: Raw` wherein we could have
  per-field/sub-msg decoders dynamically loaded depending on the
  particular application-layer schema in use. I don't want to lose the
  idea of this since I think it might be useful for an idea I have
  about capability-based-fields(-sharing, maybe using field-subset
  encryption?), and as such i've kept the (ostensibly) working impls in
  TODO-comments in `.msg._codec` wherein maybe we can add
  a `MsgCodec._payload_decs: dict` table for this later on.
  |_ also left in the `.msg.types.enc/decmsg()` impls but renamed as
     `enc/dec_payload()` (reworked to not rely on the lifo codec stack
     tables; now removed) such that we can prolly move them to
     `MsgCodec` methods in the future.
- add an unused `._codec.mk_tagged_union_dec()` helper which was
  originally factored out of the #311 proto-code but didn't end up
  working as desired with the new parameterized-generic-fields approach
  (now in `msg.types.mk_msg_spec()`)

Testing/deps work:
------ - ------
- new `test_limit_msgspec()` which ensures all the `.types` content is
  correct but without using the wrapping APIs in `._codec`; i.e. using
  an in-line `Decoder` instead of a `MsgCodec`.
- pin us to `msgspec>=0.18.5` which has the needed generic-types
  support (which took me way too long yesterday to figure out when
  implementing all this XD)!
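Example of the underlying mechanism:
------ - ------
A minimal sketch (using a hypothetical `ExMsg`, NOT the actual
`.msg.types` defs) of the `msgspec` generics feature this all rides
on: a payload-parameterized boxing msg plus a decoder which only
accepts the indexed `.pld` type,

    import msgspec
    from typing import Generic, TypeVar

    PayloadT = TypeVar('PayloadT')

    class ExMsg(msgspec.Struct, Generic[PayloadT], tag=True):
        cid: str|None
        pld: PayloadT

    enc = msgspec.msgpack.Encoder()
    dec = msgspec.msgpack.Decoder(type=ExMsg[int])

    # a matching payload-type round-trips,
    assert dec.decode(enc.encode(ExMsg(cid='1', pld=420))).pld == 420

    # but a mis-typed `.pld` is rejected at decode time!
    try:
        dec.decode(enc.encode(ExMsg(cid='1', pld='not-an-int')))
    except msgspec.ValidationError:
        pass

(requires the `msgspec>=0.18.5` pin added below)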
--- setup.py | 2 +- tests/test_caps_msging.py | 181 ++++++++++++++- tractor/msg/_codec.py | 144 +++++++++++- tractor/msg/types.py | 474 ++++++++++++++++++++++++++------------ 4 files changed, 645 insertions(+), 156 deletions(-) diff --git a/setup.py b/setup.py index 66b2622d..68ed7a94 100755 --- a/setup.py +++ b/setup.py @@ -61,7 +61,7 @@ setup( 'wrapt', # IPC serialization - 'msgspec', + 'msgspec>=0.18.5', # debug mode REPL 'pdbp', diff --git a/tests/test_caps_msging.py b/tests/test_caps_msging.py index f659cb13..b101c1e0 100644 --- a/tests/test_caps_msging.py +++ b/tests/test_caps_msging.py @@ -6,12 +6,22 @@ B~) ''' from typing import ( Any, + _GenericAlias, Type, + Union, ) from contextvars import ( Context, ) +# from inspect import Parameter +from msgspec import ( + structs, + msgpack, + # defstruct, + Struct, + ValidationError, +) import tractor from tractor.msg import ( _def_msgspec_codec, @@ -23,6 +33,12 @@ from tractor.msg import ( apply_codec, current_msgspec_codec, ) +from tractor.msg.types import ( + PayloadT, + Msg, + # Started, + mk_msg_spec, +) import trio # TODO: wrap these into `._codec` such that user can just pass @@ -54,7 +70,7 @@ def mk_custom_codec() -> MsgCodec: # apply custom hooks and set a `Decoder` which only # loads `NamespacePath` types. nsp_codec: MsgCodec = mk_codec( - dec_types=NamespacePath, + ipc_msg_spec=NamespacePath, enc_hook=enc_hook, dec_hook=dec_hook, ) @@ -196,3 +212,166 @@ def test_codec_hooks_mod(): await p.cancel_actor() trio.run(main) + + +def chk_pld_type( + generic: Msg|_GenericAlias, + payload_type: Type[Struct]|Any, + pld: Any, + +) -> bool: + + roundtrip: bool = False + pld_val_type: Type = type(pld) + + # gen_paramed: _GenericAlias = generic[payload_type] + # TODO: verify that the overridden subtypes + # DO NOT have modified type-annots from original! + # 'Start', .pld: FuncSpec + # 'StartAck', .pld: IpcCtxSpec + # 'Stop', .pld: UNSEt + # 'Error', .pld: ErrorData + # for typedef in ( + # [gen_paramed] + # + + + # # type-var should always be set for these sub-types + # # as well! + # Msg.__subclasses__() + # ): + # if typedef.__name__ not in [ + # 'Msg', + # 'Started', + # 'Yield', + # 'Return', + # ]: + # continue + # payload_type: Type[Struct] = CustomPayload + + # TODO: can remove all this right!? + # + # when parameterized (like `Msg[Any]`) then + # we expect an alias as input. + # if isinstance(generic, _GenericAlias): + # assert payload_type in generic.__args__ + # else: + # assert PayloadType in generic.__parameters__ + # pld_param: Parameter = generic.__signature__.parameters['pld'] + # assert pld_param.annotation is PayloadType + + type_spec: Union[Type[Struct]] + msg_types: list[Msg[payload_type]] + ( + type_spec, + msg_types, + ) = mk_msg_spec( + payload_type=payload_type, + ) + enc = msgpack.Encoder() + dec = msgpack.Decoder( + type=type_spec, # like `Msg[Any]` + ) + + # verify the boxed-type for all variable payload-type msgs. + for typedef in msg_types: + + pld_field = structs.fields(typedef)[1] + assert pld_field.type in {payload_type, PayloadT} + # TODO: does this need to work to get all subtypes to + # adhere? 
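+        # (a sketch of what full adherence would mean here:
+        # `structs.fields(Started[int])[1].type is int` for every
+        # subtype in the parameterized set.)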
+ assert pld_field.type is payload_type + + kwargs: dict[str, Any] = { + 'cid': '666', + 'pld': pld, + } + enc_msg = typedef(**kwargs) + + wire_bytes: bytes = enc.encode(enc_msg) + + try: + dec_msg = dec.decode(wire_bytes) + assert dec_msg.pld == pld + assert (roundtrip := (dec_msg == enc_msg)) + + except ValidationError as ve: + # breakpoint() + if pld_val_type is payload_type: + raise ValueError( + 'Got `ValidationError` despite type-var match!?\n' + f'pld_val_type: {pld_val_type}\n' + f'payload_type: {payload_type}\n' + ) from ve + + else: + # ow we good cuz the pld spec mismatched. + print( + 'Got expected `ValidationError` since,\n' + f'{pld_val_type} is not {payload_type}\n' + ) + else: + if ( + pld_val_type is not payload_type + and payload_type is not Any + ): + raise ValueError( + 'DID NOT `ValidationError` despite expected type match!?\n' + f'pld_val_type: {pld_val_type}\n' + f'payload_type: {payload_type}\n' + ) + + return roundtrip + + + +def test_limit_msgspec(): + + async def main(): + async with tractor.open_root_actor( + debug_mode=True + ): + + # ensure we can round-trip a boxing `Msg` + assert chk_pld_type( + Msg, + Any, + None, + ) + + # TODO: don't need this any more right since + # `msgspec>=0.15` has the nice generics stuff yah?? + # + # manually override the type annot of the payload + # field and ensure it propagates to all msg-subtypes. + # Msg.__annotations__['pld'] = Any + + # verify that a mis-typed payload value won't decode + assert not chk_pld_type( + Msg, + int, + pld='doggy', + ) + + # parametrize the boxed `.pld` type as a custom-struct + # and ensure that parametrization propagates + # to all payload-msg-spec-able subtypes! + class CustomPayload(Struct): + name: str + value: Any + + assert not chk_pld_type( + Msg, + CustomPayload, + pld='doggy', + ) + + assert chk_pld_type( + Msg, + CustomPayload, + pld=CustomPayload(name='doggy', value='urmom') + ) + + # uhh bc we can `.pause_from_sync()` now! :surfer: + # breakpoint() + + trio.run(main) diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index c26de8d4..5ce02055 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -47,20 +47,25 @@ from types import ModuleType import msgspec from msgspec import msgpack -from .pretty_struct import Struct +from tractor.msg.pretty_struct import Struct +from tractor.msg.types import ( + mk_msg_spec, + Msg, +) # TODO: API changes towards being interchange lib agnostic! +# # -[ ] capnproto has pre-compiled schema for eg.. # * https://capnproto.org/language.html # * http://capnproto.github.io/pycapnp/quickstart.html # * https://github.com/capnproto/pycapnp/blob/master/examples/addressbook.capnp +# class MsgCodec(Struct): ''' A IPC msg interchange format lib's encoder + decoder pair. ''' - lib: ModuleType = msgspec # ad-hoc type extensions @@ -70,12 +75,22 @@ class MsgCodec(Struct): # struct type unions # https://jcristharif.com/msgspec/structs.html#tagged-unions - types: Union[Type[Struct]]|Any = Any + ipc_msg_spec: Union[Type[Struct]]|Any = Any + payload_msg_spec: Union[Type[Struct]] = Any # post-configure cached props _enc: msgpack.Encoder|None = None _dec: msgpack.Decoder|None = None + # TODO: a sub-decoder system as well? + # see related comments in `.msg.types` + # _payload_decs: ( + # dict[ + # str, + # msgpack.Decoder, + # ] + # |None + # ) = None # TODO: use `functools.cached_property` for these ? 
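     # e.g. a sketch; NB `cached_property` needs a `__dict__` so this
     # would likely require `Struct(dict=True)` on this type:
     #
     #   from functools import cached_property
     #
     #   @cached_property
     #   def enc(self) -> msgpack.Encoder:
     #       return self.lib.msgpack.Encoder(enc_hook=self.enc_hook)
     #
     # per: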
# https://docs.python.org/3/library/functools.html#functools.cached_property @@ -88,8 +103,9 @@ class MsgCodec(Struct): enc_hook: Callable|None = None, reset: bool = False, - # TODO: what's the default for this? + # TODO: what's the default for this, and do we care? # write_buffer_size: int + # **kwargs, ) -> msgpack.Encoder: @@ -131,7 +147,7 @@ class MsgCodec(Struct): def decoder( self, - types: Union[Type[Struct]]|None = None, + ipc_msg_spec: Union[Type[Struct]]|None = None, dec_hook: Callable|None = None, reset: bool = False, **kwargs, @@ -152,7 +168,7 @@ class MsgCodec(Struct): or reset ): self._dec = self.lib.msgpack.Decoder( - types or self.types, + type=ipc_msg_spec or self.ipc_msg_spec, dec_hook=dec_hook or self.dec_hook, **kwargs, ) @@ -169,10 +185,39 @@ class MsgCodec(Struct): determined by the ''' - return self.dec.decode(msg) +def mk_tagged_union_dec( + tagged_structs: list[Struct], + +) -> tuple[ + list[str], + msgpack.Decoder, +]: + # See "tagged unions" docs: + # https://jcristharif.com/msgspec/structs.html#tagged-unions + + # "The quickest way to enable tagged unions is to set tag=True when + # defining every struct type in the union. In this case tag_field + # defaults to "type", and tag defaults to the struct class name + # (e.g. "Get")." + first: Struct = tagged_structs[0] + types_union: Union[Type[Struct]] = Union[ + first + ]|Any + tags: list[str] = [first.__name__] + + for struct in tagged_structs[1:]: + types_union |= struct + tags.append(struct.__name__) + + dec = msgpack.Decoder(types_union) + return ( + tags, + dec, + ) + # TODO: struct aware messaging coders as per: # - https://github.com/goodboy/tractor/issues/36 # - https://github.com/goodboy/tractor/issues/196 @@ -181,13 +226,18 @@ class MsgCodec(Struct): def mk_codec( libname: str = 'msgspec', + # for codec-ing boxed `Msg`-with-payload msgs + payload_types: Union[Type[Struct]]|None = None, + + # TODO: do we want to allow NOT/using a diff `Msg`-set? + # # struct type unions set for `Decoder` # https://jcristharif.com/msgspec/structs.html#tagged-unions - dec_types: Union[Type[Struct]]|Any = Any, + ipc_msg_spec: Union[Type[Struct]]|Any = Any, cache_now: bool = True, - # proxy to the `Struct.__init__()` + # proxy as `Struct(**kwargs)` **kwargs, ) -> MsgCodec: @@ -197,14 +247,59 @@ def mk_codec( `msgspec` ;). ''' + # (manually) generate a msg-payload-spec for all relevant + # god-boxing-msg subtypes, parameterizing the `Msg.pld: PayloadT` + # for the decoder such that all sub-type msgs in our SCIPP + # will automatically decode to a type-"limited" payload (`Struct`) + # object (set). + payload_type_spec: Union[Type[Msg]]|None = None + if payload_types: + ( + payload_type_spec, + msg_types, + ) = mk_msg_spec( + payload_type=payload_types, + ) + assert len(payload_type_spec.__args__) == len(msg_types) + + # TODO: sub-decode `.pld: Raw`? + # see similar notes inside `.msg.types`.. + # + # not sure we'll end up wanting/needing this + # though it might have unforeseen advantages in terms + # of enabling encrypted appliciation layer (only) + # payloads? + # + # register sub-payload decoders to load `.pld: Raw` + # decoded `Msg`-packets using a dynamic lookup (table) + # instead of a pre-defined msg-spec via `Generic` + # parameterization. + # + # ( + # tags, + # payload_dec, + # ) = mk_tagged_union_dec( + # tagged_structs=list(payload_types.__args__), + # ) + # _payload_decs: ( + # dict[str, msgpack.Decoder]|None + # ) = { + # # pre-seed decoders for std-py-type-set for use when + # # `Msg.pld == None|Any`. 
+ # None: msgpack.Decoder(Any), + # Any: msgpack.Decoder(Any), + # } + # for name in tags: + # _payload_decs[name] = payload_dec + codec = MsgCodec( - types=dec_types, + ipc_msg_spec=ipc_msg_spec, + payload_msg_spec=payload_type_spec, **kwargs, ) assert codec.lib.__name__ == libname - # by default config and cache the codec pair for given - # input settings. + # by default, config-n-cache the codec pair from input settings. if cache_now: assert codec.enc assert codec.dec @@ -251,3 +346,28 @@ def current_msgspec_codec() -> MsgCodec: ''' return _ctxvar_MsgCodec.get() + + +@cm +def limit_msg_spec( + payload_types: Union[Type[Struct]], + + # TODO: don't need this approach right? + # + # tagged_structs: list[Struct]|None = None, + + **codec_kwargs, +): + ''' + Apply a `MsgCodec` that will natively decode the SC-msg set's + `Msg.pld: Union[Type[Struct]]` payload fields using + tagged-unions of `msgspec.Struct`s from the `payload_types` + for all IPC contexts in use by the current `trio.Task`. + + ''' + msgspec_codec: MsgCodec = mk_codec( + payload_types=payload_types, + **codec_kwargs, + ) + with apply_codec(msgspec_codec): + yield msgspec_codec diff --git a/tractor/msg/types.py b/tractor/msg/types.py index d2fb0877..732a0f5d 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -15,23 +15,315 @@ # along with this program. If not, see . ''' -Extensions to built-in or (heavily used but 3rd party) friend-lib -types. +Define our strictly typed IPC message spec for the SCIPP: + +that is, + +the "Structurred-Concurrency-Inter-Process-(dialog)-(un)Protocol". ''' + from __future__ import annotations -from contextlib import contextmanager as cm +# from contextlib import contextmanager as cm +import types from typing import ( Any, + Generic, + Literal, + Type, + TypeVar, Union, ) from msgspec import ( msgpack, Raw, - Struct as _Struct, + Struct, + UNSET, ) + +# TODO: can also remove yah? +# +# class Header(Struct, tag=True): +# ''' +# A msg header which defines payload properties + +# ''' +# payload_tag: str|None = None + +# type variable for the boxed payload field `.pld` +PayloadT = TypeVar('PayloadT') + + +class Msg( + Struct, + Generic[PayloadT], + tag=True, + tag_field='msg_type', +): + ''' + The "god" boxing msg type. + + Boxes user data-msgs in a `.pld` and uses `msgspec`'s tagged + unions support to enable a spec from a common msg inheritance + tree. + + ''' + # header: Header + # TODO: use UNSET here? + cid: str|None # call/context-id + + # The msgs "payload" (spelled without vowels): + # https://en.wikipedia.org/wiki/Payload_(computing) + # + # NOTE: inherited from any `Msg` (and maybe overriden + # by use of `limit_msg_spec()`), but by default is + # parameterized to be `Any`. + # + # XXX this `Union` must strictly NOT contain `Any` if + # a limited msg-type-spec is intended, such that when + # creating and applying a new `MsgCodec` its + # `.decoder: Decoder` is configured with a `Union[Type[Struct]]` which + # restricts the allowed payload content (this `.pld` field) + # by type system defined loading constraints B) + # + # TODO: could also be set to `msgspec.Raw` if the sub-decoders + # approach is preferred over the generic parameterization + # approach as take by `mk_msg_spec()` below. + pld: PayloadT + + +# TODO: better name, like `Call/TaskInput`? +class FuncSpec(Struct): + # TODO: can we combine these 2 into a `NamespacePath` field? 
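+    # (e.g. hypothetically a single `nsf: NamespacePath` field,
+    # since a `NamespacePath` str like 'pkg.mod:func' encodes both
+    # parts; a sketch only, not implemented here.)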
+ ns: str + func: str + + kwargs: dict + uid: str # (calling) actor-id + + +class Start( + Msg, +): + ''' + Initial request to remotely schedule an RPC `trio.Task` via + `Actor.start_remote_task()`. + + It is called by all the following public APIs: + + - `ActorNursery.run_in_actor()` + + - `Portal.run()` + `|_.run_from_ns()` + `|_.open_stream_from()` + `|_._submit_for_result()` + + - `Context.open_context()` + + ''' + pld: FuncSpec + + +FuncType: Literal[ + 'asyncfunc', + 'asyncgen', + 'context', # TODO: the only one eventually? +] = 'context' + + +class IpcCtxSpec(Struct): + ''' + An inter-actor-`trio.Task`-comms `Context` spec. + + ''' + functype: FuncType + + # TODO: as part of the reponse we should report our allowed + # msg spec which should be generated from the type-annots as + # desired in # https://github.com/goodboy/tractor/issues/365 + # When this does not match what the starter/caller side + # expects we of course raise a `TypeError` just like if + # a function had been called using an invalid signature. + # + # msgspec: MsgSpec + + +class StartAck( + Msg, + Generic[PayloadT], +): + ''' + Init response to a `Cmd` request indicating the far + end's RPC callable "type". + + ''' + pld: IpcCtxSpec + + +class Started( + Msg, + Generic[PayloadT], +): + ''' + Packet to shuttle the "first value" delivered by + `Context.started(value: Any)` from a `@tractor.context` + decorated IPC endpoint. + + ''' + + +# TODO: instead of using our existing `Start` +# for this (as we did with the original `{'cmd': ..}` style) +# class Cancel(Msg): +# cid: str + + +class Yield( + Msg, + Generic[PayloadT], +): + ''' + Per IPC transmission of a value from `await MsgStream.send()`. + + ''' + + +class Stop(Msg): + ''' + Stream termination signal much like an IPC version + of `StopAsyncIteration`. + + ''' + pld: UNSET + + +class Return( + Msg, + Generic[PayloadT], +): + ''' + Final `return ` from a remotely scheduled + func-as-`trio.Task`. + + ''' + + +class ErrorData(Struct): + ''' + Remote actor error meta-data as needed originally by + `RemoteActorError.msgdata: dict`. + + ''' + src_uid: str + src_type_str: str + boxed_type_str: str + + relay_path: list[str] + tb_str: str + + # `ContextCancelled` + canceller: str|None = None + + # `StreamOverrun` + sender: str|None = None + + +class Error(Msg): + ''' + A pkt that wraps `RemoteActorError`s for relay. + + ''' + pld: ErrorData + + +# TODO: should be make a msg version of `ContextCancelled?` +# and/or with a scope field or a full `ActorCancelled`? +# class Cancelled(Msg): +# cid: str + +# TODO what about overruns? +# class Overrun(Msg): +# cid: str + + +def mk_msg_spec( + payload_type: Union[Type] = Any, + boxing_msg_set: set[Msg] = { + Started, + Yield, + Return, + }, + +) -> tuple[ + Union[Type[Msg]], + list[Type[Msg]], +]: + ''' + Generate a payload-type-parameterized `Msg` specification such + that IPC msgs which can be `Msg.pld` (payload) type + limited/filterd are specified given an input `payload_type: + Union[Type]`. + + ''' + submsg_types: list[Type[Msg]] = Msg.__subclasses__() + + # TODO: see below as well, + # => union building approach with `.__class_getitem__()` + # doesn't seem to work..? + # + # payload_type_spec: Union[Type[Msg]] + # + msg_types: list[Msg] = [] + for msgtype in boxing_msg_set: + + # check inheritance sanity + assert msgtype in submsg_types + + # TODO: wait why do we need the dynamic version here? + # -[ ] paraming the `PayloadT` values via `Generic[T]` + # doesn't seem to work at all? 
+ # -[ ] is there a way to get it to work at module level + # just using inheritance or maybe a metaclass? + # + # index_paramed_msg_type: Msg = msgtype[payload_type] + + # TODO: WHY do we need to dynamically generate the + # subtype-msgs here to ensure the `.pld` parameterization + # propagates as well as works at all in terms of the + # `msgpack.Decoder()`..? + # + # dynamically create the payload type-spec-limited msg set. + manual_paramed_msg_subtype: Type = types.new_class( + msgtype.__name__, + ( + # XXX NOTE XXX this seems to be THE ONLY + # way to get this to work correctly!?! + Msg[payload_type], + Generic[PayloadT], + ), + {}, + ) + + # TODO: grok the diff here better.. + # assert index_paramed_msg_type == manual_paramed_msg_subtype + + # XXX TODO: why does the manual method work but not the + # `.__class_getitem__()` one!?! + paramed_msg_type = manual_paramed_msg_subtype + + # payload_type_spec |= paramed_msg_type + msg_types.append(paramed_msg_type) + + + payload_type_spec: Union[Type[Msg]] = Union[*msg_types] + return ( + payload_type_spec, + msg_types, + ) + + # TODO: integration with our ``enable_modules: list[str]`` caps sys. # # ``pkgutil.resolve_name()`` internally uses @@ -43,160 +335,58 @@ from msgspec import ( # - https://stackoverflow.com/a/63320902 # - https://docs.python.org/3/library/sys.html#sys.meta_path -# the new "Implicit Namespace Packages" might be relevant? -# - https://www.python.org/dev/peps/pep-0420/ - -# add implicit serialized message type support so that paths can be -# handed directly to IPC primitives such as streams and `Portal.run()` -# calls: -# - via ``msgspec``: -# - https://jcristharif.com/msgspec/api.html#struct -# - https://jcristharif.com/msgspec/extending.html -# via ``msgpack-python``: -# https://github.com/msgpack/msgpack-python#packingunpacking-of-custom-data-type -# LIFO codec stack that is appended when the user opens the -# ``configure_native_msgs()`` cm below to configure a new codec set -# which will be applied to all new (msgspec relevant) IPC transports -# that are spawned **after** the configure call is made. -_lifo_codecs: list[ - tuple[ - msgpack.Encoder, - msgpack.Decoder, - ], -] = [(msgpack.Encoder(), msgpack.Decoder())] - - -def get_msg_codecs() -> tuple[ - msgpack.Encoder, - msgpack.Decoder, -]: - ''' - Return the currently configured ``msgspec`` codec set. - - The defaults are defined above. - - ''' - global _lifo_codecs - return _lifo_codecs[-1] - - -@cm -def configure_native_msgs( - tagged_structs: list[_Struct], -): - ''' - Push a codec set that will natively decode - tagged structs provied in ``tagged_structs`` - in all IPC transports and pop the codec on exit. - - ''' - # See "tagged unions" docs: - # https://jcristharif.com/msgspec/structs.html#tagged-unions - - # "The quickest way to enable tagged unions is to set tag=True when - # defining every struct type in the union. In this case tag_field - # defaults to "type", and tag defaults to the struct class name - # (e.g. "Get")." 
- enc = msgpack.Encoder() - - types_union = Union[tagged_structs[0]] | Any - for struct in tagged_structs[1:]: - types_union |= struct - - dec = msgpack.Decoder(types_union) - - _lifo_codecs.append((enc, dec)) - try: - print("YOYOYOOYOYOYOY") - yield enc, dec - finally: - print("NONONONONON") - _lifo_codecs.pop() - - -class Header(_Struct, tag=True): - ''' - A msg header which defines payload properties - - ''' - uid: str - msgtype: str|None = None - - -class Msg(_Struct, tag=True): - ''' - The "god" msg type, a box for task level msg types. - - ''' - header: Header - payload: Raw - - -_root_dec = msgpack.Decoder(Msg) -_root_enc = msgpack.Encoder() - +# TODO: do we still want to try and support the sub-decoder with +# `Raw` technique in the case that the `Generic` approach gives +# future grief? +# # sub-decoders for retreiving embedded # payload data and decoding to a sender # side defined (struct) type. -_subdecs: dict[ +_payload_decs: dict[ str|None, - msgpack.Decoder] = { + msgpack.Decoder, +] = { + # default decoder is used when `Header.payload_tag == None` None: msgpack.Decoder(Any), } -@cm -def enable_context( - msg_subtypes: list[list[_Struct]] -) -> msgpack.Decoder: +def dec_payload( + msg: Msg, + msg_dec: msgpack.Decoder = msgpack.Decoder( + type=Msg[Any] + ), - for types in msg_subtypes: - first = types[0] +) -> Any|Struct: - # register using the default tag_field of "type" - # which seems to map to the class "name". - tags = [first.__name__] - - # create a tagged union decoder for this type set - type_union = Union[first] - for typ in types[1:]: - type_union |= typ - tags.append(typ.__name__) - - dec = msgpack.Decoder(type_union) - - # register all tags for this union sub-decoder - for tag in tags: - _subdecs[tag] = dec - try: - yield dec - finally: - for tag in tags: - _subdecs.pop(tag) + msg: Msg = msg_dec.decode(msg) + payload_tag: str = msg.header.payload_tag + payload_dec: msgpack.Decoder = _payload_decs[payload_tag] + return payload_dec.decode(msg.pld) -def decmsg(msg: Msg) -> Any: - msg = _root_dec.decode(msg) - tag_field = msg.header.msgtype - dec = _subdecs[tag_field] - return dec.decode(msg.payload) - - -def encmsg( - dialog_id: str | int, +def enc_payload( + enc: msgpack.Encoder, payload: Any, -) -> Msg: + cid: str, - tag_field = None +) -> bytes: - plbytes = _root_enc.encode(payload) - if b'type' in plbytes: - assert isinstance(payload, _Struct) - tag_field = type(payload).__name__ + # tag_field: str|None = None + + plbytes = enc.encode(payload) + if b'msg_type' in plbytes: + assert isinstance(payload, Struct) + + # tag_field: str = type(payload).__name__ payload = Raw(plbytes) msg = Msg( - Header(dialog_id, tag_field), - payload, + cid=cid, + pld=payload, + # Header( + # payload_tag=tag_field, + # # dialog_id, + # ), ) - return _root_enc.encode(msg) + return enc.encode(msg) -- 2.34.1 From 6cd74a5dba70a0549fcadd671e8b5246182239f2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 28 Mar 2024 13:07:03 -0400 Subject: [PATCH 008/305] Tweak msg-spec test suite mod name --- tests/{test_caps_msging.py => test_caps_based_msging.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{test_caps_msging.py => test_caps_based_msging.py} (100%) diff --git a/tests/test_caps_msging.py b/tests/test_caps_based_msging.py similarity index 100% rename from tests/test_caps_msging.py rename to tests/test_caps_based_msging.py -- 2.34.1 From 9e16cfe8fd4b5b4202008c826c0dc9c4017ebb66 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 28 Mar 2024 13:08:18 -0400 
Subject: [PATCH 009/305] Change to multi-line-static-`dict` style msgs Re-arranging such that element-orders are line-arranged to our new IPC `.msg.types.Msg` fields spec in prep for replacing the current `dict`-as-msg impls with the `msgspec.Struct` native versions! --- tractor/_exceptions.py | 4 +++- tractor/_rpc.py | 35 +++++++++++++++++++++++++---------- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 0e1d6d10..b1a8ee63 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -536,7 +536,9 @@ def pack_error( # content's `.msgdata`). error_msg['tb_str'] = tb_str - pkt: dict = {'error': error_msg} + pkt: dict = { + 'error': error_msg, + } if cid: pkt['cid'] = cid diff --git a/tractor/_rpc.py b/tractor/_rpc.py index b108fdda..ef6cbe00 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -84,7 +84,10 @@ async def _invoke_non_context( # TODO: can we unify this with the `context=True` impl below? if inspect.isasyncgen(coro): - await chan.send({'functype': 'asyncgen', 'cid': cid}) + await chan.send({ + 'cid': cid, + 'functype': 'asyncgen', + }) # XXX: massive gotcha! If the containing scope # is cancelled and we execute the below line, # any ``ActorNursery.__aexit__()`` WON'T be @@ -104,18 +107,27 @@ async def _invoke_non_context( # to_send = await chan.recv_nowait() # if to_send is not None: # to_yield = await coro.asend(to_send) - await chan.send({'yield': item, 'cid': cid}) + await chan.send({ + 'yield': item, + 'cid': cid, + }) log.runtime(f"Finished iterating {coro}") # TODO: we should really support a proper # `StopAsyncIteration` system here for returning a final # value if desired - await chan.send({'stop': True, 'cid': cid}) + await chan.send({ + 'stop': True, + 'cid': cid, + }) # one way @stream func that gets treated like an async gen # TODO: can we unify this with the `context=True` impl below? elif treat_as_gen: - await chan.send({'functype': 'asyncgen', 'cid': cid}) + await chan.send({ + 'cid': cid, + 'functype': 'asyncgen', + }) # XXX: the async-func may spawn further tasks which push # back values like an async-generator would but must # manualy construct the response dict-packet-responses as @@ -128,7 +140,10 @@ async def _invoke_non_context( if not cs.cancelled_caught: # task was not cancelled so we can instruct the # far end async gen to tear down - await chan.send({'stop': True, 'cid': cid}) + await chan.send({ + 'stop': True, + 'cid': cid + }) else: # regular async function/method # XXX: possibly just a scheduled `Actor._cancel_task()` @@ -177,10 +192,10 @@ async def _invoke_non_context( and chan.connected() ): try: - await chan.send( - {'return': result, - 'cid': cid} - ) + await chan.send({ + 'return': result, + 'cid': cid, + }) except ( BrokenPipeError, trio.BrokenResourceError, @@ -474,8 +489,8 @@ async def _invoke( # "least sugary" type of RPC ep with support for # bi-dir streaming B) await chan.send({ + 'cid': cid, 'functype': 'context', - 'cid': cid }) # TODO: should we also use an `.open_context()` equiv -- 2.34.1 From e0d7ed48e86be75d02bcca96686d0ac1c314f611 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 29 Mar 2024 12:46:59 -0400 Subject: [PATCH 010/305] Drop `MsgCodec.decoder()/.encoder()` design Instead just instantiate `msgpack.Encoder/Decoder` instances inside `mk_codec()` and assign them directly as `._enc/._dec` fields. Explicitly take in named-args to both and proxy to the coder/decoder instantiation calls directly. 
Shuffling some codec internals: - rename `mk_codec()` inputs as `ipc_msg_spec` and `ipc_pld_spec`, make them mutex such that a payload type spec can't be passed if the built-in msg-spec isn't used. => expose `MsgCodec.ipc_pld_spec` directly from `._dec.type` => presume input `ipc_msg_spec` is `Any` by default when no `ipc_pld_spec` is passed since we have no way atm to enable a similar type-restricted-payload feature without a wrapping "shuttle protocol" ;) - move all the payload-sub-decoders stuff prototyped in GH#311 (inside `.types`) to `._codec` as commented-for-later-maybe `MsgCodec` methods including: - `.mk_pld_subdec()` for registering - `.enc/dec_payload()` for sub-codec field loading. - also comment out `._codec.mk_tagged_union_dec()` as the orig tag-to-decoder table factory, now mostly superseded by `.types.mk_msg_spec()` which takes the generic parameterizing approach instead. - change naming to `types.mk_msg_spec(payload_type_union)` input, making it more explicit that it expects a `Union[Type]`. Oh right, and start exposing all the `.types.Msg` subtypes in the `.msg` subpkg in prep for usage throughout the runtime B) --- tractor/msg/__init__.py | 37 ++++ tractor/msg/_codec.py | 394 +++++++++++++++++++++++----------------- tractor/msg/types.py | 113 +++--------- 3 files changed, 297 insertions(+), 247 deletions(-) diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py index b5c261cc..a93fa888 100644 --- a/tractor/msg/__init__.py +++ b/tractor/msg/__init__.py @@ -33,3 +33,40 @@ from ._codec import ( MsgCodec as MsgCodec, current_msgspec_codec as current_msgspec_codec, ) + +from .types import ( + Msg as Msg, + + Start, # with pld + FuncSpec as FuncSpec, + + StartAck, # with pld + IpcCtxSpec as IpcCtxSpec, + + Started, + Yield, + Stop, + Return, + + Error, # with pld + ErrorData as ErrorData +) + + +# built-in SC shuttle protocol msg type set in +# approx order of the IPC txn-state spaces. +__spec__: list[Msg] = [ + + # inter-actor RPC initiation + Start, + StartAck, + + # no-outcome-yet IAC (inter-actor-communication) + Started, + Yield, + Stop, + + # termination outcomes + Return, + Error, +] diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index 5ce02055..e6cb4f1f 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -29,6 +29,7 @@ ToDo: backends we prolly should offer: - https://capnproto.org/language.html#language-reference ''' +from __future__ import annotations from contextvars import ( ContextVar, Token, @@ -54,18 +55,36 @@ from tractor.msg.types import ( ) -# TODO: API changes towards being interchange lib agnostic! +# TODO: overall IPC msg-spec features (i.e. in this mod)! # -# -[ ] capnproto has pre-compiled schema for eg.. -# * https://capnproto.org/language.html -# * http://capnproto.github.io/pycapnp/quickstart.html -# * https://github.com/capnproto/pycapnp/blob/master/examples/addressbook.capnp +# -[ ] API changes towards being interchange lib agnostic! +# -[ ] capnproto has pre-compiled schema for eg.. +# * https://capnproto.org/language.html +# * http://capnproto.github.io/pycapnp/quickstart.html +# * https://github.com/capnproto/pycapnp/blob/master/examples/addressbook.capnp +# +# -[ ] struct aware messaging coders as per: +# -[x] https://github.com/goodboy/tractor/issues/36 +# -[ ] https://github.com/goodboy/tractor/issues/196 +# -[ ] https://github.com/goodboy/tractor/issues/365 # class MsgCodec(Struct): ''' A IPC msg interchange format lib's encoder + decoder pair. 
''' + # post-configure-cached when prop-accessed (see `mk_codec()` + # OR can be passed directly as, + # `MsgCodec(_enc=, _dec=)` + _enc: msgpack.Encoder|None = None + _dec: msgpack.Decoder|None = None + + # struct type unions + # https://jcristharif.com/msgspec/structs.html#tagged-unions + @property + def ipc_pld_spec(self) -> Union[Type[Struct]]: + return self._dec.type + lib: ModuleType = msgspec # ad-hoc type extensions @@ -73,16 +92,8 @@ class MsgCodec(Struct): enc_hook: Callable[[Any], Any]|None = None # coder dec_hook: Callable[[type, Any], Any]|None = None # decoder - # struct type unions - # https://jcristharif.com/msgspec/structs.html#tagged-unions - ipc_msg_spec: Union[Type[Struct]]|Any = Any - payload_msg_spec: Union[Type[Struct]] = Any - - # post-configure cached props - _enc: msgpack.Encoder|None = None - _dec: msgpack.Decoder|None = None - # TODO: a sub-decoder system as well? + # payload_msg_specs: Union[Type[Struct]] = Any # see related comments in `.msg.types` # _payload_decs: ( # dict[ @@ -91,42 +102,18 @@ class MsgCodec(Struct): # ] # |None # ) = None + # OR + # ) = { + # # pre-seed decoders for std-py-type-set for use when + # # `Msg.pld == None|Any`. + # None: msgpack.Decoder(Any), + # Any: msgpack.Decoder(Any), + # } # TODO: use `functools.cached_property` for these ? # https://docs.python.org/3/library/functools.html#functools.cached_property @property def enc(self) -> msgpack.Encoder: - return self._enc or self.encoder() - - def encoder( - self, - enc_hook: Callable|None = None, - reset: bool = False, - - # TODO: what's the default for this, and do we care? - # write_buffer_size: int - # - **kwargs, - - ) -> msgpack.Encoder: - ''' - Set or get the maybe-cached `msgspec.msgpack.Encoder` - instance configured for this codec. - - When `reset=True` any previously configured encoder will - be recreated and then cached with the new settings passed - as input. - - ''' - if ( - self._enc is None - or reset - ): - self._enc = self.lib.msgpack.Encoder( - enc_hook=enc_hook or self.enc_hook, - # write_buffer_size=write_buffer_size, - ) - return self._enc def encode( @@ -139,40 +126,10 @@ class MsgCodec(Struct): on a tranport protocol connection. ''' - return self.enc.encode(py_obj) + return self._enc.encode(py_obj) @property def dec(self) -> msgpack.Decoder: - return self._dec or self.decoder() - - def decoder( - self, - ipc_msg_spec: Union[Type[Struct]]|None = None, - dec_hook: Callable|None = None, - reset: bool = False, - **kwargs, - # ext_hook: ext_hook_sig - - ) -> msgpack.Decoder: - ''' - Set or get the maybe-cached `msgspec.msgpack.Decoder` - instance configured for this codec. - - When `reset=True` any previously configured decoder will - be recreated and then cached with the new settings passed - as input. - - ''' - if ( - self._dec is None - or reset - ): - self._dec = self.lib.msgpack.Decoder( - type=ipc_msg_spec or self.ipc_msg_spec, - dec_hook=dec_hook or self.dec_hook, - **kwargs, - ) - return self._dec def decode( @@ -185,60 +142,165 @@ class MsgCodec(Struct): determined by the ''' - return self.dec.decode(msg) + return self._dec.decode(msg) + + # TODO: do we still want to try and support the sub-decoder with + # `.Raw` technique in the case that the `Generic` approach gives + # future grief? + # + # -[ ] + # + #def mk_pld_subdec( + # self, + # payload_types: Union[Type[Struct]], + + #) -> msgpack.Decoder: + # # TODO: sub-decoder suppor for `.pld: Raw`? + # # => see similar notes inside `.msg.types`.. 
+ # # + # # not sure we'll end up needing this though it might have + # # unforeseen advantages in terms of enabling encrypted + # # appliciation layer (only) payloads? + # # + # # register sub-payload decoders to load `.pld: Raw` + # # decoded `Msg`-packets using a dynamic lookup (table) + # # instead of a pre-defined msg-spec via `Generic` + # # parameterization. + # # + # ( + # tags, + # payload_dec, + # ) = mk_tagged_union_dec( + # tagged_structs=list(payload_types.__args__), + # ) + # # register sub-decoders by tag + # subdecs: dict[str, msgpack.Decoder]|None = self._payload_decs + # for name in tags: + # subdecs.setdefault( + # name, + # payload_dec, + # ) + + # return payload_dec + + # sub-decoders for retreiving embedded + # payload data and decoding to a sender + # side defined (struct) type. + # def dec_payload( + # codec: MsgCodec, + # msg: Msg, + + # ) -> Any|Struct: + + # msg: Msg = codec.dec.decode(msg) + # payload_tag: str = msg.header.payload_tag + # payload_dec: msgpack.Decoder = codec._payload_decs[payload_tag] + # return payload_dec.decode(msg.pld) + + # def enc_payload( + # codec: MsgCodec, + # payload: Any, + # cid: str, + + # ) -> bytes: + + # # tag_field: str|None = None + + # plbytes = codec.enc.encode(payload) + # if b'msg_type' in plbytes: + # assert isinstance(payload, Struct) + + # # tag_field: str = type(payload).__name__ + # payload = msgspec.Raw(plbytes) + + # msg = Msg( + # cid=cid, + # pld=payload, + # # Header( + # # payload_tag=tag_field, + # # # dialog_id, + # # ), + # ) + # return codec.enc.encode(msg) -def mk_tagged_union_dec( - tagged_structs: list[Struct], + #def mk_tagged_union_dec( + # tagged_structs: list[Struct], -) -> tuple[ - list[str], - msgpack.Decoder, -]: - # See "tagged unions" docs: - # https://jcristharif.com/msgspec/structs.html#tagged-unions + #) -> tuple[ + # list[str], + # msgpack.Decoder, + #]: + # ''' + # Create a `msgpack.Decoder` for an input `list[msgspec.Struct]` + # and return a `list[str]` of each struct's `tag_field: str` value + # which can be used to "map to" the initialized dec. - # "The quickest way to enable tagged unions is to set tag=True when - # defining every struct type in the union. In this case tag_field - # defaults to "type", and tag defaults to the struct class name - # (e.g. "Get")." - first: Struct = tagged_structs[0] - types_union: Union[Type[Struct]] = Union[ - first - ]|Any - tags: list[str] = [first.__name__] + # ''' + # # See "tagged unions" docs: + # # https://jcristharif.com/msgspec/structs.html#tagged-unions - for struct in tagged_structs[1:]: - types_union |= struct - tags.append(struct.__name__) + # # "The quickest way to enable tagged unions is to set tag=True when + # # defining every struct type in the union. In this case tag_field + # # defaults to "type", and tag defaults to the struct class name + # # (e.g. "Get")." 
+ # first: Struct = tagged_structs[0] + # types_union: Union[Type[Struct]] = Union[ + # first + # ]|Any + # tags: list[str] = [first.__name__] - dec = msgpack.Decoder(types_union) - return ( - tags, - dec, - ) + # for struct in tagged_structs[1:]: + # types_union |= struct + # tags.append( + # getattr( + # struct, + # struct.__struct_config__.tag_field, + # struct.__name__, + # ) + # ) + + # dec = msgpack.Decoder(types_union) + # return ( + # tags, + # dec, + # ) -# TODO: struct aware messaging coders as per: -# - https://github.com/goodboy/tractor/issues/36 -# - https://github.com/goodboy/tractor/issues/196 -# - https://github.com/goodboy/tractor/issues/365 def mk_codec( - libname: str = 'msgspec', - - # for codec-ing boxed `Msg`-with-payload msgs - payload_types: Union[Type[Struct]]|None = None, - - # TODO: do we want to allow NOT/using a diff `Msg`-set? + ipc_msg_spec: Union[Type[Struct]]|Any|None = None, # + # ^TODO^: in the long run, do we want to allow using a diff IPC `Msg`-set? + # it would break the runtime, but maybe say if you wanted + # to add some kinda field-specific or wholesale `.pld` ecryption? + # struct type unions set for `Decoder` # https://jcristharif.com/msgspec/structs.html#tagged-unions - ipc_msg_spec: Union[Type[Struct]]|Any = Any, + ipc_pld_spec: Union[Type[Struct]]|Any|None = None, - cache_now: bool = True, + # TODO: offering a per-msg(-field) type-spec such that + # the fields can be dynamically NOT decoded and left as `Raw` + # values which are later loaded by a sub-decoder specified + # by `tag_field: str` value key? + # payload_msg_specs: dict[ + # str, # tag_field value as sub-decoder key + # Union[Type[Struct]] # `Msg.pld` type spec + # ]|None = None, + + libname: str = 'msgspec', # proxy as `Struct(**kwargs)` + # ------ - ------ + dec_hook: Callable|None = None, + enc_hook: Callable|None = None, + # ------ - ------ **kwargs, + # + # Encoder: + # write_buffer_size=write_buffer_size, + # + # Decoder: + # ext_hook: ext_hook_sig ) -> MsgCodec: ''' @@ -247,75 +309,81 @@ def mk_codec( `msgspec` ;). ''' - # (manually) generate a msg-payload-spec for all relevant - # god-boxing-msg subtypes, parameterizing the `Msg.pld: PayloadT` - # for the decoder such that all sub-type msgs in our SCIPP - # will automatically decode to a type-"limited" payload (`Struct`) - # object (set). - payload_type_spec: Union[Type[Msg]]|None = None - if payload_types: + if ( + ipc_msg_spec is not None + and ipc_pld_spec + ): + raise RuntimeError( + f'If a payload spec is provided,\n' + "the builtin SC-shuttle-protocol's msg set\n" + f'(i.e. `{Msg}`) MUST be used!\n\n' + f'However both values were passed as => mk_codec(\n' + f' ipc_msg_spec={ipc_msg_spec}`\n' + f' ipc_pld_spec={ipc_pld_spec}`\n)\n' + ) + + elif ( + ipc_pld_spec + and + + # XXX required for now (or maybe forever?) until + # we can dream up a way to allow parameterizing and/or + # custom overrides to the `Msg`-spec protocol itself? + ipc_msg_spec is None + ): + # (manually) generate a msg-payload-spec for all relevant + # god-boxing-msg subtypes, parameterizing the `Msg.pld: PayloadT` + # for the decoder such that all sub-type msgs in our SCIPP + # will automatically decode to a type-"limited" payload (`Struct`) + # object (set). 
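+        #
+        # E.g. (sketch) `ipc_pld_spec=int` should deliver a spec
+        # roughly like,
+        #
+        #   Union[Started[int], Yield[int], Return[int], ..]
+        #
+        # which the `msgpack.Decoder(type=...)` created below then
+        # enforces on every inbound `.pld` field.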
( - payload_type_spec, + ipc_msg_spec, msg_types, ) = mk_msg_spec( - payload_type=payload_types, + payload_type_union=ipc_pld_spec, ) - assert len(payload_type_spec.__args__) == len(msg_types) + assert len(ipc_msg_spec.__args__) == len(msg_types) + assert ipc_msg_spec - # TODO: sub-decode `.pld: Raw`? - # see similar notes inside `.msg.types`.. - # - # not sure we'll end up wanting/needing this - # though it might have unforeseen advantages in terms - # of enabling encrypted appliciation layer (only) - # payloads? - # - # register sub-payload decoders to load `.pld: Raw` - # decoded `Msg`-packets using a dynamic lookup (table) - # instead of a pre-defined msg-spec via `Generic` - # parameterization. - # - # ( - # tags, - # payload_dec, - # ) = mk_tagged_union_dec( - # tagged_structs=list(payload_types.__args__), - # ) - # _payload_decs: ( - # dict[str, msgpack.Decoder]|None - # ) = { - # # pre-seed decoders for std-py-type-set for use when - # # `Msg.pld == None|Any`. - # None: msgpack.Decoder(Any), - # Any: msgpack.Decoder(Any), - # } - # for name in tags: - # _payload_decs[name] = payload_dec + dec = msgpack.Decoder( + type=ipc_msg_spec, # like `Msg[Any]` + ) + + else: + ipc_msg_spec = ipc_msg_spec or Any + + enc = msgpack.Encoder( + enc_hook=enc_hook, + ) + dec = msgpack.Decoder( + type=ipc_msg_spec, # like `Msg[Any]` + dec_hook=dec_hook, + ) codec = MsgCodec( - ipc_msg_spec=ipc_msg_spec, - payload_msg_spec=payload_type_spec, - **kwargs, + _enc=enc, + _dec=dec, + # payload_msg_specs=payload_msg_specs, + # **kwargs, ) - assert codec.lib.__name__ == libname - # by default, config-n-cache the codec pair from input settings. - if cache_now: - assert codec.enc - assert codec.dec + # sanity on expected backend support + assert codec.lib.__name__ == libname return codec # instance of the default `msgspec.msgpack` codec settings, i.e. # no custom structs, hooks or other special types. -_def_msgspec_codec: MsgCodec = mk_codec() +_def_msgspec_codec: MsgCodec = mk_codec(ipc_msg_spec=Any) # NOTE: provides for per-`trio.Task` specificity of the # IPC msging codec used by the transport layer when doing # `Channel.send()/.recv()` of wire data. _ctxvar_MsgCodec: ContextVar[MsgCodec] = ContextVar( 'msgspec_codec', + + # TODO: move this to our new `Msg`-spec! default=_def_msgspec_codec, ) @@ -353,7 +421,7 @@ def limit_msg_spec( payload_types: Union[Type[Struct]], # TODO: don't need this approach right? - # + # -> related to the `MsgCodec._payload_decs` stuff above.. # tagged_structs: list[Struct]|None = None, **codec_kwargs, diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 732a0f5d..7d64e766 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -22,9 +22,7 @@ that is, the "Structurred-Concurrency-Inter-Process-(dialog)-(un)Protocol". ''' - from __future__ import annotations -# from contextlib import contextmanager as cm import types from typing import ( Any, @@ -36,14 +34,12 @@ from typing import ( ) from msgspec import ( - msgpack, - Raw, Struct, UNSET, ) - -# TODO: can also remove yah? +# TODO: sub-decoded `Raw` fields? +# -[ ] see `MsgCodec._payload_decs` notes # # class Header(Struct, tag=True): # ''' @@ -70,7 +66,6 @@ class Msg( tree. ''' - # header: Header # TODO: use UNSET here? cid: str|None # call/context-id @@ -94,9 +89,24 @@ class Msg( pld: PayloadT -# TODO: better name, like `Call/TaskInput`? +# TODO: caps based RPC support in the payload? +# +# -[ ] integration with our ``enable_modules: list[str]`` caps sys. 
+# ``pkgutil.resolve_name()`` internally uses +# ``importlib.import_module()`` which can be filtered by +# inserting a ``MetaPathFinder`` into ``sys.meta_path`` (which +# we could do before entering the ``Actor._process_messages()`` +# loop)? +# - https://github.com/python/cpython/blob/main/Lib/pkgutil.py#L645 +# - https://stackoverflow.com/questions/1350466/preventing-python-code-from-importing-certain-modules +# - https://stackoverflow.com/a/63320902 +# - https://docs.python.org/3/library/sys.html#sys.meta_path +# +# -[ ] can we combine .ns + .func into a native `NamespacePath` field? +# +# -[ ]better name, like `Call/TaskInput`? +# class FuncSpec(Struct): - # TODO: can we combine these 2 into a `NamespacePath` field? ns: str func: str @@ -249,7 +259,7 @@ class Error(Msg): def mk_msg_spec( - payload_type: Union[Type] = Any, + payload_type_union: Union[Type] = Any, boxing_msg_set: set[Msg] = { Started, Yield, @@ -261,10 +271,13 @@ def mk_msg_spec( list[Type[Msg]], ]: ''' - Generate a payload-type-parameterized `Msg` specification such - that IPC msgs which can be `Msg.pld` (payload) type - limited/filterd are specified given an input `payload_type: - Union[Type]`. + Create a payload-(data-)type-parameterized IPC message specification. + + Allows generating IPC msg types from the above builtin set + with a payload (field) restricted data-type via the `Msg.pld: + PayloadT` type var. This allows runtime-task contexts to use + the python type system to limit/filter payload values as + determined by the input `payload_type_union: Union[Type]`. ''' submsg_types: list[Type[Msg]] = Msg.__subclasses__() @@ -287,7 +300,7 @@ def mk_msg_spec( # -[ ] is there a way to get it to work at module level # just using inheritance or maybe a metaclass? # - # index_paramed_msg_type: Msg = msgtype[payload_type] + # index_paramed_msg_type: Msg = msgtype[payload_type_union] # TODO: WHY do we need to dynamically generate the # subtype-msgs here to ensure the `.pld` parameterization @@ -300,7 +313,7 @@ def mk_msg_spec( ( # XXX NOTE XXX this seems to be THE ONLY # way to get this to work correctly!?! - Msg[payload_type], + Msg[payload_type_union], Generic[PayloadT], ), {}, @@ -322,71 +335,3 @@ def mk_msg_spec( payload_type_spec, msg_types, ) - - -# TODO: integration with our ``enable_modules: list[str]`` caps sys. -# -# ``pkgutil.resolve_name()`` internally uses -# ``importlib.import_module()`` which can be filtered by inserting -# a ``MetaPathFinder`` into ``sys.meta_path`` (which we could do before -# entering the ``Actor._process_messages()`` loop). -# https://github.com/python/cpython/blob/main/Lib/pkgutil.py#L645 -# https://stackoverflow.com/questions/1350466/preventing-python-code-from-importing-certain-modules -# - https://stackoverflow.com/a/63320902 -# - https://docs.python.org/3/library/sys.html#sys.meta_path - -# TODO: do we still want to try and support the sub-decoder with -# `Raw` technique in the case that the `Generic` approach gives -# future grief? -# -# sub-decoders for retreiving embedded -# payload data and decoding to a sender -# side defined (struct) type. 
-_payload_decs: dict[ - str|None, - msgpack.Decoder, -] = { - # default decoder is used when `Header.payload_tag == None` - None: msgpack.Decoder(Any), -} - - -def dec_payload( - msg: Msg, - msg_dec: msgpack.Decoder = msgpack.Decoder( - type=Msg[Any] - ), - -) -> Any|Struct: - - msg: Msg = msg_dec.decode(msg) - payload_tag: str = msg.header.payload_tag - payload_dec: msgpack.Decoder = _payload_decs[payload_tag] - return payload_dec.decode(msg.pld) - - -def enc_payload( - enc: msgpack.Encoder, - payload: Any, - cid: str, - -) -> bytes: - - # tag_field: str|None = None - - plbytes = enc.encode(payload) - if b'msg_type' in plbytes: - assert isinstance(payload, Struct) - - # tag_field: str = type(payload).__name__ - payload = Raw(plbytes) - - msg = Msg( - cid=cid, - pld=payload, - # Header( - # payload_tag=tag_field, - # # dialog_id, - # ), - ) - return enc.encode(msg) -- 2.34.1 From c79c2d7ffd7fdac8d85da599a4db0cb7dce5f486 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 29 Mar 2024 13:48:08 -0400 Subject: [PATCH 011/305] Tweak msging tests to match codec api changes Mostly adjusting input args/logic to various spec/codec signatures and new runtime semantics: - `test_msg_spec_xor_pld_spec()` to verify that a shuttle prot spec and payload spec are necessarily mutex and that `mk_codec()` enforces it. - switch to `ipc_msg_spec` input in `mk_custom_codec()` helper. - drop buncha commented cruft from `test_limit_msgspec()` including no longer needed type union instance checks in dunder attributes. --- tests/test_caps_based_msging.py | 100 +++++++++++++++++++++----------- 1 file changed, 66 insertions(+), 34 deletions(-) diff --git a/tests/test_caps_based_msging.py b/tests/test_caps_based_msging.py index b101c1e0..98ab7fa3 100644 --- a/tests/test_caps_based_msging.py +++ b/tests/test_caps_based_msging.py @@ -1,5 +1,6 @@ ''' -Functional audits for our "capability based messaging (schema)" feats. +Low-level functional audits for our +"capability based messaging"-spec feats. B~) @@ -22,6 +23,7 @@ from msgspec import ( Struct, ValidationError, ) +import pytest import tractor from tractor.msg import ( _def_msgspec_codec, @@ -34,13 +36,29 @@ from tractor.msg import ( current_msgspec_codec, ) from tractor.msg.types import ( - PayloadT, + # PayloadT, Msg, # Started, mk_msg_spec, ) import trio + +def test_msg_spec_xor_pld_spec(): + ''' + If the `.msg.types.Msg`-set is overridden, we + can't also support a `Msg.pld` spec. + + ''' + # apply custom hooks and set a `Decoder` which only + # loads `NamespacePath` types. + with pytest.raises(RuntimeError): + mk_codec( + ipc_msg_spec=Any, + ipc_pld_spec=NamespacePath, + ) + + # TODO: wrap these into `._codec` such that user can just pass # a type table of some sort? def enc_hook(obj: Any) -> Any: @@ -66,11 +84,13 @@ def ex_func(*args): print(f'ex_func({args})') -def mk_custom_codec() -> MsgCodec: +def mk_custom_codec( + ipc_msg_spec: Type[Any] = Any, +) -> MsgCodec: # apply custom hooks and set a `Decoder` which only # loads `NamespacePath` types. nsp_codec: MsgCodec = mk_codec( - ipc_msg_spec=NamespacePath, + ipc_msg_spec=ipc_msg_spec, enc_hook=enc_hook, dec_hook=dec_hook, ) @@ -225,16 +245,9 @@ def chk_pld_type( pld_val_type: Type = type(pld) # gen_paramed: _GenericAlias = generic[payload_type] - # TODO: verify that the overridden subtypes - # DO NOT have modified type-annots from original! 
- # 'Start', .pld: FuncSpec - # 'StartAck', .pld: IpcCtxSpec - # 'Stop', .pld: UNSEt - # 'Error', .pld: ErrorData # for typedef in ( # [gen_paramed] # + - # # type-var should always be set for these sub-types # # as well! # Msg.__subclasses__() @@ -246,56 +259,75 @@ def chk_pld_type( # 'Return', # ]: # continue - # payload_type: Type[Struct] = CustomPayload - # TODO: can remove all this right!? - # - # when parameterized (like `Msg[Any]`) then - # we expect an alias as input. - # if isinstance(generic, _GenericAlias): - # assert payload_type in generic.__args__ - # else: - # assert PayloadType in generic.__parameters__ - # pld_param: Parameter = generic.__signature__.parameters['pld'] - # assert pld_param.annotation is PayloadType + # TODO: verify that the overridden subtypes + # DO NOT have modified type-annots from original! + # 'Start', .pld: FuncSpec + # 'StartAck', .pld: IpcCtxSpec + # 'Stop', .pld: UNSEt + # 'Error', .pld: ErrorData - type_spec: Union[Type[Struct]] + + pld_type_spec: Union[Type[Struct]] msg_types: list[Msg[payload_type]] + + # make a one-off dec to compare with our `MsgCodec` instance + # which does the below `mk_msg_spec()` call internally ( - type_spec, + pld_type_spec, msg_types, ) = mk_msg_spec( - payload_type=payload_type, + payload_type_union=payload_type, ) enc = msgpack.Encoder() dec = msgpack.Decoder( - type=type_spec, # like `Msg[Any]` + type=pld_type_spec or Any, # like `Msg[Any]` + ) + + codec: MsgCodec = mk_codec( + # NOTE: this ONLY accepts `Msg.pld` fields of a specified + # type union. + ipc_pld_spec=payload_type, + ) + + # assert codec.dec == dec + # XXX-^ not sure why these aren't "equal" but when cast + # to `str` they seem to match ?? .. kk + assert ( + str(pld_type_spec) + == + str(codec.ipc_pld_spec) + == + str(dec.type) + == + str(codec.dec.type) ) # verify the boxed-type for all variable payload-type msgs. for typedef in msg_types: pld_field = structs.fields(typedef)[1] - assert pld_field.type in {payload_type, PayloadT} - # TODO: does this need to work to get all subtypes to - # adhere? assert pld_field.type is payload_type + # TODO-^ does this need to work to get all subtypes to adhere? kwargs: dict[str, Any] = { 'cid': '666', 'pld': pld, } - enc_msg = typedef(**kwargs) + enc_msg: Msg = typedef(**kwargs) - wire_bytes: bytes = enc.encode(enc_msg) + wire_bytes: bytes = codec.enc.encode(enc_msg) + _wire_bytes: bytes = enc.encode(enc_msg) try: - dec_msg = dec.decode(wire_bytes) + _dec_msg = dec.decode(wire_bytes) + dec_msg = codec.dec.decode(wire_bytes) + assert dec_msg.pld == pld - assert (roundtrip := (dec_msg == enc_msg)) + assert _dec_msg.pld == pld + assert (roundtrip := (_dec_msg == enc_msg)) except ValidationError as ve: - # breakpoint() if pld_val_type is payload_type: raise ValueError( 'Got `ValidationError` despite type-var match!?\n' -- 2.34.1 From 8d716f2113096d875517c6e317120f206c212ffc Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 29 Mar 2024 18:46:37 -0400 Subject: [PATCH 012/305] Be mega pedantic with msg-spec building Turns out the generics based payload speccing API, as in https://jcristharif.com/msgspec/supported-types.html#generic-types, DOES WORK properly as long as we don't rely on inheritance from `Msg` a parent `Generic`.. So let's get real pedantic in the `mk_msg_spec()` internals as well as verification in the test suite! 
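E.g. the (seemingly) required sub-type shape, a sketch distilled from
the changes below (not verbatim code):

    class Msg(Struct, Generic[PayloadT], tag=True):
        pld: PayloadT

    # a payload-spec-able subtype must re-inherit `Generic[PayloadT]`
    # AND (redundantly) re-declare the `.pld` field in order for
    # `Started[MyStruct]`-style indexing to actually constrain
    # decoding:
    class Started(Msg, Generic[PayloadT]):
        pld: PayloadT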
Fixes in `.msg.types`:
- implement (as part of tinker testing) multiple spec union building
  methods via a `spec_build_method: str` to `mk_msg_spec()` and leave
  a buncha notes around what did and didn't work:
  - 'indexed_generics' is the only method THAT WORKS and the one that
    you'd expect being closest to the `msgspec` docs (link above).
  - 'defstruct' using dynamically defined msgs => doesn't work!
  - 'types_new_class' using dynamically defined msgs but with
    `types.new_class()` => ALSO doesn't work..
- explicitly separate the user-code-type-constrainable `.pld` msg set
  into `types._payload_spec_msgs`, putting the others in
  a `types._runtime_spec_msgs`, with the full set defined as
  `.__spec__` (moving it out of the pkg-mod and back to `.types` as
  well).
- for the `_payload_spec_msgs` msgs manually make them inherit
  `Generic[PayloadT]` and (redundantly) define a `.pld: PayloadT`
  field.
- make `IpcCtxSpec.functype` an inline `Literal`.
- toss in some TODO notes about choosing a better `Msg.cid` type.

Fixes/tweaks around `.msg._codec`:
- rename `MsgCodec.ipc/pld_msg_spec` -> `.msg/pld_spec`
- make `._enc/._dec` non-optional fields
- wow, ^facepalm^, make sure `._ipc.MsgpackTCPStream.__init__()` uses
  `mk_codec()` since `MsgCodec` can't be (easily) constructed
  directly.

Get more detailed in testing:
- inside the `chk_pld_type()` helper ensure `roundtrip` is always set
  to some value, `None` by default but a bool depending on legit
  outcome.
- drop input `generic`; no longer used.
- drop the masked `typedef` loop from `Msg.__subclasses__()`.
- add an `expect_roundtrip: bool` and use it to jump into the debugger
  when any expectation doesn't match the outcome.
- use new `MsgCodec` field names (as per first section above).
- ensure the encoded msg matches the decoded one from both the ad-hoc
  decoder and codec-loaded values.
- ensure the pld checking is only applied to msgs in the
  `types._payload_spec_msgs` set by `typedef.__name__` filtering since
  `mk_msg_spec()` now returns the full `.types.Msg` set.
---
 tests/test_caps_based_msging.py | 150 +++++++++++++---------
 tractor/_ipc.py                 |   3 +-
 tractor/msg/__init__.py         |  38 ++----
 tractor/msg/_codec.py           |  32 +++--
 tractor/msg/types.py            | 220 ++++++++++++++++++++++--------
 5 files changed, 289 insertions(+), 154 deletions(-)

diff --git a/tests/test_caps_based_msging.py b/tests/test_caps_based_msging.py
index 98ab7fa3..abdda0a5 100644
--- a/tests/test_caps_based_msging.py
+++ b/tests/test_caps_based_msging.py
@@ -35,6 +35,7 @@ from tractor.msg import (
     apply_codec,
     current_msgspec_codec,
 )
+from tractor.msg import types
 from tractor.msg.types import (
     # PayloadT,
     Msg,
@@ -235,31 +236,15 @@ def test_codec_hooks_mod():


 def chk_pld_type(
-    generic: Msg|_GenericAlias,
-    payload_type: Type[Struct]|Any,
+    payload_spec: Type[Struct]|Any,
     pld: Any,
+    expect_roundtrip: bool|None = None,
+
 ) -> bool:

-    roundtrip: bool = False
     pld_val_type: Type = type(pld)

-    # gen_paramed: _GenericAlias = generic[payload_type]
-    # for typedef in (
-    #     [gen_paramed]
-    #     +
-    #     # type-var should always be set for these sub-types
-    #     # as well!
-    #     Msg.__subclasses__()
-    # ):
-    #     if typedef.__name__ not in [
-    #         'Msg',
-    #         'Started',
-    #         'Yield',
-    #         'Return',
-    #     ]:
-    #         continue
-
     # TODO: verify that the overridden subtypes
     # DO NOT have modified type-annots from original!
# 'Start', .pld: FuncSpec @@ -267,48 +252,64 @@ def chk_pld_type( # 'Stop', .pld: UNSEt # 'Error', .pld: ErrorData - - pld_type_spec: Union[Type[Struct]] - msg_types: list[Msg[payload_type]] - - # make a one-off dec to compare with our `MsgCodec` instance - # which does the below `mk_msg_spec()` call internally - ( - pld_type_spec, - msg_types, - ) = mk_msg_spec( - payload_type_union=payload_type, - ) - enc = msgpack.Encoder() - dec = msgpack.Decoder( - type=pld_type_spec or Any, # like `Msg[Any]` - ) - codec: MsgCodec = mk_codec( # NOTE: this ONLY accepts `Msg.pld` fields of a specified # type union. - ipc_pld_spec=payload_type, + ipc_pld_spec=payload_spec, + ) + + # make a one-off dec to compare with our `MsgCodec` instance + # which does the below `mk_msg_spec()` call internally + ipc_msg_spec: Union[Type[Struct]] + msg_types: list[Msg[payload_spec]] + ( + ipc_msg_spec, + msg_types, + ) = mk_msg_spec( + payload_type_union=payload_spec, + ) + _enc = msgpack.Encoder() + _dec = msgpack.Decoder( + type=ipc_msg_spec or Any, # like `Msg[Any]` + ) + + assert ( + payload_spec + == + codec.pld_spec ) # assert codec.dec == dec - # XXX-^ not sure why these aren't "equal" but when cast + # + # ^-XXX-^ not sure why these aren't "equal" but when cast # to `str` they seem to match ?? .. kk + assert ( - str(pld_type_spec) + str(ipc_msg_spec) == - str(codec.ipc_pld_spec) + str(codec.msg_spec) == - str(dec.type) + str(_dec.type) == str(codec.dec.type) ) # verify the boxed-type for all variable payload-type msgs. + if not msg_types: + breakpoint() + + roundtrip: bool|None = None + pld_spec_msg_names: list[str] = [ + td.__name__ for td in types._payload_spec_msgs + ] for typedef in msg_types: + skip_runtime_msg: bool = typedef.__name__ not in pld_spec_msg_names + if skip_runtime_msg: + continue + pld_field = structs.fields(typedef)[1] - assert pld_field.type is payload_type - # TODO-^ does this need to work to get all subtypes to adhere? + assert pld_field.type is payload_spec # TODO-^ does this need to work to get all subtypes to adhere? kwargs: dict[str, Any] = { 'cid': '666', @@ -316,44 +317,72 @@ def chk_pld_type( } enc_msg: Msg = typedef(**kwargs) + _wire_bytes: bytes = _enc.encode(enc_msg) wire_bytes: bytes = codec.enc.encode(enc_msg) - _wire_bytes: bytes = enc.encode(enc_msg) + assert _wire_bytes == wire_bytes + ve: ValidationError|None = None try: - _dec_msg = dec.decode(wire_bytes) dec_msg = codec.dec.decode(wire_bytes) + _dec_msg = _dec.decode(wire_bytes) - assert dec_msg.pld == pld - assert _dec_msg.pld == pld - assert (roundtrip := (_dec_msg == enc_msg)) + # decoded msg and thus payload should be exactly same! + assert (roundtrip := ( + _dec_msg + == + dec_msg + == + enc_msg + )) - except ValidationError as ve: - if pld_val_type is payload_type: + if ( + expect_roundtrip is not None + and expect_roundtrip != roundtrip + ): + breakpoint() + + assert ( + pld + == + dec_msg.pld + == + enc_msg.pld + ) + # assert (roundtrip := (_dec_msg == enc_msg)) + + except ValidationError as _ve: + ve = _ve + roundtrip: bool = False + if pld_val_type is payload_spec: raise ValueError( 'Got `ValidationError` despite type-var match!?\n' f'pld_val_type: {pld_val_type}\n' - f'payload_type: {payload_type}\n' + f'payload_type: {payload_spec}\n' ) from ve else: # ow we good cuz the pld spec mismatched. 
print( 'Got expected `ValidationError` since,\n' - f'{pld_val_type} is not {payload_type}\n' + f'{pld_val_type} is not {payload_spec}\n' ) else: if ( - pld_val_type is not payload_type - and payload_type is not Any + payload_spec is not Any + and + pld_val_type is not payload_spec ): raise ValueError( 'DID NOT `ValidationError` despite expected type match!?\n' f'pld_val_type: {pld_val_type}\n' - f'payload_type: {payload_type}\n' + f'payload_type: {payload_spec}\n' ) - return roundtrip + # full code decode should always be attempted! + if roundtrip is None: + breakpoint() + return roundtrip def test_limit_msgspec(): @@ -365,9 +394,10 @@ def test_limit_msgspec(): # ensure we can round-trip a boxing `Msg` assert chk_pld_type( - Msg, + # Msg, Any, None, + expect_roundtrip=True, ) # TODO: don't need this any more right since @@ -379,7 +409,7 @@ def test_limit_msgspec(): # verify that a mis-typed payload value won't decode assert not chk_pld_type( - Msg, + # Msg, int, pld='doggy', ) @@ -392,13 +422,13 @@ def test_limit_msgspec(): value: Any assert not chk_pld_type( - Msg, + # Msg, CustomPayload, pld='doggy', ) assert chk_pld_type( - Msg, + # Msg, CustomPayload, pld=CustomPayload(name='doggy', value='urmom') ) diff --git a/tractor/_ipc.py b/tractor/_ipc.py index b1c2ccd2..5f71c38c 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -48,6 +48,7 @@ from tractor._exceptions import TransportClosed from tractor.msg import ( _ctxvar_MsgCodec, MsgCodec, + mk_codec, ) log = get_logger(__name__) @@ -162,7 +163,7 @@ class MsgpackTCPStream(MsgTransport): # allow for custom IPC msg interchange format # dynamic override Bo - self.codec: MsgCodec = codec or MsgCodec() + self.codec: MsgCodec = codec or mk_codec() async def _iter_packets(self) -> AsyncGenerator[dict, None]: ''' diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py index a93fa888..0c8809a9 100644 --- a/tractor/msg/__init__.py +++ b/tractor/msg/__init__.py @@ -37,36 +37,20 @@ from ._codec import ( from .types import ( Msg as Msg, - Start, # with pld + Start as Start, # with pld FuncSpec as FuncSpec, - StartAck, # with pld + StartAck as StartAck, # with pld IpcCtxSpec as IpcCtxSpec, - Started, - Yield, - Stop, - Return, + Started as Started, + Yield as Yield, + Stop as Stop, + Return as Return, - Error, # with pld - ErrorData as ErrorData + Error as Error, # with pld + ErrorData as ErrorData, + + # full msg spec set + __spec__ as __spec__, ) - - -# built-in SC shuttle protocol msg type set in -# approx order of the IPC txn-state spaces. -__spec__: list[Msg] = [ - - # inter-actor RPC initiation - Start, - StartAck, - - # no-outcome-yet IAC (inter-actor-communication) - Started, - Yield, - Stop, - - # termination outcomes - Return, - Error, -] diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index e6cb4f1f..4477d393 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -73,16 +73,15 @@ class MsgCodec(Struct): A IPC msg interchange format lib's encoder + decoder pair. 
''' - # post-configure-cached when prop-accessed (see `mk_codec()` - # OR can be passed directly as, - # `MsgCodec(_enc=, _dec=)` - _enc: msgpack.Encoder|None = None - _dec: msgpack.Decoder|None = None + _enc: msgpack.Encoder + _dec: msgpack.Decoder + + pld_spec: Union[Type[Struct]]|None # struct type unions # https://jcristharif.com/msgspec/structs.html#tagged-unions @property - def ipc_pld_spec(self) -> Union[Type[Struct]]: + def msg_spec(self) -> Union[Type[Struct]]: return self._dec.type lib: ModuleType = msgspec @@ -142,6 +141,7 @@ class MsgCodec(Struct): determined by the ''' + # https://jcristharif.com/msgspec/usage.html#typed-decoding return self._dec.decode(msg) # TODO: do we still want to try and support the sub-decoder with @@ -149,6 +149,7 @@ class MsgCodec(Struct): # future grief? # # -[ ] + # -> https://jcristharif.com/msgspec/api.html#raw # #def mk_pld_subdec( # self, @@ -224,6 +225,20 @@ class MsgCodec(Struct): # return codec.enc.encode(msg) + +# TODO: sub-decoded `Raw` fields? +# -[ ] see `MsgCodec._payload_decs` notes +# +# XXX if we wanted something more complex then field name str-keys +# we might need a header field type to describe the lookup sys? +# class Header(Struct, tag=True): +# ''' +# A msg header which defines payload properties + +# ''' +# payload_tag: str|None = None + + #def mk_tagged_union_dec( # tagged_structs: list[Struct], @@ -345,10 +360,6 @@ def mk_codec( assert len(ipc_msg_spec.__args__) == len(msg_types) assert ipc_msg_spec - dec = msgpack.Decoder( - type=ipc_msg_spec, # like `Msg[Any]` - ) - else: ipc_msg_spec = ipc_msg_spec or Any @@ -363,6 +374,7 @@ def mk_codec( codec = MsgCodec( _enc=enc, _dec=dec, + pld_spec=ipc_pld_spec, # payload_msg_specs=payload_msg_specs, # **kwargs, ) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 7d64e766..2411f0f9 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -34,20 +34,13 @@ from typing import ( ) from msgspec import ( + defstruct, + # field, Struct, UNSET, + UnsetType, ) -# TODO: sub-decoded `Raw` fields? -# -[ ] see `MsgCodec._payload_decs` notes -# -# class Header(Struct, tag=True): -# ''' -# A msg header which defines payload properties - -# ''' -# payload_tag: str|None = None - # type variable for the boxed payload field `.pld` PayloadT = TypeVar('PayloadT') @@ -57,6 +50,9 @@ class Msg( Generic[PayloadT], tag=True, tag_field='msg_type', + + # eq=True, + # order=True, ): ''' The "god" boxing msg type. @@ -66,8 +62,13 @@ class Msg( tree. ''' - # TODO: use UNSET here? cid: str|None # call/context-id + # ^-TODO-^: more explicit type? + # -[ ] use UNSET here? + # https://jcristharif.com/msgspec/supported-types.html#unset + # + # -[ ] `uuid.UUID` which has multi-protocol support + # https://jcristharif.com/msgspec/supported-types.html#uuid # The msgs "payload" (spelled without vowels): # https://en.wikipedia.org/wiki/Payload_(computing) @@ -136,19 +137,18 @@ class Start( pld: FuncSpec -FuncType: Literal[ - 'asyncfunc', - 'asyncgen', - 'context', # TODO: the only one eventually? -] = 'context' - - class IpcCtxSpec(Struct): ''' An inter-actor-`trio.Task`-comms `Context` spec. ''' - functype: FuncType + # TODO: maybe better names for all these? + # -[ ] obvi ^ would need sync with `._rpc` + functype: Literal[ + 'asyncfunc', + 'asyncgen', + 'context', # TODO: the only one eventually? + ] # TODO: as part of the reponse we should report our allowed # msg spec which should be generated from the type-annots as @@ -182,6 +182,7 @@ class Started( decorated IPC endpoint. 
''' + pld: PayloadT # TODO: instead of using our existing `Start` @@ -198,6 +199,7 @@ class Yield( Per IPC transmission of a value from `await MsgStream.send()`. ''' + pld: PayloadT class Stop(Msg): @@ -206,7 +208,7 @@ class Stop(Msg): of `StopAsyncIteration`. ''' - pld: UNSET + pld: UnsetType = UNSET class Return( @@ -218,6 +220,7 @@ class Return( func-as-`trio.Task`. ''' + pld: PayloadT class ErrorData(Struct): @@ -258,13 +261,47 @@ class Error(Msg): # cid: str +# built-in SC shuttle protocol msg type set in +# approx order of the IPC txn-state spaces. +__spec__: list[Msg] = [ + + # inter-actor RPC initiation + Start, + StartAck, + + # no-outcome-yet IAC (inter-actor-communication) + Started, + Yield, + Stop, + + # termination outcomes + Return, + Error, +] + +_runtime_spec_msgs: list[Msg] = [ + Start, + StartAck, + Stop, + Error, +] +_payload_spec_msgs: list[Msg] = [ + Started, + Yield, + Return, +] + + def mk_msg_spec( payload_type_union: Union[Type] = Any, - boxing_msg_set: set[Msg] = { - Started, - Yield, - Return, - }, + + # boxing_msg_set: list[Msg] = _payload_spec_msgs, + spec_build_method: Literal[ + 'indexed_generics', # works + 'defstruct', + 'types_new_class', + + ] = 'indexed_generics', ) -> tuple[ Union[Type[Msg]], @@ -281,26 +318,58 @@ def mk_msg_spec( ''' submsg_types: list[Type[Msg]] = Msg.__subclasses__() + bases: tuple = ( + # XXX NOTE XXX the below generic-parameterization seems to + # be THE ONLY way to get this to work correctly in terms + # of getting ValidationError on a roundtrip? + Msg[payload_type_union], + Generic[PayloadT], + ) + defstruct_bases: tuple = ( + Msg, # [payload_type_union], + # Generic[PayloadT], + # ^-XXX-^: not allowed? lul.. + ) + ipc_msg_types: list[Msg] = [] - # TODO: see below as well, - # => union building approach with `.__class_getitem__()` - # doesn't seem to work..? - # - # payload_type_spec: Union[Type[Msg]] - # - msg_types: list[Msg] = [] - for msgtype in boxing_msg_set: + idx_msg_types: list[Msg] = [] + defs_msg_types: list[Msg] = [] + nc_msg_types: list[Msg] = [] + + for msgtype in __spec__: + + # for the NON-payload (user api) type specify-able + # msgs types, we simply aggregate the def as is + # for inclusion in the output type `Union`. + if msgtype not in _payload_spec_msgs: + ipc_msg_types.append(msgtype) + continue # check inheritance sanity assert msgtype in submsg_types # TODO: wait why do we need the dynamic version here? - # -[ ] paraming the `PayloadT` values via `Generic[T]` - # doesn't seem to work at all? - # -[ ] is there a way to get it to work at module level - # just using inheritance or maybe a metaclass? + # XXX ANSWER XXX -> BC INHERITANCE.. don't work w generics.. # - # index_paramed_msg_type: Msg = msgtype[payload_type_union] + # NOTE previously bc msgtypes WERE NOT inheritting + # directly the `Generic[PayloadT]` type, the manual method + # of generic-paraming with `.__class_getitem__()` wasn't + # working.. + # + # XXX but bc i changed that to make every subtype inherit + # it, this manual "indexed parameterization" method seems + # to work? + # + # -[x] paraming the `PayloadT` values via `Generic[T]` + # does work it seems but WITHOUT inheritance of generics + # + # -[-] is there a way to get it to work at module level + # just using inheritance or maybe a metaclass? + # => thot that `defstruct` might work, but NOPE, see + # below.. 
+ # + idxed_msg_type: Msg = msgtype[payload_type_union] + idx_msg_types.append(idxed_msg_type) # TODO: WHY do we need to dynamically generate the # subtype-msgs here to ensure the `.pld` parameterization @@ -308,30 +377,69 @@ def mk_msg_spec( # `msgpack.Decoder()`..? # # dynamically create the payload type-spec-limited msg set. - manual_paramed_msg_subtype: Type = types.new_class( - msgtype.__name__, - ( - # XXX NOTE XXX this seems to be THE ONLY - # way to get this to work correctly!?! - Msg[payload_type_union], - Generic[PayloadT], - ), - {}, + newclass_msgtype: Type = types.new_class( + name=msgtype.__name__, + bases=bases, + kwds={}, + ) + nc_msg_types.append( + newclass_msgtype[payload_type_union] ) - # TODO: grok the diff here better.. + # with `msgspec.structs.defstruct` + # XXX ALSO DOESN'T WORK + defstruct_msgtype = defstruct( + name=msgtype.__name__, + fields=[ + ('cid', str), + + # XXX doesn't seem to work.. + # ('pld', PayloadT), + + ('pld', payload_type_union), + ], + bases=defstruct_bases, + ) + defs_msg_types.append(defstruct_msgtype) + # assert index_paramed_msg_type == manual_paramed_msg_subtype - # XXX TODO: why does the manual method work but not the - # `.__class_getitem__()` one!?! - paramed_msg_type = manual_paramed_msg_subtype + # paramed_msg_type = manual_paramed_msg_subtype - # payload_type_spec |= paramed_msg_type - msg_types.append(paramed_msg_type) + # ipc_payload_msgs_type_union |= index_paramed_msg_type + idx_spec: Union[Type[Msg]] = Union[*idx_msg_types] + def_spec: Union[Type[Msg]] = Union[*defs_msg_types] + nc_spec: Union[Type[Msg]] = Union[*nc_msg_types] + + specs: dict[str, Union[Type[Msg]]] = { + 'indexed_generics': idx_spec, + 'defstruct': def_spec, + 'types_new_class': nc_spec, + } + msgtypes_table: dict[str, list[Msg]] = { + 'indexed_generics': idx_msg_types, + 'defstruct': defs_msg_types, + 'types_new_class': nc_msg_types, + } + + # XXX lol apparently type unions can't ever + # be equal eh? + # TODO: grok the diff here better.. + # + # assert ( + # idx_spec + # == + # nc_spec + # == + # def_spec + # ) + # breakpoint() + + pld_spec: Union[Type] = specs[spec_build_method] + runtime_spec: Union[Type] = Union[*ipc_msg_types] - payload_type_spec: Union[Type[Msg]] = Union[*msg_types] return ( - payload_type_spec, - msg_types, + pld_spec | runtime_spec, + msgtypes_table[spec_build_method] + ipc_msg_types, ) -- 2.34.1 From 405c2a27e6d61169996e5d4ee7b560b8b2108b5f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 2 Apr 2024 09:21:30 -0400 Subject: [PATCH 013/305] Get msg spec type limiting working with a `RunVar` Since `contextvars.ContextVar` seems to reset to the default in every new task, switching to using `trio.lowlevel.RunVar` kinda gets close to what we'd like where a child scope can override what's in the rent but ideally without modifying the rent's. I tried `tricycle.TreeVar` as well but it also seems to reset across (embedded) nurseries in our runtime; need to try it again bc apparently that's not how it's suppose to work? NOTE that for now i'm keeping the `.msg.types._ctxvar_MsgCodec` set to the `msgspec` default (`Any` types) so that the test suite will still pass until the runtime is ported to the new msg-spec + codec. 
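For ref, a tiny demo of the `RunVar` semantics being relied on here
(all names hypothetical, just for illustration):

    import trio

    _codec_var = trio.lowlevel.RunVar('codec', default='def')

    async def child():
        # `RunVar` state lives on the current run(ner), NOT in
        # a per-task (copied) context, so the parent task's `.set()`
        # is still visible here.
        assert _codec_var.get() == 'custom'

    async def main():
        token = _codec_var.set('custom')
        try:
            async with trio.open_nursery() as tn:
                tn.start_soon(child)
        finally:
            _codec_var.reset(token)

    trio.run(main)

which is also why it's effectively a `global` per `trio.run()` and
can't (yet) scope a codec override to just a subtree of tasks..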
Surrounding and in support of all this the `Msg`-set impl deats
changed a bit as well as various stuff in `.msg` sub-mods:
- drop the `.pld` struct types for `Error`, `Start`, `StartAck` since
  we don't really need the `.pld` payload field in those cases since
  they're runtime control msgs for starting RPC tasks and handling
  remote errors; we can just put the fields directly on each msg since
  the user will never want/need to override the `.pld` field type.
- add a couple new runtime msgs and include them in `msg.__spec__`
  and make them NOT inherit from `Msg` since they are runtime-specific
  and thus have no need for `.pld` type constraints:
  - `Aid` the actor-id identity handshake msg.
  - `SpawnSpec`: the spawn data passed from a parent actor down to
    a child in `Actor._from_parent()` for which we need a shuttle
    protocol msg, so might as well make it a pedantic one ;)
- fix some `Actor.uid` field types that were type-borked on `Error`
- add notes about how we need built-in `debug_mode` msgs in order to
  avoid msg-type errors when using the TTY lock machinery and
  a different `.pld` spec than the default `Any` is in use..
  -> since `devx._debug.lock_tty_for_child()` and its client-side
  `wait_for_parent_stdin_hijack()` use `Context.started('Locked')`
  and `MsgStream.send('pdb_unlock')` string values as their `.pld`
  contents we'd need to either always do an `ipc_pld_spec | str` or
  pre-define some dedicated `Msg` types which get `Union`-ed in for
  this? (see the rough sketch at the end of this msg)
- break out `msg.pretty_struct.Struct._sin_props()` into a helper func
  `iter_fields()` since the impl doesn't require a struct instance.
- as mentioned above, since `ContextVar` didn't work as anticipated
  I next tried `tricycle.TreeVar` but that too didn't seem to keep
  the `apply_codec()` setting intact across
  `Portal.open_context()`/`Context.open_stream()` (it kept reverting
  to the default `.pld: Any` setting) so I finalized on
  a `trio.lowlevel.RunVar` for now despite it basically being
  a `global`..
  -> will probably come back to test this with `TreeVar` and some hot
  tips i picked up from @mikenerone in the `trio` gitter, which i put
  in comments surrounding proto-code.
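As a rough sketch of that debug-mode speccing idea (nothing like this
is implemented yet; names are made up):

    from typing import Any, Type, Union

    def mk_debug_aware_pld_spec(
        ipc_pld_spec: Union[Type, Any],
        debug_mode: bool = False,
    ) -> Union[Type, Any]:
        # `Any` already admits the debugger's plain-str payloads
        if debug_mode and ipc_pld_spec is not Any:
            # union-in `str` so `Context.started('Locked')` and
            # `MsgStream.send('pdb_unlock')` still pass pld-spec
            # checks when a custom spec is in use.
            return ipc_pld_spec | str
        return ipc_pld_spec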
--- tractor/msg/__init__.py | 13 +- tractor/msg/_codec.py | 95 ++++++++++-- tractor/msg/pretty_struct.py | 43 +++--- tractor/msg/types.py | 270 +++++++++++++++++++++++++++++------ 4 files changed, 337 insertions(+), 84 deletions(-) diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py index 0c8809a9..d8f37477 100644 --- a/tractor/msg/__init__.py +++ b/tractor/msg/__init__.py @@ -31,25 +31,24 @@ from ._codec import ( apply_codec as apply_codec, mk_codec as mk_codec, MsgCodec as MsgCodec, - current_msgspec_codec as current_msgspec_codec, + current_codec as current_codec, ) from .types import ( Msg as Msg, - Start as Start, # with pld - FuncSpec as FuncSpec, + Aid as Aid, + SpawnSpec as SpawnSpec, - StartAck as StartAck, # with pld - IpcCtxSpec as IpcCtxSpec, + Start as Start, + StartAck as StartAck, Started as Started, Yield as Yield, Stop as Stop, Return as Return, - Error as Error, # with pld - ErrorData as ErrorData, + Error as Error, # full msg spec set __spec__ as __spec__, diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index 4477d393..32a58a56 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -30,13 +30,13 @@ ToDo: backends we prolly should offer: ''' from __future__ import annotations -from contextvars import ( - ContextVar, - Token, -) from contextlib import ( contextmanager as cm, ) +# from contextvars import ( +# ContextVar, +# Token, +# ) from typing import ( Any, Callable, @@ -47,6 +47,12 @@ from types import ModuleType import msgspec from msgspec import msgpack +from trio.lowlevel import ( + RunVar, + RunVarToken, +) +# TODO: see notes below from @mikenerone.. +# from tricycle import TreeVar from tractor.msg.pretty_struct import Struct from tractor.msg.types import ( @@ -72,6 +78,9 @@ class MsgCodec(Struct): ''' A IPC msg interchange format lib's encoder + decoder pair. + Pretty much nothing more then delegation to underlying + `msgspec..Encoder/Decoder`s for now. + ''' _enc: msgpack.Encoder _dec: msgpack.Decoder @@ -86,11 +95,6 @@ class MsgCodec(Struct): lib: ModuleType = msgspec - # ad-hoc type extensions - # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types - enc_hook: Callable[[Any], Any]|None = None # coder - dec_hook: Callable[[type, Any], Any]|None = None # decoder - # TODO: a sub-decoder system as well? # payload_msg_specs: Union[Type[Struct]] = Any # see related comments in `.msg.types` @@ -304,7 +308,8 @@ def mk_codec( libname: str = 'msgspec', - # proxy as `Struct(**kwargs)` + # proxy as `Struct(**kwargs)` for ad-hoc type extensions + # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types # ------ - ------ dec_hook: Callable|None = None, enc_hook: Callable|None = None, @@ -389,14 +394,52 @@ def mk_codec( # no custom structs, hooks or other special types. _def_msgspec_codec: MsgCodec = mk_codec(ipc_msg_spec=Any) -# NOTE: provides for per-`trio.Task` specificity of the +# The built-in IPC `Msg` spec. +# Our composing "shuttle" protocol which allows `tractor`-app code +# to use any `msgspec` supported type as the `Msg.pld` payload, +# https://jcristharif.com/msgspec/supported-types.html +# +_def_tractor_codec: MsgCodec = mk_codec( + ipc_pld_spec=Any, +) +# TODO: IDEALLY provides for per-`trio.Task` specificity of the # IPC msging codec used by the transport layer when doing # `Channel.send()/.recv()` of wire data. -_ctxvar_MsgCodec: ContextVar[MsgCodec] = ContextVar( + +# ContextVar-TODO: DIDN'T WORK, kept resetting in every new task to default!? 
+# _ctxvar_MsgCodec: ContextVar[MsgCodec] = ContextVar( + +# TreeVar-TODO: DIDN'T WORK, kept resetting in every new embedded nursery +# even though it's supposed to inherit from a parent context ??? +# +# _ctxvar_MsgCodec: TreeVar[MsgCodec] = TreeVar( +# +# ^-NOTE-^: for this to work see the mods by @mikenerone from `trio` gitter: +# +# 22:02:54 even for regular contextvars, all you have to do is: +# `task: Task = trio.lowlevel.current_task()` +# `task.parent_nursery.parent_task.context.run(my_ctx_var.set, new_value)` +# +# From a comment in his prop code he couldn't share outright: +# 1. For every TreeVar set in the current task (which covers what +# we need from SynchronizerFacade), walk up the tree until the +# root or finding one where the TreeVar is already set, setting +# it in all of the contexts along the way. +# 2. For each of those, we also forcibly set the values that are +# pending for child nurseries that have not yet accessed the +# TreeVar. +# 3. We similarly set the pending values for the child nurseries +# of the *current* task. +# + +# TODO: STOP USING THIS, since it's basically a global and won't +# allow sub-IPC-ctxs to limit the msg-spec however desired.. +_ctxvar_MsgCodec: MsgCodec = RunVar( 'msgspec_codec', # TODO: move this to our new `Msg`-spec! default=_def_msgspec_codec, + # default=_def_tractor_codec, ) @@ -410,15 +453,36 @@ def apply_codec( runtime context such that all IPC msgs are processed with it for that task. + Uses a `tricycle.TreeVar` to ensure the scope of the codec + matches the `@cm` block and DOES NOT change to the original + (default) value in new tasks (as it does for `ContextVar`). + + See the docs: + - https://tricycle.readthedocs.io/en/latest/reference.html#tree-variables + - https://github.com/oremanj/tricycle/blob/master/tricycle/_tests/test_tree_var.py + ''' - token: Token = _ctxvar_MsgCodec.set(codec) + orig: MsgCodec = _ctxvar_MsgCodec.get() + assert orig is not codec + token: RunVarToken = _ctxvar_MsgCodec.set(codec) + + # TODO: for TreeVar approach, see docs for @cm `.being()` API: + # https://tricycle.readthedocs.io/en/latest/reference.html#tree-variables + # try: + # with _ctxvar_MsgCodec.being(codec): + # new = _ctxvar_MsgCodec.get() + # assert new is codec + # yield codec + try: yield _ctxvar_MsgCodec.get() finally: _ctxvar_MsgCodec.reset(token) + assert _ctxvar_MsgCodec.get() is orig -def current_msgspec_codec() -> MsgCodec: + +def current_codec() -> MsgCodec: ''' Return the current `trio.Task.context`'s value for `msgspec_codec` used by `Channel.send/.recv()` @@ -449,5 +513,6 @@ def limit_msg_spec( payload_types=payload_types, **codec_kwargs, ) - with apply_codec(msgspec_codec): + with apply_codec(msgspec_codec) as applied_codec: + assert applied_codec is msgspec_codec yield msgspec_codec diff --git a/tractor/msg/pretty_struct.py b/tractor/msg/pretty_struct.py index 143fc7a4..412b6ed6 100644 --- a/tractor/msg/pretty_struct.py +++ b/tractor/msg/pretty_struct.py @@ -80,6 +80,28 @@ class DiffDump(UserList): return repstr +def iter_fields(struct: Struct) -> Iterator[ + tuple[ + structs.FieldIinfo, + str, + Any, + ] +]: + ''' + Iterate over all non-@property fields of this struct. + + ''' + fi: structs.FieldInfo + for fi in structs.fields(struct): + key: str = fi.name + val: Any = getattr(struct, key) + yield ( + fi, + key, + val, + ) + + class Struct( _Struct, @@ -91,23 +113,6 @@ class Struct( A "human friendlier" (aka repl buddy) struct subtype. 
''' - def _sin_props(self) -> Iterator[ - tuple[ - structs.FieldIinfo, - str, - Any, - ] - ]: - ''' - Iterate over all non-@property fields of this struct. - - ''' - fi: structs.FieldInfo - for fi in structs.fields(self): - key: str = fi.name - val: Any = getattr(self, key) - yield fi, key, val - def to_dict( self, include_non_members: bool = True, @@ -130,7 +135,7 @@ class Struct( # added as type-defined `@property` methods! sin_props: dict = {} fi: structs.FieldInfo - for fi, k, v in self._sin_props(): + for fi, k, v in iter_fields(self): sin_props[k] = asdict[k] return sin_props @@ -159,7 +164,7 @@ class Struct( fi: structs.FieldInfo k: str v: Any - for fi, k, v in self._sin_props(): + for fi, k, v in iter_fields(self): # TODO: how can we prefer `Literal['option1', 'option2, # ..]` over .__name__ == `Literal` but still get only the diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 2411f0f9..a81473d7 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -26,6 +26,7 @@ from __future__ import annotations import types from typing import ( Any, + Callable, Generic, Literal, Type, @@ -37,8 +38,12 @@ from msgspec import ( defstruct, # field, Struct, - UNSET, - UnsetType, + # UNSET, + # UnsetType, +) + +from tractor.msg import ( + pretty_struct, ) # type variable for the boxed payload field `.pld` @@ -48,11 +53,19 @@ PayloadT = TypeVar('PayloadT') class Msg( Struct, Generic[PayloadT], + + # https://jcristharif.com/msgspec/structs.html#tagged-unions tag=True, tag_field='msg_type', - # eq=True, + # https://jcristharif.com/msgspec/structs.html#field-ordering + # kw_only=True, + + # https://jcristharif.com/msgspec/structs.html#equality-and-order # order=True, + + # https://jcristharif.com/msgspec/structs.html#encoding-decoding-as-arrays + # as_array=True, ): ''' The "god" boxing msg type. @@ -90,6 +103,53 @@ class Msg( pld: PayloadT +class Aid( + Struct, + tag=True, + tag_field='msg_type', +): + ''' + Actor-identity msg. + + Initial contact exchange enabling an actor "mailbox handshake" + delivering the peer identity (and maybe eventually contact) + info. + + Used by discovery protocol to register actors as well as + conduct the initial comms (capability) filtering. + + ''' + name: str + uuid: str + # TODO: use built-in support for UUIDs? + # -[ ] `uuid.UUID` which has multi-protocol support + # https://jcristharif.com/msgspec/supported-types.html#uuid + + +class SpawnSpec( + pretty_struct.Struct, + tag=True, + tag_field='msg_type', +): + ''' + Initial runtime spec handed down from a spawning parent to its + child subactor immediately following first contact via an + `Aid` msg. + + ''' + _parent_main_data: dict + _runtime_vars: dict[str, Any] + + # module import capability + enable_modules: dict[str, str] + + # TODO: not just sockaddr pairs? + # -[ ] abstract into a `TransportAddr` type? + reg_addrs: list[tuple[str, int]] + bind_addrs: list[tuple[str, int]] + + + # TODO: caps based RPC support in the payload? # # -[ ] integration with our ``enable_modules: list[str]`` caps sys. @@ -105,18 +165,31 @@ class Msg( # # -[ ] can we combine .ns + .func into a native `NamespacePath` field? # -# -[ ]better name, like `Call/TaskInput`? +# -[ ] better name, like `Call/TaskInput`? 
# -class FuncSpec(Struct): - ns: str - func: str - - kwargs: dict - uid: str # (calling) actor-id +# -[ ] XXX a debugger lock msg transaction with payloads like, +# child -> `.pld: DebugLock` -> root +# child <- `.pld: DebugLocked` <- root +# child -> `.pld: DebugRelease` -> root +# +# WHY => when a pld spec is provided it might not allow for +# debug mode msgs as they currently are (using plain old `pld. +# str` payloads) so we only when debug_mode=True we need to +# union in this debugger payload set? +# +# mk_msg_spec( +# MyPldSpec, +# debug_mode=True, +# ) -> ( +# Union[MyPldSpec] +# | Union[DebugLock, DebugLocked, DebugRelease] +# ) class Start( - Msg, + Struct, + tag=True, + tag_field='msg_type', ): ''' Initial request to remotely schedule an RPC `trio.Task` via @@ -134,14 +207,26 @@ class Start( - `Context.open_context()` ''' - pld: FuncSpec + cid: str + + ns: str + func: str + + kwargs: dict + uid: tuple[str, str] # (calling) actor-id -class IpcCtxSpec(Struct): +class StartAck( + Struct, + tag=True, + tag_field='msg_type', +): ''' - An inter-actor-`trio.Task`-comms `Context` spec. + Init response to a `Cmd` request indicating the far + end's RPC spec, namely its callable "type". ''' + cid: str # TODO: maybe better names for all these? # -[ ] obvi ^ would need sync with `._rpc` functype: Literal[ @@ -160,18 +245,6 @@ class IpcCtxSpec(Struct): # msgspec: MsgSpec -class StartAck( - Msg, - Generic[PayloadT], -): - ''' - Init response to a `Cmd` request indicating the far - end's RPC callable "type". - - ''' - pld: IpcCtxSpec - - class Started( Msg, Generic[PayloadT], @@ -202,13 +275,19 @@ class Yield( pld: PayloadT -class Stop(Msg): +class Stop( + Struct, + tag=True, + tag_field='msg_type', +): ''' Stream termination signal much like an IPC version of `StopAsyncIteration`. ''' - pld: UnsetType = UNSET + cid: str + # TODO: do we want to support a payload on stop? + # pld: UnsetType = UNSET class Return( @@ -223,32 +302,33 @@ class Return( pld: PayloadT -class ErrorData(Struct): +class Error( + Struct, + tag=True, + tag_field='msg_type', +): ''' - Remote actor error meta-data as needed originally by + A pkt that wraps `RemoteActorError`s for relay and raising. + + Fields are 1-to-1 meta-data as needed originally by `RemoteActorError.msgdata: dict`. ''' - src_uid: str + src_uid: tuple[str, str] src_type_str: str boxed_type_str: str - - relay_path: list[str] + relay_path: list[tuple[str, str]] tb_str: str + cid: str|None = None + + # TODO: use UNSET or don't include them via + # # `ContextCancelled` - canceller: str|None = None + canceller: tuple[str, str]|None = None # `StreamOverrun` - sender: str|None = None - - -class Error(Msg): - ''' - A pkt that wraps `RemoteActorError`s for relay. - - ''' - pld: ErrorData + sender: tuple[str, str]|None = None # TODO: should be make a msg version of `ContextCancelled?` @@ -265,6 +345,12 @@ class Error(Msg): # approx order of the IPC txn-state spaces. __spec__: list[Msg] = [ + # identity handshake + Aid, + + # spawn specification from parent + SpawnSpec, + # inter-actor RPC initiation Start, StartAck, @@ -280,6 +366,8 @@ __spec__: list[Msg] = [ ] _runtime_spec_msgs: list[Msg] = [ + Aid, + SpawnSpec, Start, StartAck, Stop, @@ -443,3 +531,99 @@ def mk_msg_spec( pld_spec | runtime_spec, msgtypes_table[spec_build_method] + ipc_msg_types, ) + + +# TODO: make something similar to this inside `._codec` such that +# user can just pass a type table of some sort? 
+# def mk_dict_msg_codec_hooks() -> tuple[Callable, Callable]:
+#     '''
+#     Deliver an `enc_hook()`/`dec_hook()` pair which does
+#     manual conversion from our above native `Msg` set
+#     to `dict` equivalent (wire msgs) in order to keep legacy compat
+#     with the original runtime implementation.

+#     Note: this is/was primarily used while moving the core
+#     runtime over to using native `Msg`-struct types wherein we
+#     start with the send side emitting without loading
+#     a typed-decoder and then later flipping the switch over to
+#     load to the native struct types once all runtime usage has
+#     been adjusted appropriately.

+#     '''
+#     def enc_to_dict(msg: Any) -> Any:
+#         '''
+#         Encode `Msg`-structs to `dict` msgs instead
+#         of using `msgspec.msgpack.Decoder.type`-ed
+#         features.

+#         '''
+#         match msg:
+#             case Start():
+#                 dctmsg: dict = pretty_struct.Struct.to_dict(
+#                     msg
+#                 )['pld']

+#             case Error():
+#                 dctmsg: dict = pretty_struct.Struct.to_dict(
+#                     msg
+#                 )['pld']
+#                 return {'error': dctmsg}


+#     def dec_from_dict(
+#         type: Type,
+#         obj: Any,
+#     ) -> Any:
+#         '''
+#         Decode to `Msg`-structs from `dict` msgs instead
+#         of using `msgspec.msgpack.Decoder.type`-ed
+#         features.

+#         '''
+#         cid: str = obj.get('cid')
+#         match obj:
+#             case {'cmd': pld}:
+#                 return Start(
+#                     cid=cid,
+#                     pld=pld,
+#                 )
+#             case {'functype': pld}:
+#                 return StartAck(
+#                     cid=cid,
+#                     functype=pld,
+#                     # pld=IpcCtxSpec(
+#                     #     functype=pld,
+#                     # ),
+#                 )
+#             case {'started': pld}:
+#                 return Started(
+#                     cid=cid,
+#                     pld=pld,
+#                 )
+#             case {'yield': pld}:
+#                 return Yield(
+#                     cid=obj['cid'],
+#                     pld=pld,
+#                 )
+#             case {'stop': pld}:
+#                 return Stop(
+#                     cid=cid,
+#                 )
+#             case {'return': pld}:
+#                 return Return(
+#                     cid=cid,
+#                     pld=pld,
+#                 )

+#             case {'error': pld}:
+#                 return Error(
+#                     cid=cid,
+#                     pld=ErrorData(
+#                         **pld
+#                     ),
+#                 )

+#     return (
+#         # enc_to_dict,
+#         dec_from_dict,
+#     )
-- 
2.34.1


From b5bdd20eb566c8aca0e37fd4316c5fcf857f7afa Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Tue, 2 Apr 2024 11:14:43 -0400
Subject: [PATCH 014/305] Get `test_codec_hooks_mod` working with `Msg`s

Though the runtime hasn't been changed over in this patch (it was in
the local index at the time however), the test does now demonstrate
that a `Started` msg with a correctly typed `.pld` will codec
correctly when passed manually to `MsgCodec.encode/decode()`.

Despite not having the runtime ported to the new shuttle msg set
(meaning the mentioned test will fail without the runtime port patch),
I was able to get this first original test working that limits payload
packets to a `Msg.pld: NamespacePath`; as long as we spec
`enc/dec_hook()`s then the `Msg.pld` will be processed correctly as per:
https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types
in both the `Any` and `NamespacePath|None` spec cases.

^- turns out in this case -^ that the codec hooks only get invoked on
the unknown-fields NOT the entire `Struct`-msg.

A further gotcha was merging a `|None` into the `pld_spec` since this
test spawns a subactor and opens a context via `send_back_nsp()` and
that func has no explicit `return` - so of course it delivers
a `Return(pld=None)` which will fail if we only spec `NamespacePath`.
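For ref, the hook pattern at play as a standalone sketch (mirroring
the test's hooks, sans any `Msg` boxing):

    import msgspec
    from tractor.msg import NamespacePath

    def enc_nsp(obj):
        if isinstance(obj, NamespacePath):
            return str(obj)
        raise NotImplementedError(f'{type(obj)}')

    def dec_nsp(type_, obj):
        if (
            type_ is NamespacePath
            and isinstance(obj, str)
        ):
            return NamespacePath(obj)
        raise NotImplementedError(f'{type_}')

    enc = msgspec.msgpack.Encoder(enc_hook=enc_nsp)

    # the `|None` is exactly what admits the implicit
    # `Return(pld=None)` delivered by a ctx-func with no
    # explicit `return`!
    dec = msgspec.msgpack.Decoder(
        type=NamespacePath|None,
        dec_hook=dec_nsp,
    )

    nsp = NamespacePath('tests.test_caps_based_msging:ex_func')
    assert dec.decode(enc.encode(nsp)) == nsp
    assert dec.decode(enc.encode(None)) is None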
--- tests/test_caps_based_msging.py | 305 ++++++++++++++++++++++++-------- 1 file changed, 236 insertions(+), 69 deletions(-) diff --git a/tests/test_caps_based_msging.py b/tests/test_caps_based_msging.py index abdda0a5..b42d9e35 100644 --- a/tests/test_caps_based_msging.py +++ b/tests/test_caps_based_msging.py @@ -7,7 +7,6 @@ B~) ''' from typing import ( Any, - _GenericAlias, Type, Union, ) @@ -26,20 +25,23 @@ from msgspec import ( import pytest import tractor from tractor.msg import ( - _def_msgspec_codec, + _codec, _ctxvar_MsgCodec, NamespacePath, MsgCodec, mk_codec, apply_codec, - current_msgspec_codec, + current_codec, ) -from tractor.msg import types +from tractor.msg import ( + types, +) +from tractor import _state from tractor.msg.types import ( # PayloadT, Msg, - # Started, + Started, mk_msg_spec, ) import trio @@ -60,56 +62,110 @@ def test_msg_spec_xor_pld_spec(): ) -# TODO: wrap these into `._codec` such that user can just pass -# a type table of some sort? -def enc_hook(obj: Any) -> Any: - if isinstance(obj, NamespacePath): - return str(obj) - else: - raise NotImplementedError( - f'Objects of type {type(obj)} are not supported' - ) - - -def dec_hook(type: Type, obj: Any) -> Any: - print(f'type is: {type}') - if type is NamespacePath: - return NamespacePath(obj) - else: - raise NotImplementedError( - f'Objects of type {type(obj)} are not supported' - ) - - def ex_func(*args): print(f'ex_func({args})') def mk_custom_codec( - ipc_msg_spec: Type[Any] = Any, -) -> MsgCodec: - # apply custom hooks and set a `Decoder` which only - # loads `NamespacePath` types. - nsp_codec: MsgCodec = mk_codec( - ipc_msg_spec=ipc_msg_spec, - enc_hook=enc_hook, - dec_hook=dec_hook, - ) + pld_spec: Union[Type]|Any, - # TODO: validate `MsgCodec` interface/semantics? - # -[ ] simple field tests to ensure caching + reset is workin? - # -[ ] custom / changing `.decoder()` calls? - # - # dec = nsp_codec.decoder( - # types=NamespacePath, - # ) - # assert nsp_codec.dec is dec +) -> MsgCodec: + ''' + Create custom `msgpack` enc/dec-hooks and set a `Decoder` + which only loads `NamespacePath` types. + + ''' + uid: tuple[str, str] = tractor.current_actor().uid + + # XXX NOTE XXX: despite defining `NamespacePath` as a type + # field on our `Msg.pld`, we still need a enc/dec_hook() pair + # to cast to/from that type on the wire. See the docs: + # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types + + def enc_nsp(obj: Any) -> Any: + match obj: + case NamespacePath(): + print( + f'{uid}: `NamespacePath`-Only ENCODE?\n' + f'type: {type(obj)}\n' + f'obj: {obj}\n' + ) + + return str(obj) + + logmsg: str = ( + f'{uid}: Encoding `{obj}: <{type(obj)}>` not supported' + f'type: {type(obj)}\n' + f'obj: {obj}\n' + ) + print(logmsg) + raise NotImplementedError(logmsg) + + def dec_nsp( + type: Type, + obj: Any, + + ) -> Any: + print( + f'{uid}: CUSTOM DECODE\n' + f'input type: {type}\n' + f'obj: {obj}\n' + f'type(obj): `{type(obj).__class__}`\n' + ) + nsp = None + + # This never seems to hit? 
+ if isinstance(obj, Msg): + print(f'Msg type: {obj}') + + if ( + type is NamespacePath + and isinstance(obj, str) + and ':' in obj + ): + nsp = NamespacePath(obj) + + if nsp: + print(f'Returning NSP instance: {nsp}') + return nsp + + logmsg: str = ( + f'{uid}: Decoding `{obj}: <{type(obj)}>` not supported' + f'input type: {type(obj)}\n' + f'obj: {obj}\n' + f'type(obj): `{type(obj).__class__}`\n' + ) + print(logmsg) + raise NotImplementedError(logmsg) + + + nsp_codec: MsgCodec = mk_codec( + ipc_pld_spec=pld_spec, + + # NOTE XXX: the encode hook MUST be used no matter what since + # our `NamespacePath` is not any of a `Any` native type nor + # a `msgspec.Struct` subtype - so `msgspec` has no way to know + # how to encode it unless we provide the custom hook. + # + # AGAIN that is, regardless of whether we spec an + # `Any`-decoded-pld the enc has no knowledge (by default) + # how to enc `NamespacePath` (nsp), so we add a custom + # hook to do that ALWAYS. + enc_hook=enc_nsp, + + # XXX NOTE: pretty sure this is mutex with the `type=` to + # `Decoder`? so it won't work in tandem with the + # `ipc_pld_spec` passed above? + dec_hook=dec_nsp, + ) return nsp_codec @tractor.context async def send_back_nsp( - ctx: tractor.Context, + ctx: Context, + expect_debug: bool, + use_any_spec: bool, ) -> None: ''' @@ -117,28 +173,65 @@ async def send_back_nsp( and ensure we can round trip a func ref with our parent. ''' - task: trio.Task = trio.lowlevel.current_task() - task_ctx: Context = task.context - assert _ctxvar_MsgCodec not in task_ctx + # debug mode sanity check + assert expect_debug == _state.debug_mode() - nsp_codec: MsgCodec = mk_custom_codec() + # task: trio.Task = trio.lowlevel.current_task() + + # TreeVar + # curr_codec = _ctxvar_MsgCodec.get_in(task) + + # ContextVar + # task_ctx: Context = task.context + # assert _ctxvar_MsgCodec not in task_ctx + + curr_codec = _ctxvar_MsgCodec.get() + assert curr_codec is _codec._def_tractor_codec + + if use_any_spec: + pld_spec = Any + else: + # NOTE: don't need the |None here since + # the parent side will never send `None` like + # we do here in the implicit return at the end of this + # `@context` body. 
+ pld_spec = NamespacePath # |None + + nsp_codec: MsgCodec = mk_custom_codec( + pld_spec=pld_spec, + ) with apply_codec(nsp_codec) as codec: chk_codec_applied( custom_codec=nsp_codec, enter_value=codec, ) + # ensure roundtripping works locally nsp = NamespacePath.from_ref(ex_func) - await ctx.started(nsp) + wire_bytes: bytes = nsp_codec.encode( + Started( + cid=ctx.cid, + pld=nsp + ) + ) + msg: Started = nsp_codec.decode(wire_bytes) + pld = msg.pld + assert pld == nsp + await ctx.started(nsp) async with ctx.open_stream() as ipc: async for msg in ipc: - assert msg == f'{__name__}:ex_func' + if use_any_spec: + assert msg == f'{__name__}:ex_func' - # TODO: as per below - # assert isinstance(msg, NamespacePath) - assert isinstance(msg, str) + # TODO: as per below + # assert isinstance(msg, NamespacePath) + assert isinstance(msg, str) + else: + assert isinstance(msg, NamespacePath) + + await ipc.send(msg) def chk_codec_applied( @@ -146,11 +239,20 @@ def chk_codec_applied( enter_value: MsgCodec, ) -> MsgCodec: - task: trio.Task = trio.lowlevel.current_task() - task_ctx: Context = task.context + # task: trio.Task = trio.lowlevel.current_task() - assert _ctxvar_MsgCodec in task_ctx - curr_codec: MsgCodec = task.context[_ctxvar_MsgCodec] + # TreeVar + # curr_codec = _ctxvar_MsgCodec.get_in(task) + + # ContextVar + # task_ctx: Context = task.context + # assert _ctxvar_MsgCodec in task_ctx + # curr_codec: MsgCodec = task.context[_ctxvar_MsgCodec] + + # RunVar + curr_codec: MsgCodec = _ctxvar_MsgCodec.get() + last_read_codec = _ctxvar_MsgCodec.get() + assert curr_codec is last_read_codec assert ( # returned from `mk_codec()` @@ -163,14 +265,31 @@ def chk_codec_applied( curr_codec is # public API for all of the above - current_msgspec_codec() + current_codec() # the default `msgspec` settings - is not _def_msgspec_codec + is not _codec._def_msgspec_codec + is not _codec._def_tractor_codec ) -def test_codec_hooks_mod(): +@pytest.mark.parametrize( + 'ipc_pld_spec', + [ + # _codec._def_msgspec_codec, + Any, + # _codec._def_tractor_codec, + NamespacePath|None, + ], + ids=[ + 'any_type', + 'nsp_type', + ] +) +def test_codec_hooks_mod( + debug_mode: bool, + ipc_pld_spec: Union[Type]|Any, +): ''' Audit the `.msg.MsgCodec` override apis details given our impl uses `contextvars` to accomplish per `trio` task codec @@ -178,11 +297,21 @@ def test_codec_hooks_mod(): ''' async def main(): - task: trio.Task = trio.lowlevel.current_task() - task_ctx: Context = task.context - assert _ctxvar_MsgCodec not in task_ctx - async with tractor.open_nursery() as an: + # task: trio.Task = trio.lowlevel.current_task() + + # ContextVar + # task_ctx: Context = task.context + # assert _ctxvar_MsgCodec not in task_ctx + + # TreeVar + # def_codec: MsgCodec = _ctxvar_MsgCodec.get_in(task) + def_codec = _ctxvar_MsgCodec.get() + assert def_codec is _codec._def_tractor_codec + + async with tractor.open_nursery( + debug_mode=debug_mode, + ) as an: p: tractor.Portal = await an.start_actor( 'sub', enable_modules=[__name__], @@ -192,7 +321,9 @@ def test_codec_hooks_mod(): # - codec not modified -> decode nsp as `str` # - codec modified with hooks -> decode nsp as # `NamespacePath` - nsp_codec: MsgCodec = mk_custom_codec() + nsp_codec: MsgCodec = mk_custom_codec( + pld_spec=ipc_pld_spec, + ) with apply_codec(nsp_codec) as codec: chk_codec_applied( custom_codec=nsp_codec, @@ -202,9 +333,22 @@ def test_codec_hooks_mod(): async with ( p.open_context( send_back_nsp, + # TODO: send the original nsp here and + # test with `limit_msg_spec()` above? 
+ expect_debug=debug_mode, + use_any_spec=(ipc_pld_spec==Any), + ) as (ctx, first), ctx.open_stream() as ipc, ): + if ipc_pld_spec is NamespacePath: + assert isinstance(first, NamespacePath) + + print( + 'root: ENTERING CONTEXT BLOCK\n' + f'type(first): {type(first)}\n' + f'first: {first}\n' + ) # ensure codec is still applied across # `tractor.Context` + its embedded nursery. chk_codec_applied( @@ -212,23 +356,46 @@ def test_codec_hooks_mod(): enter_value=codec, ) - assert first == f'{__name__}:ex_func' + first_nsp = NamespacePath(first) + + # ensure roundtripping works + wire_bytes: bytes = nsp_codec.encode( + Started( + cid=ctx.cid, + pld=first_nsp + ) + ) + msg: Started = nsp_codec.decode(wire_bytes) + pld = msg.pld + assert pld == first_nsp + + # try a manual decode of the started msg+pld + # TODO: actually get the decoder loading # to native once we spec our SCIPP msgspec # (structurred-conc-inter-proc-protocol) # implemented as per, # https://github.com/goodboy/tractor/issues/36 # - # assert isinstance(first, NamespacePath) - assert isinstance(first, str) + if ipc_pld_spec is NamespacePath: + assert isinstance(first, NamespacePath) + + # `Any`-payload-spec case + else: + assert isinstance(first, str) + assert first == f'{__name__}:ex_func' + await ipc.send(first) - with trio.move_on_after(1): + with trio.move_on_after(.6): async for msg in ipc: + print(msg) # TODO: as per above # assert isinstance(msg, NamespacePath) assert isinstance(msg, str) + await ipc.send(msg) + await trio.sleep(0.1) await p.cancel_actor() -- 2.34.1 From afabef166e968fa2e188a949d22ed4159ae89cf7 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 2 Apr 2024 13:33:06 -0400 Subject: [PATCH 015/305] Add timeouts around some context test bodies Since with my in-index runtime-port to our native msg-spec it seems these ones are hanging B( - `test_one_end_stream_not_opened()` - `test_maybe_allow_overruns_stream()` Tossing in some `trio.fail_after()`s seems to at least gnab them as failures B) --- tests/test_context_stream_semantics.py | 134 ++++++++++++++----------- 1 file changed, 74 insertions(+), 60 deletions(-) diff --git a/tests/test_context_stream_semantics.py b/tests/test_context_stream_semantics.py index 121abaa8..36a5fd9f 100644 --- a/tests/test_context_stream_semantics.py +++ b/tests/test_context_stream_semantics.py @@ -6,6 +6,7 @@ sync-opening a ``tractor.Context`` beforehand. ''' from itertools import count +import math import platform from pprint import pformat from typing import ( @@ -845,7 +846,10 @@ async def keep_sending_from_callee( ('caller', 1, never_open_stream), ('callee', 0, keep_sending_from_callee), ], - ids='overrun_condition={}'.format, + ids=[ + ('caller_1buf_never_open_stream'), + ('callee_0buf_keep_sending_from_callee'), + ] ) def test_one_end_stream_not_opened( overrun_by: tuple[str, int, Callable], @@ -869,29 +873,30 @@ def test_one_end_stream_not_opened( enable_modules=[__name__], ) - async with portal.open_context( - entrypoint, - ) as (ctx, sent): - assert sent is None + with trio.fail_after(0.8): + async with portal.open_context( + entrypoint, + ) as (ctx, sent): + assert sent is None - if 'caller' in overrunner: + if 'caller' in overrunner: - async with ctx.open_stream() as stream: + async with ctx.open_stream() as stream: - # itersend +1 msg more then the buffer size - # to cause the most basic overrun. - for i in range(buf_size): - print(f'sending {i}') - await stream.send(i) + # itersend +1 msg more then the buffer size + # to cause the most basic overrun. 
+ for i in range(buf_size): + print(f'sending {i}') + await stream.send(i) - else: - # expect overrun error to be relayed back - # and this sleep interrupted - await trio.sleep_forever() + else: + # expect overrun error to be relayed back + # and this sleep interrupted + await trio.sleep_forever() - else: - # callee overruns caller case so we do nothing here - await trio.sleep_forever() + else: + # callee overruns caller case so we do nothing here + await trio.sleep_forever() await portal.cancel_actor() @@ -1055,54 +1060,63 @@ def test_maybe_allow_overruns_stream( loglevel=loglevel, debug_mode=debug_mode, ) - seq = list(range(10)) - async with portal.open_context( - echo_back_sequence, - seq=seq, - wait_for_cancel=cancel_ctx, - be_slow=(slow_side == 'child'), - allow_overruns_side=allow_overruns_side, - ) as (ctx, sent): - assert sent is None + # stream-sequence batch info with send delay to determine + # approx timeout determining whether test has hung. + total_batches: int = 2 + num_items: int = 10 + seq = list(range(num_items)) + parent_send_delay: float = 0.16 + timeout: float = math.ceil( + total_batches * num_items * parent_send_delay + ) + with trio.fail_after(timeout): + async with portal.open_context( + echo_back_sequence, + seq=seq, + wait_for_cancel=cancel_ctx, + be_slow=(slow_side == 'child'), + allow_overruns_side=allow_overruns_side, - async with ctx.open_stream( - msg_buffer_size=1 if slow_side == 'parent' else None, - allow_overruns=(allow_overruns_side in {'parent', 'both'}), - ) as stream: + ) as (ctx, sent): + assert sent is None - total_batches: int = 2 - for _ in range(total_batches): - for msg in seq: - # print(f'root tx {msg}') - await stream.send(msg) - if slow_side == 'parent': - # NOTE: we make the parent slightly - # slower, when it is slow, to make sure - # that in the overruns everywhere case - await trio.sleep(0.16) + async with ctx.open_stream( + msg_buffer_size=1 if slow_side == 'parent' else None, + allow_overruns=(allow_overruns_side in {'parent', 'both'}), + ) as stream: - batch = [] - async for msg in stream: - print(f'root rx {msg}') - batch.append(msg) - if batch == seq: - break + for _ in range(total_batches): + for msg in seq: + # print(f'root tx {msg}') + await stream.send(msg) + if slow_side == 'parent': + # NOTE: we make the parent slightly + # slower, when it is slow, to make sure + # that in the overruns everywhere case + await trio.sleep(parent_send_delay) + + batch = [] + async for msg in stream: + print(f'root rx {msg}') + batch.append(msg) + if batch == seq: + break + + if cancel_ctx: + # cancel the remote task + print('Requesting `ctx.cancel()` in parent!') + await ctx.cancel() + + res: str|ContextCancelled = await ctx.result() if cancel_ctx: - # cancel the remote task - print('Requesting `ctx.cancel()` in parent!') - await ctx.cancel() + assert isinstance(res, ContextCancelled) + assert tuple(res.canceller) == current_actor().uid - res: str|ContextCancelled = await ctx.result() - - if cancel_ctx: - assert isinstance(res, ContextCancelled) - assert tuple(res.canceller) == current_actor().uid - - else: - print(f'RX ROOT SIDE RESULT {res}') - assert res == 'yo' + else: + print(f'RX ROOT SIDE RESULT {res}') + assert res == 'yo' # cancel the daemon await portal.cancel_actor() -- 2.34.1 From 8d8a47ef7b795d7a20df067ce2b70946320564c7 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 2 Apr 2024 13:41:52 -0400 Subject: [PATCH 016/305] WIP porting runtime to use `Msg`-spec --- tractor/_context.py | 245 +++++++++------- tractor/_entry.py | 1 + 
tractor/_exceptions.py | 94 ++++-- tractor/_ipc.py | 99 ++++++- tractor/_portal.py | 19 +- tractor/_rpc.py | 644 ++++++++++++++++++++++++----------------- tractor/_runtime.py | 174 +++++++---- tractor/_spawn.py | 30 +- tractor/_streaming.py | 46 ++- tractor/devx/_debug.py | 3 + 10 files changed, 878 insertions(+), 477 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 51b23302..02dcac39 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -53,7 +53,14 @@ from ._exceptions import ( _raise_from_no_key_in_msg, ) from .log import get_logger -from .msg import NamespacePath +from .msg import ( + NamespacePath, + Msg, + Return, + Started, + Stop, + Yield, +) from ._ipc import Channel from ._streaming import MsgStream from ._state import ( @@ -96,7 +103,8 @@ async def _drain_to_final_msg( # wait for a final context result by collecting (but # basically ignoring) any bi-dir-stream msgs still in transit # from the far end. - pre_result_drained: list[dict] = [] + # pre_result_drained: list[dict] = [] + pre_result_drained: list[Msg] = [] while not ( ctx.maybe_error and not ctx._final_result_is_set() @@ -155,7 +163,10 @@ async def _drain_to_final_msg( # await pause() # pray to the `trio` gawds that we're corrent with this - msg: dict = await ctx._recv_chan.receive() + # msg: dict = await ctx._recv_chan.receive() + msg: Msg = await ctx._recv_chan.receive() + # always capture unexpected/non-result msgs + pre_result_drained.append(msg) # NOTE: we get here if the far end was # `ContextCancelled` in 2 cases: @@ -175,24 +186,31 @@ async def _drain_to_final_msg( # continue to bubble up as normal. raise - try: - ctx._result: Any = msg['return'] - log.runtime( - 'Context delivered final draining msg:\n' - f'{pformat(msg)}' - ) - # XXX: only close the rx mem chan AFTER - # a final result is retreived. - # if ctx._recv_chan: - # await ctx._recv_chan.aclose() - # TODO: ^ we don't need it right? - break + match msg: + case Return( + cid=cid, + pld=res, + ): + # try: + # ctx._result: Any = msg['return'] + # ctx._result: Any = msg.pld + ctx._result: Any = res + log.runtime( + 'Context delivered final draining msg:\n' + f'{pformat(msg)}' + ) + # XXX: only close the rx mem chan AFTER + # a final result is retreived. + # if ctx._recv_chan: + # await ctx._recv_chan.aclose() + # TODO: ^ we don't need it right? + break - except KeyError: - # always capture unexpected/non-result msgs - pre_result_drained.append(msg) + # except KeyError: + # except AttributeError: + case Yield(): + # if 'yield' in msg: - if 'yield' in msg: # far end task is still streaming to us so discard # and report per local context state. if ( @@ -238,9 +256,10 @@ async def _drain_to_final_msg( # TODO: work out edge cases here where # a stream is open but the task also calls # this? - # -[ ] should be a runtime error if a stream is open - # right? - elif 'stop' in msg: + # -[ ] should be a runtime error if a stream is open right? + # Stop() + case Stop(): + # elif 'stop' in msg: log.cancel( 'Remote stream terminated due to "stop" msg:\n\n' f'{pformat(msg)}\n' @@ -249,78 +268,80 @@ async def _drain_to_final_msg( # It's an internal error if any other msg type without # a`'cid'` field arrives here! 
- if not msg.get('cid'): - raise InternalError( - 'Unexpected cid-missing msg?\n\n' - f'{msg}\n' - ) + case _: + # if not msg.get('cid'): + if not msg.cid: + raise InternalError( + 'Unexpected cid-missing msg?\n\n' + f'{msg}\n' + ) - # XXX fallthrough to handle expected error XXX - # TODO: replace this with `ctx.maybe_raise()` - # - # TODO: would this be handier for this case maybe? - # async with maybe_raise_on_exit() as raises: - # if raises: - # log.error('some msg about raising..') + # XXX fallthrough to handle expected error XXX + # TODO: replace this with `ctx.maybe_raise()` + # + # TODO: would this be handier for this case maybe? + # async with maybe_raise_on_exit() as raises: + # if raises: + # log.error('some msg about raising..') - re: Exception|None = ctx._remote_error - if re: - log.critical( - 'Remote ctx terminated due to "error" msg:\n' - f'{re}' - ) - assert msg is ctx._cancel_msg - # NOTE: this solved a super dupe edge case XD - # this was THE super duper edge case of: - # - local task opens a remote task, - # - requests remote cancellation of far end - # ctx/tasks, - # - needs to wait for the cancel ack msg - # (ctxc) or some result in the race case - # where the other side's task returns - # before the cancel request msg is ever - # rxed and processed, - # - here this surrounding drain loop (which - # iterates all ipc msgs until the ack or - # an early result arrives) was NOT exiting - # since we are the edge case: local task - # does not re-raise any ctxc it receives - # IFF **it** was the cancellation - # requester.. - # will raise if necessary, ow break from - # loop presuming any error terminates the - # context! - ctx._maybe_raise_remote_err( - re, - # NOTE: obvi we don't care if we - # overran the far end if we're already - # waiting on a final result (msg). - # raise_overrun_from_self=False, - raise_overrun_from_self=raise_overrun, - ) + re: Exception|None = ctx._remote_error + if re: + log.critical( + 'Remote ctx terminated due to "error" msg:\n' + f'{re}' + ) + assert msg is ctx._cancel_msg + # NOTE: this solved a super dupe edge case XD + # this was THE super duper edge case of: + # - local task opens a remote task, + # - requests remote cancellation of far end + # ctx/tasks, + # - needs to wait for the cancel ack msg + # (ctxc) or some result in the race case + # where the other side's task returns + # before the cancel request msg is ever + # rxed and processed, + # - here this surrounding drain loop (which + # iterates all ipc msgs until the ack or + # an early result arrives) was NOT exiting + # since we are the edge case: local task + # does not re-raise any ctxc it receives + # IFF **it** was the cancellation + # requester.. + # will raise if necessary, ow break from + # loop presuming any error terminates the + # context! + ctx._maybe_raise_remote_err( + re, + # NOTE: obvi we don't care if we + # overran the far end if we're already + # waiting on a final result (msg). + # raise_overrun_from_self=False, + raise_overrun_from_self=raise_overrun, + ) - break # OOOOOF, yeah obvi we need this.. + break # OOOOOF, yeah obvi we need this.. - # XXX we should never really get here - # right! since `._deliver_msg()` should - # always have detected an {'error': ..} - # msg and already called this right!?! 
- elif error := unpack_error( - msg=msg, - chan=ctx._portal.channel, - hide_tb=False, - ): - log.critical('SHOULD NEVER GET HERE!?') - assert msg is ctx._cancel_msg - assert error.msgdata == ctx._remote_error.msgdata - from .devx._debug import pause - await pause() - ctx._maybe_cancel_and_set_remote_error(error) - ctx._maybe_raise_remote_err(error) + # XXX we should never really get here + # right! since `._deliver_msg()` should + # always have detected an {'error': ..} + # msg and already called this right!?! + elif error := unpack_error( + msg=msg, + chan=ctx._portal.channel, + hide_tb=False, + ): + log.critical('SHOULD NEVER GET HERE!?') + assert msg is ctx._cancel_msg + assert error.msgdata == ctx._remote_error.msgdata + from .devx._debug import pause + await pause() + ctx._maybe_cancel_and_set_remote_error(error) + ctx._maybe_raise_remote_err(error) - else: - # bubble the original src key error - raise + else: + # bubble the original src key error + raise else: log.cancel( 'Skipping `MsgStream` drain since final outcome is set\n\n' @@ -710,10 +731,14 @@ class Context: async def send_stop(self) -> None: # await pause() - await self.chan.send({ - 'stop': True, - 'cid': self.cid - }) + # await self.chan.send({ + # # Stop( + # 'stop': True, + # 'cid': self.cid + # }) + await self.chan.send( + Stop(cid=self.cid) + ) def _maybe_cancel_and_set_remote_error( self, @@ -1398,17 +1423,19 @@ class Context: for msg in drained_msgs: # TODO: mask this by default.. - if 'return' in msg: + # if 'return' in msg: + if isinstance(msg, Return): # from .devx import pause # await pause() - raise InternalError( + # raise InternalError( + log.warning( 'Final `return` msg should never be drained !?!?\n\n' f'{msg}\n' ) log.cancel( 'Ctx drained pre-result msgs:\n' - f'{drained_msgs}' + f'{pformat(drained_msgs)}' ) self.maybe_raise( @@ -1616,7 +1643,18 @@ class Context: f'called `.started()` twice on context with {self.chan.uid}' ) - await self.chan.send({'started': value, 'cid': self.cid}) + # await self.chan.send( + # { + # 'started': value, + # 'cid': self.cid, + # } + # ) + await self.chan.send( + Started( + cid=self.cid, + pld=value, + ) + ) self._started_called = True async def _drain_overflows( @@ -1671,7 +1709,8 @@ class Context: async def _deliver_msg( self, - msg: dict, + # msg: dict, + msg: Msg, ) -> bool: ''' @@ -1855,7 +1894,7 @@ class Context: # anything different. return False else: - txt += f'\n{msg}\n' + # txt += f'\n{msg}\n' # raise local overrun and immediately pack as IPC # msg for far end. try: @@ -1986,15 +2025,17 @@ async def open_context_from_portal( ) assert ctx._remote_func_type == 'context' - msg: dict = await ctx._recv_chan.receive() + msg: Started = await ctx._recv_chan.receive() try: # the "first" value here is delivered by the callee's # ``Context.started()`` call. 
- first: Any = msg['started'] + # first: Any = msg['started'] + first: Any = msg.pld ctx._started_called: bool = True - except KeyError as src_error: + # except KeyError as src_error: + except AttributeError as src_error: _raise_from_no_key_in_msg( ctx=ctx, msg=msg, diff --git a/tractor/_entry.py b/tractor/_entry.py index 21c9ae48..bf719abb 100644 --- a/tractor/_entry.py +++ b/tractor/_entry.py @@ -136,6 +136,7 @@ def _trio_main( run_as_asyncio_guest(trio_main) else: trio.run(trio_main) + except KeyboardInterrupt: log.cancel( 'Actor received KBI\n' diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index b1a8ee63..7deda9d2 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -31,9 +31,16 @@ import textwrap import traceback import trio +from msgspec import structs from tractor._state import current_actor from tractor.log import get_logger +from tractor.msg import ( + Error, + Msg, + Stop, + Yield, +) if TYPE_CHECKING: from ._context import Context @@ -135,6 +142,8 @@ class RemoteActorError(Exception): # and instead render if from `.boxed_type_str`? self._boxed_type: BaseException = boxed_type self._src_type: BaseException|None = None + + # TODO: make this a `.errmsg: Error` throughout? self.msgdata: dict[str, Any] = msgdata # TODO: mask out eventually or place in `pack_error()` @@ -464,7 +473,23 @@ class AsyncioCancelled(Exception): ''' class MessagingError(Exception): - 'Some kind of unexpected SC messaging dialog issue' + ''' + IPC related msg (typing), transaction (ordering) or dialog + handling error. + + ''' + + +class MsgTypeError(MessagingError): + ''' + Equivalent of a `TypeError` for an IPC wire-message + due to an invalid field value (type). + + Normally this is re-raised from some `.msg._codec` + decode error raised by a backend interchange lib + like `msgspec` or `pycapnproto`. + + ''' def pack_error( @@ -473,7 +498,7 @@ def pack_error( tb: str|None = None, cid: str|None = None, -) -> dict[str, dict]: +) -> Error|dict[str, dict]: ''' Create an "error message" which boxes a locally caught exception's meta-data and encodes it for wire transport via an @@ -536,17 +561,23 @@ def pack_error( # content's `.msgdata`). error_msg['tb_str'] = tb_str - pkt: dict = { - 'error': error_msg, - } - if cid: - pkt['cid'] = cid + # Error() + # pkt: dict = { + # 'error': error_msg, + # } + pkt: Error = Error( + cid=cid, + **error_msg, + # TODO: just get rid of `.pld` on this msg? + ) + # if cid: + # pkt['cid'] = cid return pkt def unpack_error( - msg: dict[str, Any], + msg: dict[str, Any]|Error, chan: Channel|None = None, box_type: RemoteActorError = RemoteActorError, @@ -564,15 +595,17 @@ def unpack_error( ''' __tracebackhide__: bool = hide_tb - error_dict: dict[str, dict] | None - if ( - error_dict := msg.get('error') - ) is None: + error_dict: dict[str, dict]|None + if not isinstance(msg, Error): + # if ( + # error_dict := msg.get('error') + # ) is None: # no error field, nothing to unpack. return None # retrieve the remote error's msg encoded details - tb_str: str = error_dict.get('tb_str', '') + # tb_str: str = error_dict.get('tb_str', '') + tb_str: str = msg.tb_str message: str = ( f'{chan.uid}\n' + @@ -581,7 +614,8 @@ def unpack_error( # try to lookup a suitable error type from the local runtime # env then use it to construct a local instance. 
- boxed_type_str: str = error_dict['boxed_type_str'] + # boxed_type_str: str = error_dict['boxed_type_str'] + boxed_type_str: str = msg.boxed_type_str boxed_type: Type[BaseException] = get_err_type(boxed_type_str) if boxed_type_str == 'ContextCancelled': @@ -595,7 +629,11 @@ def unpack_error( # original source error. elif boxed_type_str == 'RemoteActorError': assert boxed_type is RemoteActorError - assert len(error_dict['relay_path']) >= 1 + # assert len(error_dict['relay_path']) >= 1 + assert len(msg.relay_path) >= 1 + + # TODO: mk RAE just take the `Error` instance directly? + error_dict: dict = structs.asdict(msg) exc = box_type( message, @@ -623,11 +661,12 @@ def is_multi_cancelled(exc: BaseException) -> bool: def _raise_from_no_key_in_msg( ctx: Context, - msg: dict, + msg: Msg, src_err: KeyError, log: StackLevelAdapter, # caller specific `log` obj expect_key: str = 'yield', + expect_msg: str = Yield, stream: MsgStream | None = None, # allow "deeper" tbs when debugging B^o @@ -660,8 +699,10 @@ def _raise_from_no_key_in_msg( # an internal error should never get here try: - cid: str = msg['cid'] - except KeyError as src_err: + cid: str = msg.cid + # cid: str = msg['cid'] + # except KeyError as src_err: + except AttributeError as src_err: raise MessagingError( f'IPC `Context` rx-ed msg without a ctx-id (cid)!?\n' f'cid: {cid}\n\n' @@ -672,7 +713,10 @@ def _raise_from_no_key_in_msg( # TODO: test that shows stream raising an expected error!!! # raise the error message in a boxed exception type! - if msg.get('error'): + # if msg.get('error'): + if isinstance(msg, Error): + # match msg: + # case Error(): raise unpack_error( msg, ctx.chan, @@ -683,8 +727,10 @@ def _raise_from_no_key_in_msg( # `MsgStream` termination msg. # TODO: does it make more sense to pack # the stream._eoc outside this in the calleer always? + # case Stop(): elif ( - msg.get('stop') + # msg.get('stop') + isinstance(msg, Stop) or ( stream and stream._eoc @@ -725,14 +771,16 @@ def _raise_from_no_key_in_msg( stream and stream._closed ): - raise trio.ClosedResourceError('This stream was closed') - + # TODO: our own error subtype? + raise trio.ClosedResourceError( + 'This stream was closed' + ) # always re-raise the source error if no translation error case # is activated above. 
_type: str = 'Stream' if stream else 'Context' raise MessagingError( - f"{_type} was expecting a '{expect_key}' message" + f"{_type} was expecting a '{expect_key.upper()}' message" " BUT received a non-error msg:\n" f'{pformat(msg)}' ) from src_err diff --git a/tractor/_ipc.py b/tractor/_ipc.py index 5f71c38c..6168c77c 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -38,17 +38,23 @@ from typing import ( Protocol, Type, TypeVar, + Union, ) +import msgspec from tricycle import BufferedReceiveStream import trio from tractor.log import get_logger -from tractor._exceptions import TransportClosed +from tractor._exceptions import ( + TransportClosed, + MsgTypeError, +) from tractor.msg import ( _ctxvar_MsgCodec, + _codec, MsgCodec, - mk_codec, + types, ) log = get_logger(__name__) @@ -163,7 +169,16 @@ class MsgpackTCPStream(MsgTransport): # allow for custom IPC msg interchange format # dynamic override Bo - self.codec: MsgCodec = codec or mk_codec() + self._task = trio.lowlevel.current_task() + self._codec: MsgCodec = ( + codec + or + _codec._ctxvar_MsgCodec.get() + ) + log.critical( + '!?!: USING STD `tractor` CODEC !?!?\n' + f'{self._codec}\n' + ) async def _iter_packets(self) -> AsyncGenerator[dict, None]: ''' @@ -171,7 +186,6 @@ class MsgpackTCPStream(MsgTransport): stream using the current task's `MsgCodec`. ''' - import msgspec # noqa decodes_failed: int = 0 while True: @@ -206,7 +220,19 @@ class MsgpackTCPStream(MsgTransport): try: # NOTE: lookup the `trio.Task.context`'s var for # the current `MsgCodec`. - yield _ctxvar_MsgCodec.get().decode(msg_bytes) + codec: MsgCodec = _ctxvar_MsgCodec.get() + if self._codec.pld_spec != codec.pld_spec: + # assert ( + # task := trio.lowlevel.current_task() + # ) is not self._task + # self._task = task + self._codec = codec + log.critical( + '.recv() USING NEW CODEC !?!?\n' + f'{self._codec}\n\n' + f'msg_bytes -> {msg_bytes}\n' + ) + yield codec.decode(msg_bytes) # TODO: remove, was only for orig draft impl # testing. @@ -221,6 +247,41 @@ class MsgpackTCPStream(MsgTransport): # # yield obj + # XXX NOTE: since the below error derives from + # `DecodeError` we need to catch is specially + # and always raise such that spec violations + # are never allowed to be caught silently! + except msgspec.ValidationError as verr: + + # decode the msg-bytes using the std msgpack + # interchange-prot (i.e. without any + # `msgspec.Struct` handling) so that we can + # determine what `.msg.types.Msg` is the culprit + # by reporting the received value. + msg_dict: dict = msgspec.msgpack.decode(msg_bytes) + msg_type_name: str = msg_dict['msg_type'] + msg_type = getattr(types, msg_type_name) + errmsg: str = ( + f'Received invalid IPC `{msg_type_name}` msg\n\n' + ) + + # XXX see if we can determine the exact invalid field + # such that we can comprehensively report the + # specific field's type problem + msgspec_msg: str = verr.args[0].rstrip('`') + msg, _, maybe_field = msgspec_msg.rpartition('$.') + if field_val := msg_dict.get(maybe_field): + field_type: Union[Type] = msg_type.__signature__.parameters[ + maybe_field + ].annotation + errmsg += ( + f'{msg.rstrip("`")}\n\n' + f'{msg_type}\n' + f' |_.{maybe_field}: {field_type} = {field_val}\n' + ) + + raise MsgTypeError(errmsg) from verr + except ( msgspec.DecodeError, UnicodeDecodeError, @@ -230,14 +291,15 @@ class MsgpackTCPStream(MsgTransport): # do with a channel drop - hope that receiving from the # channel will raise an expected error and bubble up. 
try: - msg_str: str | bytes = msg_bytes.decode() + msg_str: str|bytes = msg_bytes.decode() except UnicodeDecodeError: msg_str = msg_bytes - log.error( - '`msgspec` failed to decode!?\n' - 'dumping bytes:\n' - f'{msg_str!r}' + log.exception( + 'Failed to decode msg?\n' + f'{codec}\n\n' + 'Rxed bytes from wire:\n\n' + f'{msg_str!r}\n' ) decodes_failed += 1 else: @@ -258,8 +320,21 @@ class MsgpackTCPStream(MsgTransport): # NOTE: lookup the `trio.Task.context`'s var for # the current `MsgCodec`. - bytes_data: bytes = _ctxvar_MsgCodec.get().encode(msg) - # bytes_data: bytes = self.codec.encode(msg) + codec: MsgCodec = _ctxvar_MsgCodec.get() + # if self._codec != codec: + if self._codec.pld_spec != codec.pld_spec: + self._codec = codec + log.critical( + '.send() using NEW CODEC !?!?\n' + f'{self._codec}\n\n' + f'OBJ -> {msg}\n' + ) + if type(msg) not in types.__spec__: + log.warning( + 'Sending non-`Msg`-spec msg?\n\n' + f'{msg}\n' + ) + bytes_data: bytes = codec.encode(msg) # supposedly the fastest says, # https://stackoverflow.com/a/54027962 diff --git a/tractor/_portal.py b/tractor/_portal.py index ac602dd5..cc9052ba 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -45,7 +45,10 @@ from ._state import ( ) from ._ipc import Channel from .log import get_logger -from .msg import NamespacePath +from .msg import ( + NamespacePath, + Return, +) from ._exceptions import ( unpack_error, NoResult, @@ -66,7 +69,8 @@ log = get_logger(__name__) # `._raise_from_no_key_in_msg()` (after tweak to # accept a `chan: Channel` arg) in key block! def _unwrap_msg( - msg: dict[str, Any], + # msg: dict[str, Any], + msg: Return, channel: Channel, hide_tb: bool = True, @@ -79,18 +83,21 @@ def _unwrap_msg( __tracebackhide__: bool = hide_tb try: - return msg['return'] - except KeyError as ke: + return msg.pld + # return msg['return'] + # except KeyError as ke: + except AttributeError as err: # internal error should never get here - assert msg.get('cid'), ( + # assert msg.get('cid'), ( + assert msg.cid, ( "Received internal error at portal?" ) raise unpack_error( msg, channel - ) from ke + ) from err class Portal: diff --git a/tractor/_rpc.py b/tractor/_rpc.py index ef6cbe00..9b179524 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -57,6 +57,15 @@ from ._exceptions import ( from .devx import _debug from . import _state from .log import get_logger +from tractor.msg.types import ( + Start, + StartAck, + Started, + Stop, + Yield, + Return, + Error, +) if TYPE_CHECKING: from ._runtime import Actor @@ -84,10 +93,13 @@ async def _invoke_non_context( # TODO: can we unify this with the `context=True` impl below? if inspect.isasyncgen(coro): - await chan.send({ - 'cid': cid, - 'functype': 'asyncgen', - }) + # await chan.send({ + await chan.send( + StartAck( + cid=cid, + functype='asyncgen', + ) + ) # XXX: massive gotcha! 
If the containing scope # is cancelled and we execute the below line, # any ``ActorNursery.__aexit__()`` WON'T be @@ -107,27 +119,45 @@ async def _invoke_non_context( # to_send = await chan.recv_nowait() # if to_send is not None: # to_yield = await coro.asend(to_send) - await chan.send({ - 'yield': item, - 'cid': cid, - }) + # await chan.send({ + # # Yield() + # 'cid': cid, + # 'yield': item, + # }) + await chan.send( + Yield( + cid=cid, + pld=item, + ) + ) log.runtime(f"Finished iterating {coro}") # TODO: we should really support a proper # `StopAsyncIteration` system here for returning a final # value if desired - await chan.send({ - 'stop': True, - 'cid': cid, - }) + await chan.send( + Stop(cid=cid) + ) + # await chan.send({ + # # Stop( + # 'cid': cid, + # 'stop': True, + # }) # one way @stream func that gets treated like an async gen # TODO: can we unify this with the `context=True` impl below? elif treat_as_gen: - await chan.send({ - 'cid': cid, - 'functype': 'asyncgen', - }) + await chan.send( + StartAck( + cid=cid, + functype='asyncgen', + ) + ) + # await chan.send({ + # # StartAck() + # 'cid': cid, + # 'functype': 'asyncgen', + # }) # XXX: the async-func may spawn further tasks which push # back values like an async-generator would but must # manualy construct the response dict-packet-responses as @@ -140,10 +170,14 @@ async def _invoke_non_context( if not cs.cancelled_caught: # task was not cancelled so we can instruct the # far end async gen to tear down - await chan.send({ - 'stop': True, - 'cid': cid - }) + await chan.send( + Stop(cid=cid) + ) + # await chan.send({ + # # Stop( + # 'cid': cid, + # 'stop': True, + # }) else: # regular async function/method # XXX: possibly just a scheduled `Actor._cancel_task()` @@ -155,10 +189,17 @@ async def _invoke_non_context( # way: using the linked IPC context machinery. failed_resp: bool = False try: - await chan.send({ - 'functype': 'asyncfunc', - 'cid': cid - }) + await chan.send( + StartAck( + cid=cid, + functype='asyncfunc', + ) + ) + # await chan.send({ + # # StartAck() + # 'cid': cid, + # 'functype': 'asyncfunc', + # }) except ( trio.ClosedResourceError, trio.BrokenResourceError, @@ -192,10 +233,17 @@ async def _invoke_non_context( and chan.connected() ): try: - await chan.send({ - 'return': result, - 'cid': cid, - }) + # await chan.send({ + # # Return() + # 'cid': cid, + # 'return': result, + # }) + await chan.send( + Return( + cid=cid, + pld=result, + ) + ) except ( BrokenPipeError, trio.BrokenResourceError, @@ -376,6 +424,8 @@ async def _invoke( # XXX for .pause_from_sync()` usage we need to make sure # `greenback` is boostrapped in the subactor! await _debug.maybe_init_greenback() + # else: + # await pause() # TODO: possibly a specially formatted traceback # (not sure what typing is for this..)? @@ -488,10 +538,18 @@ async def _invoke( # a "context" endpoint type is the most general and # "least sugary" type of RPC ep with support for # bi-dir streaming B) - await chan.send({ - 'cid': cid, - 'functype': 'context', - }) + # StartAck + await chan.send( + StartAck( + cid=cid, + functype='context', + ) + ) + # await chan.send({ + # # StartAck() + # 'cid': cid, + # 'functype': 'context', + # }) # TODO: should we also use an `.open_context()` equiv # for this callee side by factoring the impl from @@ -515,10 +573,17 @@ async def _invoke( ctx._result = res # deliver final result to caller side. 
- await chan.send({ - 'return': res, - 'cid': cid - }) + await chan.send( + Return( + cid=cid, + pld=res, + ) + ) + # await chan.send({ + # # Return() + # 'cid': cid, + # 'return': res, + # }) # NOTE: this happens IFF `ctx._scope.cancel()` is # called by any of, @@ -691,7 +756,8 @@ async def try_ship_error_to_remote( try: # NOTE: normally only used for internal runtime errors # so ship to peer actor without a cid. - msg: dict = pack_error( + # msg: dict = pack_error( + msg: Error = pack_error( err, cid=cid, @@ -707,12 +773,13 @@ async def try_ship_error_to_remote( trio.BrokenResourceError, BrokenPipeError, ): - err_msg: dict = msg['error']['tb_str'] + # err_msg: dict = msg['error']['tb_str'] log.critical( 'IPC transport failure -> ' f'failed to ship error to {remote_descr}!\n\n' f'X=> {channel.uid}\n\n' - f'{err_msg}\n' + # f'{err_msg}\n' + f'{msg}\n' ) @@ -772,31 +839,6 @@ async def process_messages( with CancelScope(shield=shield) as loop_cs: task_status.started(loop_cs) async for msg in chan: - - # dedicated loop terminate sentinel - if msg is None: - - tasks: dict[ - tuple[Channel, str], - tuple[Context, Callable, trio.Event] - ] = actor._rpc_tasks.copy() - log.cancel( - f'Peer IPC channel terminated via `None` setinel msg?\n' - f'=> Cancelling all {len(tasks)} local RPC tasks..\n' - f'peer: {chan.uid}\n' - f'|_{chan}\n' - ) - for (channel, cid) in tasks: - if channel is chan: - await actor._cancel_task( - cid, - channel, - requesting_uid=channel.uid, - - ipc_msg=msg, - ) - break - log.transport( # type: ignore f'<= IPC msg from peer: {chan.uid}\n\n' @@ -806,216 +848,294 @@ async def process_messages( f'{pformat(msg)}\n' ) - cid = msg.get('cid') - if cid: - # deliver response to local caller/waiter - # via its per-remote-context memory channel. - await actor._push_result( - chan, - cid, - msg, - ) + match msg: - log.runtime( - 'Waiting on next IPC msg from\n' - f'peer: {chan.uid}:\n' - f'|_{chan}\n' + # if msg is None: + # dedicated loop terminate sentinel + case None: - # f'last msg: {msg}\n' - ) - continue - - # process a 'cmd' request-msg upack - # TODO: impl with native `msgspec.Struct` support !! - # -[ ] implement with ``match:`` syntax? - # -[ ] discard un-authed msgs as per, - # - try: - ( - ns, - funcname, - kwargs, - actorid, - cid, - ) = msg['cmd'] - - except KeyError: - # This is the non-rpc error case, that is, an - # error **not** raised inside a call to ``_invoke()`` - # (i.e. no cid was provided in the msg - see above). - # Push this error to all local channel consumers - # (normally portals) by marking the channel as errored - assert chan.uid - exc = unpack_error(msg, chan=chan) - chan._exc = exc - raise exc - - log.runtime( - 'Handling RPC cmd from\n' - f'peer: {actorid}\n' - '\n' - f'=> {ns}.{funcname}({kwargs})\n' - ) - if ns == 'self': - if funcname == 'cancel': - func: Callable = actor.cancel - kwargs |= { - 'req_chan': chan, - } - - # don't start entire actor runtime cancellation - # if this actor is currently in debug mode! - pdb_complete: trio.Event|None = _debug.Lock.local_pdb_complete - if pdb_complete: - await pdb_complete.wait() - - # Either of `Actor.cancel()`/`.cancel_soon()` - # was called, so terminate this IPC msg - # loop, exit back out into `async_main()`, - # and immediately start the core runtime - # machinery shutdown! 
- with CancelScope(shield=True): - await _invoke( - actor, - cid, - chan, - func, - kwargs, - is_rpc=False, - ) - - log.runtime( - 'Cancelling IPC transport msg-loop with peer:\n' + tasks: dict[ + tuple[Channel, str], + tuple[Context, Callable, trio.Event] + ] = actor._rpc_tasks.copy() + log.cancel( + f'Peer IPC channel terminated via `None` setinel msg?\n' + f'=> Cancelling all {len(tasks)} local RPC tasks..\n' + f'peer: {chan.uid}\n' f'|_{chan}\n' ) - loop_cs.cancel() + for (channel, cid) in tasks: + if channel is chan: + await actor._cancel_task( + cid, + channel, + requesting_uid=channel.uid, + + ipc_msg=msg, + ) break - if funcname == '_cancel_task': - func: Callable = actor._cancel_task - - # we immediately start the runtime machinery - # shutdown - # with CancelScope(shield=True): - target_cid: str = kwargs['cid'] - kwargs |= { - # NOTE: ONLY the rpc-task-owning - # parent IPC channel should be able to - # cancel it! - 'parent_chan': chan, - 'requesting_uid': chan.uid, - 'ipc_msg': msg, - } - # TODO: remove? already have emit in meth. - # log.runtime( - # f'Rx RPC task cancel request\n' - # f'<= canceller: {chan.uid}\n' - # f' |_{chan}\n\n' - # f'=> {actor}\n' - # f' |_cid: {target_cid}\n' - # ) - try: - await _invoke( - actor, - cid, - chan, - func, - kwargs, - is_rpc=False, - ) - except BaseException: - log.exception( - 'Failed to cancel task?\n' - f'<= canceller: {chan.uid}\n' - f' |_{chan}\n\n' - f'=> {actor}\n' - f' |_cid: {target_cid}\n' - ) - continue - else: - # normally registry methods, eg. - # ``.register_actor()`` etc. - func: Callable = getattr(actor, funcname) - - else: - # complain to client about restricted modules - try: - func = actor._get_rpc_func(ns, funcname) - except ( - ModuleNotExposed, - AttributeError, - ) as err: - err_msg: dict[str, dict] = pack_error( - err, - cid=cid, - ) - await chan.send(err_msg) - continue - - # schedule a task for the requested RPC function - # in the actor's main "service nursery". - # TODO: possibly a service-tn per IPC channel for - # supervision isolation? would avoid having to - # manage RPC tasks individually in `._rpc_tasks` - # table? - log.runtime( - f'Spawning task for RPC request\n' - f'<= caller: {chan.uid}\n' - f' |_{chan}\n\n' - # TODO: maddr style repr? - # f' |_@ /ipv4/{chan.raddr}/tcp/{chan.rport}/' - # f'cid="{cid[-16:]} .."\n\n' - - f'=> {actor}\n' - f' |_cid: {cid}\n' - f' |>> {func}()\n' - ) - assert actor._service_n # wait why? do it at top? - try: - ctx: Context = await actor._service_n.start( - partial( - _invoke, - actor, - cid, + # cid = msg.get('cid') + # if cid: + case ( + StartAck(cid=cid) + | Started(cid=cid) + | Yield(cid=cid) + | Stop(cid=cid) + | Return(cid=cid) + | Error(cid=cid) + ): + # deliver response to local caller/waiter + # via its per-remote-context memory channel. + await actor._push_result( chan, - func, - kwargs, - ), - name=funcname, - ) + cid, + msg, + ) - except ( - RuntimeError, - BaseExceptionGroup, - ): - # avoid reporting a benign race condition - # during actor runtime teardown. - nursery_cancelled_before_task: bool = True - break + log.runtime( + 'Waiting on next IPC msg from\n' + f'peer: {chan.uid}:\n' + f'|_{chan}\n' - # in the lone case where a ``Context`` is not - # delivered, it's likely going to be a locally - # scoped exception from ``_invoke()`` itself. - if isinstance(err := ctx, Exception): - log.warning( - 'Task for RPC failed?' 
- f'|_ {func}()\n\n' + # f'last msg: {msg}\n' + ) + continue - f'{err}' - ) - continue + # process a 'cmd' request-msg upack + # TODO: impl with native `msgspec.Struct` support !! + # -[ ] implement with ``match:`` syntax? + # -[ ] discard un-authed msgs as per, + # + case Start( + cid=cid, + ns=ns, + func=funcname, + kwargs=kwargs, + uid=actorid, + ): + # try: + # ( + # ns, + # funcname, + # kwargs, + # actorid, + # cid, + # ) = msg['cmd'] - else: - # mark that we have ongoing rpc tasks - actor._ongoing_rpc_tasks = trio.Event() + # # TODO: put in `case Error():` right? + # except KeyError: + # # This is the non-rpc error case, that is, an + # # error **not** raised inside a call to ``_invoke()`` + # # (i.e. no cid was provided in the msg - see above). + # # Push this error to all local channel consumers + # # (normally portals) by marking the channel as errored + # assert chan.uid + # exc = unpack_error(msg, chan=chan) + # chan._exc = exc + # raise exc - # store cancel scope such that the rpc task can be - # cancelled gracefully if requested - actor._rpc_tasks[(chan, cid)] = ( - ctx, - func, - trio.Event(), - ) + log.runtime( + 'Handling RPC `Start` request from\n' + f'peer: {actorid}\n' + '\n' + f'=> {ns}.{funcname}({kwargs})\n' + ) + # case Start( + # ns='self', + # funcname='cancel', + # ): + if ns == 'self': + if funcname == 'cancel': + func: Callable = actor.cancel + kwargs |= { + 'req_chan': chan, + } + + # don't start entire actor runtime cancellation + # if this actor is currently in debug mode! + pdb_complete: trio.Event|None = _debug.Lock.local_pdb_complete + if pdb_complete: + await pdb_complete.wait() + + # Either of `Actor.cancel()`/`.cancel_soon()` + # was called, so terminate this IPC msg + # loop, exit back out into `async_main()`, + # and immediately start the core runtime + # machinery shutdown! + with CancelScope(shield=True): + await _invoke( + actor, + cid, + chan, + func, + kwargs, + is_rpc=False, + ) + + log.runtime( + 'Cancelling IPC transport msg-loop with peer:\n' + f'|_{chan}\n' + ) + loop_cs.cancel() + break + + # case Start( + # ns='self', + # funcname='_cancel_task', + # ): + if funcname == '_cancel_task': + func: Callable = actor._cancel_task + + # we immediately start the runtime machinery + # shutdown + # with CancelScope(shield=True): + target_cid: str = kwargs['cid'] + kwargs |= { + # NOTE: ONLY the rpc-task-owning + # parent IPC channel should be able to + # cancel it! + 'parent_chan': chan, + 'requesting_uid': chan.uid, + 'ipc_msg': msg, + } + # TODO: remove? already have emit in meth. + # log.runtime( + # f'Rx RPC task cancel request\n' + # f'<= canceller: {chan.uid}\n' + # f' |_{chan}\n\n' + # f'=> {actor}\n' + # f' |_cid: {target_cid}\n' + # ) + try: + await _invoke( + actor, + cid, + chan, + func, + kwargs, + is_rpc=False, + ) + except BaseException: + log.exception( + 'Failed to cancel task?\n' + f'<= canceller: {chan.uid}\n' + f' |_{chan}\n\n' + f'=> {actor}\n' + f' |_cid: {target_cid}\n' + ) + continue + + # case Start( + # ns='self', + # funcname='register_actor', + # ): + else: + # normally registry methods, eg. + # ``.register_actor()`` etc. 
+ func: Callable = getattr(actor, funcname) + + # case Start( + # ns=str(), + # funcname=funcname, + # ): + else: + # complain to client about restricted modules + try: + func = actor._get_rpc_func(ns, funcname) + except ( + ModuleNotExposed, + AttributeError, + ) as err: + err_msg: dict[str, dict] = pack_error( + err, + cid=cid, + ) + await chan.send(err_msg) + continue + + # schedule a task for the requested RPC function + # in the actor's main "service nursery". + # TODO: possibly a service-tn per IPC channel for + # supervision isolation? would avoid having to + # manage RPC tasks individually in `._rpc_tasks` + # table? + log.runtime( + f'Spawning task for RPC request\n' + f'<= caller: {chan.uid}\n' + f' |_{chan}\n\n' + # TODO: maddr style repr? + # f' |_@ /ipv4/{chan.raddr}/tcp/{chan.rport}/' + # f'cid="{cid[-16:]} .."\n\n' + + f'=> {actor}\n' + f' |_cid: {cid}\n' + f' |>> {func}()\n' + ) + assert actor._service_n # wait why? do it at top? + try: + ctx: Context = await actor._service_n.start( + partial( + _invoke, + actor, + cid, + chan, + func, + kwargs, + ), + name=funcname, + ) + + except ( + RuntimeError, + BaseExceptionGroup, + ): + # avoid reporting a benign race condition + # during actor runtime teardown. + nursery_cancelled_before_task: bool = True + break + + # in the lone case where a ``Context`` is not + # delivered, it's likely going to be a locally + # scoped exception from ``_invoke()`` itself. + if isinstance(err := ctx, Exception): + log.warning( + 'Task for RPC failed?' + f'|_ {func}()\n\n' + + f'{err}' + ) + continue + + else: + # mark that we have ongoing rpc tasks + actor._ongoing_rpc_tasks = trio.Event() + + # store cancel scope such that the rpc task can be + # cancelled gracefully if requested + actor._rpc_tasks[(chan, cid)] = ( + ctx, + func, + trio.Event(), + ) + + case Error()|_: + # This is the non-rpc error case, that is, an + # error **not** raised inside a call to ``_invoke()`` + # (i.e. no cid was provided in the msg - see above). 
+ # Push this error to all local channel consumers + # (normally portals) by marking the channel as errored + log.exception( + f'Unhandled IPC msg:\n\n' + f'{msg}\n' + ) + assert chan.uid + exc = unpack_error( + msg, + chan=chan, + ) + chan._exc = exc + raise exc log.runtime( 'Waiting on next IPC msg from\n' diff --git a/tractor/_runtime.py b/tractor/_runtime.py index ed7b4503..eee78973 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -87,6 +87,23 @@ from ._rpc import ( process_messages, try_ship_error_to_remote, ) +from tractor.msg import ( + types as msgtypes, + pretty_struct, +) +# from tractor.msg.types import ( +# Aid, +# SpawnSpec, +# Start, +# StartAck, +# Started, +# Yield, +# Stop, +# Return, +# Error, +# ) + + if TYPE_CHECKING: @@ -143,6 +160,7 @@ class Actor: # Information about `__main__` from parent _parent_main_data: dict[str, str] _parent_chan_cs: CancelScope|None = None + _spawn_spec: SpawnSpec|None = None # syncs for setup/teardown sequences _server_down: trio.Event|None = None @@ -539,7 +557,8 @@ class Actor: f'{pformat(msg)}\n' ) - cid = msg.get('cid') + # cid: str|None = msg.get('cid') + cid: str|None = msg.cid if cid: # deliver response to local caller/waiter await self._push_result( @@ -891,29 +910,44 @@ class Actor: f'=> {ns}.{func}({kwargs})\n' ) await chan.send( - {'cmd': ( - ns, - func, - kwargs, - self.uid, - cid, - )} + msgtypes.Start( + ns=ns, + func=func, + kwargs=kwargs, + uid=self.uid, + cid=cid, + ) ) + # {'cmd': ( + # ns, + # func, + # kwargs, + # self.uid, + # cid, + # )} + # ) # Wait on first response msg and validate; this should be # immediate. - first_msg: dict = await ctx._recv_chan.receive() - functype: str = first_msg.get('functype') + # first_msg: dict = await ctx._recv_chan.receive() + # functype: str = first_msg.get('functype') - if 'error' in first_msg: + first_msg: msgtypes.StartAck = await ctx._recv_chan.receive() + try: + functype: str = first_msg.functype + except AttributeError: raise unpack_error(first_msg, chan) + # if 'error' in first_msg: + # raise unpack_error(first_msg, chan) - elif functype not in ( + if functype not in ( 'asyncfunc', 'asyncgen', 'context', ): - raise ValueError(f"{first_msg} is an invalid response packet?") + raise ValueError( + f'{first_msg} is an invalid response packet?' + ) ctx._remote_func_type = functype return ctx @@ -946,24 +980,36 @@ class Actor: await self._do_handshake(chan) accept_addrs: list[tuple[str, int]]|None = None - if self._spawn_method == "trio": - # Receive runtime state from our parent - parent_data: dict[str, Any] - parent_data = await chan.recv() - log.runtime( - 'Received state from parent:\n\n' - # TODO: eventually all these msgs as - # `msgspec.Struct` with a special mode that - # pformats them in multi-line mode, BUT only - # if "trace"/"util" mode is enabled? - f'{pformat(parent_data)}\n' - ) - accept_addrs: list[tuple[str, int]] = parent_data.pop('bind_addrs') - rvs = parent_data.pop('_runtime_vars') + if self._spawn_method == "trio": + + # Receive runtime state from our parent + # parent_data: dict[str, Any] + # parent_data = await chan.recv() + + # TODO: maybe we should just wrap this directly + # in a `Actor.spawn_info: SpawnInfo` struct? + spawnspec: msgtypes.SpawnSpec = await chan.recv() + self._spawn_spec = spawnspec + + # TODO: eventually all these msgs as + # `msgspec.Struct` with a special mode that + # pformats them in multi-line mode, BUT only + # if "trace"/"util" mode is enabled? 
+ log.runtime( + 'Received runtime spec from parent:\n\n' + f'{pformat(spawnspec)}\n' + ) + # accept_addrs: list[tuple[str, int]] = parent_data.pop('bind_addrs') + accept_addrs: list[tuple[str, int]] = spawnspec.bind_addrs + + # rvs = parent_data.pop('_runtime_vars') + rvs = spawnspec._runtime_vars if rvs['_debug_mode']: try: - log.info('Enabling `stackscope` traces on SIGUSR1') + log.info( + 'Enabling `stackscope` traces on SIGUSR1' + ) from .devx import enable_stack_on_sig enable_stack_on_sig() except ImportError: @@ -971,28 +1017,40 @@ class Actor: '`stackscope` not installed for use in debug mode!' ) - log.runtime(f"Runtime vars are: {rvs}") + log.runtime(f'Runtime vars are: {rvs}') rvs['_is_root'] = False _state._runtime_vars.update(rvs) - for attr, value in parent_data.items(): - if ( - attr == 'reg_addrs' - and value - ): - # XXX: ``msgspec`` doesn't support serializing tuples - # so just cash manually here since it's what our - # internals expect. - # TODO: we don't really NEED these as - # tuples so we can probably drop this - # casting since apparently in python lists - # are "more efficient"? - self.reg_addrs = [tuple(val) for val in value] + # XXX: ``msgspec`` doesn't support serializing tuples + # so just cash manually here since it's what our + # internals expect. + # + self.reg_addrs = [ + # TODO: we don't really NEED these as tuples? + # so we can probably drop this casting since + # apparently in python lists are "more + # efficient"? + tuple(val) + for val in spawnspec.reg_addrs + ] - else: - setattr(self, attr, value) + # for attr, value in parent_data.items(): + for _, attr, value in pretty_struct.iter_fields( + spawnspec, + ): + setattr(self, attr, value) + # if ( + # attr == 'reg_addrs' + # and value + # ): + # self.reg_addrs = [tuple(val) for val in value] + # else: + # setattr(self, attr, value) - return chan, accept_addrs + return ( + chan, + accept_addrs, + ) except OSError: # failed to connect log.warning( @@ -1434,7 +1492,7 @@ class Actor: self, chan: Channel - ) -> tuple[str, str]: + ) -> msgtypes.Aid: ''' Exchange `(name, UUIDs)` identifiers as the first communication step with any (peer) remote `Actor`. @@ -1443,14 +1501,27 @@ class Actor: "actor model" parlance. 
''' - await chan.send(self.uid) - value: tuple = await chan.recv() - uid: tuple[str, str] = (str(value[0]), str(value[1])) + name, uuid = self.uid + await chan.send( + msgtypes.Aid( + name=name, + uuid=uuid, + ) + ) + aid: msgtypes.Aid = await chan.recv() + chan.aid = aid + + uid: tuple[str, str] = ( + # str(value[0]), + # str(value[1]) + aid.name, + aid.uuid, + ) if not isinstance(uid, tuple): raise ValueError(f"{uid} is not a valid uid?!") - chan.uid = str(uid[0]), str(uid[1]) + chan.uid = uid return uid def is_infected_aio(self) -> bool: @@ -1510,7 +1581,8 @@ async def async_main( # because we're running in mp mode if ( set_accept_addr_says_rent - and set_accept_addr_says_rent is not None + and + set_accept_addr_says_rent is not None ): accept_addrs = set_accept_addr_says_rent diff --git a/tractor/_spawn.py b/tractor/_spawn.py index 48135cc9..824f41f3 100644 --- a/tractor/_spawn.py +++ b/tractor/_spawn.py @@ -49,6 +49,9 @@ from tractor._portal import Portal from tractor._runtime import Actor from tractor._entry import _mp_main from tractor._exceptions import ActorFailure +from tractor.msg.types import ( + SpawnSpec, +) if TYPE_CHECKING: @@ -493,14 +496,25 @@ async def trio_proc( portal, ) - # send additional init params - await chan.send({ - '_parent_main_data': subactor._parent_main_data, - 'enable_modules': subactor.enable_modules, - 'reg_addrs': subactor.reg_addrs, - 'bind_addrs': bind_addrs, - '_runtime_vars': _runtime_vars, - }) + # send a "spawning specification" which configures the + # initial runtime state of the child. + await chan.send( + SpawnSpec( + _parent_main_data=subactor._parent_main_data, + enable_modules=subactor.enable_modules, + reg_addrs=subactor.reg_addrs, + bind_addrs=bind_addrs, + _runtime_vars=_runtime_vars, + ) + ) + + # await chan.send({ + # '_parent_main_data': subactor._parent_main_data, + # 'enable_modules': subactor.enable_modules, + # 'reg_addrs': subactor.reg_addrs, + # 'bind_addrs': bind_addrs, + # '_runtime_vars': _runtime_vars, + # }) # track subactor in current nursery curr_actor: Actor = current_actor() diff --git a/tractor/_streaming.py b/tractor/_streaming.py index 90c33d31..941cfe8d 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -43,6 +43,11 @@ from .trionics import ( broadcast_receiver, BroadcastReceiver, ) +from tractor.msg import ( + Stop, + Yield, + Error, +) if TYPE_CHECKING: from ._context import Context @@ -94,21 +99,25 @@ class MsgStream(trio.abc.Channel): self, allow_msg_keys: list[str] = ['yield'], ): - msg: dict = self._rx_chan.receive_nowait() + # msg: dict = self._rx_chan.receive_nowait() + msg: Yield|Stop = self._rx_chan.receive_nowait() for ( i, key, ) in enumerate(allow_msg_keys): try: - return msg[key] - except KeyError as kerr: + # return msg[key] + return msg.pld + # except KeyError as kerr: + except AttributeError as attrerr: if i < (len(allow_msg_keys) - 1): continue _raise_from_no_key_in_msg( ctx=self._ctx, msg=msg, - src_err=kerr, + # src_err=kerr, + src_err=attrerr, log=log, expect_key=key, stream=self, @@ -148,18 +157,22 @@ class MsgStream(trio.abc.Channel): src_err: Exception|None = None # orig tb try: try: - msg = await self._rx_chan.receive() - return msg['yield'] + msg: Yield = await self._rx_chan.receive() + # return msg['yield'] + return msg.pld - except KeyError as kerr: - src_err = kerr + # except KeyError as kerr: + except AttributeError as attrerr: + # src_err = kerr + src_err = attrerr # NOTE: may raise any of the below error types # includg EoC when a 'stop' msg is found. 
                _raise_from_no_key_in_msg(
                     ctx=self._ctx,
                     msg=msg,
-                    src_err=kerr,
+                    # src_err=kerr,
+                    src_err=attrerr,
                     log=log,
                     expect_key='yield',
                     stream=self,
@@ -514,11 +527,18 @@ class MsgStream(trio.abc.Channel):
             raise self._closed

         try:
+            # await self._ctx.chan.send(
+            #     payload={
+            #         'yield': data,
+            #         'cid': self._ctx.cid,
+            #     },
+            #     # hide_tb=hide_tb,
+            # )
             await self._ctx.chan.send(
-                payload={
-                    'yield': data,
-                    'cid': self._ctx.cid,
-                },
+                payload=Yield(
+                    cid=self._ctx.cid,
+                    pld=data,
+                ),
                 # hide_tb=hide_tb,
             )
         except (
diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py
index 255b1dbd..26155b22 100644
--- a/tractor/devx/_debug.py
+++ b/tractor/devx/_debug.py
@@ -935,6 +935,9 @@ async def _pause(
     # ``breakpoint()`` was awaited and begin handling stdio.
     log.debug('Entering sync world of the `pdb` REPL..')
     try:
+        # log.critical(
+        #     f'stack len: {len(pdb.stack)}\n'
+        # )
         debug_func(
             actor,
             pdb,
-- 
2.34.1


From 7908c9575e399243922604a91090e0abd114ceca Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Tue, 2 Apr 2024 14:32:16 -0400
Subject: [PATCH 017/305] Woops, only pack `Error(cid=cid)` if input is not
 `None`

---
 tractor/_exceptions.py | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py
index 7deda9d2..9c1dc36d 100644
--- a/tractor/_exceptions.py
+++ b/tractor/_exceptions.py
@@ -498,7 +498,7 @@ def pack_error(
     tb: str|None = None,
     cid: str|None = None,

-) -> Error|dict[str, dict]:
+) -> Error:
     '''
     Create an "error message" which boxes a locally caught
     exception's meta-data and encodes it for wire transport via an
@@ -561,19 +561,10 @@ def pack_error(
     # content's `.msgdata`).
     error_msg['tb_str'] = tb_str

-    # Error()
-    # pkt: dict = {
-    #     'error': error_msg,
-    # }
-    pkt: Error = Error(
-        cid=cid,
-        **error_msg,
-        # TODO: just get rid of `.pld` on this msg?
-    )
-    # if cid:
-    #     pkt['cid'] = cid
+    if cid is not None:
+        error_msg['cid'] = cid

-    return pkt
+    return Error(**error_msg)


 def unpack_error(
-- 
2.34.1


From e72bc5c2084486e3d852b25ce5433819520e50b0 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Tue, 2 Apr 2024 14:34:08 -0400
Subject: [PATCH 018/305] TOSQUASH f2ce4a3, timeout bump

---
 tests/test_context_stream_semantics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_context_stream_semantics.py b/tests/test_context_stream_semantics.py
index 36a5fd9f..d2b572cc 100644
--- a/tests/test_context_stream_semantics.py
+++ b/tests/test_context_stream_semantics.py
@@ -873,7 +873,7 @@ def test_one_end_stream_not_opened(
             enable_modules=[__name__],
         )

-        with trio.fail_after(0.8):
+        with trio.fail_after(1):
             async with portal.open_context(
                 entrypoint,
             ) as (ctx, sent):
-- 
2.34.1


From 335997966c33ce15eee9eb45b10b4553d3943fa0 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 3 Apr 2024 09:45:03 -0400
Subject: [PATCH 019/305] Flip default codec to our `Msg`-spec

Yes, this is "the switch" and will likely cause the test suite to bail
until a few more fixes come in.

Tweaked a couple `.msg` pkg exports:
- remove `__spec__` (used by modules) and change it to `__msg_types__:
  list[Msg]` as well as add a new `__msg_spec__: TypeAlias`, being the
  default `Any` paramed spec.
- tweak the naming of `msg.types` lists of runtime vs payload msgs to:
  `._runtime_msgs` and `._payload_msgs`.
- just build `__msg_types__` out of the above 2 lists.
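
As a quick sketch of the aggregation pattern this lands (a minimal,
illustrative mock-up assuming py3.11+ star-unpacking-in-subscript; the
`Ping`/`Data` structs are hypothetical stand-ins, NOT the real
`tractor.msg.types` defs):

    # stand-in msg types, one "runtime" + one "payload" style;
    # purely illustrative, not the actual `tractor.msg.types` set!
    from typing import TypeAlias, Union
    from msgspec import Struct

    class Ping(Struct, tag=True):  # fake runtime-dialog msg
        cid: str

    class Data(Struct, tag=True):  # fake payload-carrying msg
        cid: str
        pld: object

    _runtime_msgs: list[type] = [Ping]
    _payload_msgs: list[type] = [Data]

    # the full msg-class set is just the concatenation of both lists..
    __msg_types__: list[type] = _runtime_msgs + _payload_msgs

    # ..and the default (`Any`-payload) spec star-unpacks that list
    # into a single `Union` alias (requires py3.11+).
    __msg_spec__: TypeAlias = Union[*__msg_types__]

The upshot being that tweaking which msgs are in the spec is then just
list surgery prior to the `Union[*...]` unpack.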
--- tractor/msg/__init__.py | 12 ++++++-- tractor/msg/_codec.py | 4 +-- tractor/msg/types.py | 61 +++++++++++++++++++++++------------------ 3 files changed, 47 insertions(+), 30 deletions(-) diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py index d8f37477..fe965e0b 100644 --- a/tractor/msg/__init__.py +++ b/tractor/msg/__init__.py @@ -18,6 +18,10 @@ Built-in messaging patterns, types, APIs and helpers. ''' +from typing import ( + Union, + TypeAlias, +) from .ptr import ( NamespacePath as NamespacePath, ) @@ -50,6 +54,10 @@ from .types import ( Error as Error, - # full msg spec set - __spec__ as __spec__, + # full msg class set from above as list + __msg_types__ as __msg_types__, ) +# TODO: use new type declaration syntax for msg-type-spec +# https://docs.python.org/3/library/typing.html#type-aliases +# https://docs.python.org/3/reference/simple_stmts.html#type +__msg_spec__: TypeAlias = Union[*__msg_types__] diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index 32a58a56..56f24d62 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -438,8 +438,8 @@ _ctxvar_MsgCodec: MsgCodec = RunVar( 'msgspec_codec', # TODO: move this to our new `Msg`-spec! - default=_def_msgspec_codec, - # default=_def_tractor_codec, + # default=_def_msgspec_codec, + default=_def_tractor_codec, ) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index a81473d7..b246cb61 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -26,7 +26,7 @@ from __future__ import annotations import types from typing import ( Any, - Callable, + # Callable, Generic, Literal, Type, @@ -340,50 +340,54 @@ class Error( # class Overrun(Msg): # cid: str +_runtime_msgs: list[Msg] = [ -# built-in SC shuttle protocol msg type set in -# approx order of the IPC txn-state spaces. -__spec__: list[Msg] = [ - - # identity handshake + # identity handshake on first IPC `Channel` contact. Aid, - # spawn specification from parent + # parent-to-child spawn specification passed as 2nd msg after + # handshake ONLY after child connects back to parent. SpawnSpec, # inter-actor RPC initiation - Start, - StartAck, + Start, # schedule remote task-as-func + StartAck, # ack the schedule request - # no-outcome-yet IAC (inter-actor-communication) - Started, - Yield, + # emission from `MsgStream.aclose()` Stop, - # termination outcomes - Return, + # box remote errors, normally subtypes + # of `RemoteActorError`. Error, ] -_runtime_spec_msgs: list[Msg] = [ - Aid, - SpawnSpec, - Start, - StartAck, - Stop, - Error, -] -_payload_spec_msgs: list[Msg] = [ +# the no-outcome-yet IAC (inter-actor-communication) sub-set which +# can be `Msg.pld` payload field type-limited by application code +# using `apply_codec()` and `limit_msg_spec()`. +_payload_msgs: list[Msg] = [ + # first from `Context.started()` Started, + + # any sent via `MsgStream.send()` Yield, + + # the final value returned from a `@context` decorated + # IPC endpoint. Return, ] +# built-in SC shuttle protocol msg type set in +# approx order of the IPC txn-state spaces. 
+__msg_types__: list[Msg] = ( + _runtime_msgs + + + _payload_msgs +) + def mk_msg_spec( payload_type_union: Union[Type] = Any, - # boxing_msg_set: list[Msg] = _payload_spec_msgs, spec_build_method: Literal[ 'indexed_generics', # works 'defstruct', @@ -424,12 +428,12 @@ def mk_msg_spec( defs_msg_types: list[Msg] = [] nc_msg_types: list[Msg] = [] - for msgtype in __spec__: + for msgtype in __msg_types__: # for the NON-payload (user api) type specify-able # msgs types, we simply aggregate the def as is # for inclusion in the output type `Union`. - if msgtype not in _payload_spec_msgs: + if msgtype not in _payload_msgs: ipc_msg_types.append(msgtype) continue @@ -535,6 +539,11 @@ def mk_msg_spec( # TODO: make something similar to this inside `._codec` such that # user can just pass a type table of some sort? +# -[ ] we would need to decode all msgs to `pretty_struct.Struct` +# and then call `.to_dict()` on them? +# -[ ] we're going to need to re-impl all the stuff changed in the +# runtime port such that it can handle dicts or `Msg`s? +# # def mk_dict_msg_codec_hooks() -> tuple[Callable, Callable]: # ''' # Deliver a `enc_hook()`/`dec_hook()` pair which does -- 2.34.1 From 13ecb151dbce8173c6b638d8a56321aee3417863 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 3 Apr 2024 09:50:22 -0400 Subject: [PATCH 020/305] Start a new `._testing.fault_simulation` Since I needed the `break_ipc()` helper from the `examples/advanced_faults/ipc_failure_during_stream.py` used in the `test_advanced_faults` suite, might as well move it into a pkg-wide importable module. Also changed the default break method to be `socket_close` which just calls `Stream.socket.close()` underneath in `trio`. Also tweak that example to not keep sending after the stream has been broken since with new `trio` that will raise `ClosedResourceError` and in the wrapping test we generally speaking want to see a hang and then cancel via simulated user sent SIGINT/ctl-c. --- .../ipc_failure_during_stream.py | 86 ++++------------- tractor/_testing/__init__.py | 3 + tractor/_testing/fault_simulation.py | 92 +++++++++++++++++++ 3 files changed, 112 insertions(+), 69 deletions(-) create mode 100644 tractor/_testing/fault_simulation.py diff --git a/examples/advanced_faults/ipc_failure_during_stream.py b/examples/advanced_faults/ipc_failure_during_stream.py index 9dca92b1..60b28c3e 100644 --- a/examples/advanced_faults/ipc_failure_during_stream.py +++ b/examples/advanced_faults/ipc_failure_during_stream.py @@ -21,75 +21,12 @@ import trio import pytest -async def break_ipc( - stream: MsgStream, - method: str|None = None, - pre_close: bool = False, - - def_method: str = 'eof', - -) -> None: - ''' - XXX: close the channel right after an error is raised - purposely breaking the IPC transport to make sure the parent - doesn't get stuck in debug or hang on the connection join. - this more or less simulates an infinite msg-receive hang on - the other end. 
- - ''' - # close channel via IPC prot msging before - # any transport breakage - if pre_close: - await stream.aclose() - - method: str = method or def_method - print( - '#################################\n' - 'Simulating CHILD-side IPC BREAK!\n' - f'method: {method}\n' - f'pre `.aclose()`: {pre_close}\n' - '#################################\n' - ) - - match method: - case 'trans_aclose': - await stream._ctx.chan.transport.stream.aclose() - - case 'eof': - await stream._ctx.chan.transport.stream.send_eof() - - case 'msg': - await stream._ctx.chan.send(None) - - # TODO: the actual real-world simulated cases like - # transport layer hangs and/or lower layer 2-gens type - # scenarios.. - # - # -[ ] already have some issues for this general testing - # area: - # - https://github.com/goodboy/tractor/issues/97 - # - https://github.com/goodboy/tractor/issues/124 - # - PR from @guille: - # https://github.com/goodboy/tractor/pull/149 - # case 'hang': - # TODO: framework research: - # - # - https://github.com/GuoTengda1993/pynetem - # - https://github.com/shopify/toxiproxy - # - https://manpages.ubuntu.com/manpages/trusty/man1/wirefilter.1.html - - case _: - raise RuntimeError( - f'IPC break method unsupported: {method}' - ) - - async def break_ipc_then_error( stream: MsgStream, break_ipc_with: str|None = None, pre_close: bool = False, ): - await break_ipc( + await _testing.break_ipc( stream=stream, method=break_ipc_with, pre_close=pre_close, @@ -121,6 +58,7 @@ async def recv_and_spawn_net_killers( Receive stream msgs and spawn some IPC killers mid-stream. ''' + broke_ipc: bool = False await ctx.started() async with ( ctx.open_stream() as stream, @@ -128,13 +66,17 @@ async def recv_and_spawn_net_killers( ): async for i in stream: print(f'child echoing {i}') - await stream.send(i) + if not broke_ipc: + await stream.send(i) + else: + await trio.sleep(0.01) if ( break_ipc_after and i >= break_ipc_after ): + broke_ipc = True n.start_soon( iter_ipc_stream, stream, @@ -242,14 +184,13 @@ async def main( # await stream._ctx.chan.send(None) # await stream._ctx.chan.transport.stream.send_eof() await stream._ctx.chan.transport.stream.aclose() - ipc_break_sent = True # it actually breaks right here in the - # mp_spawn/forkserver backends and thus the zombie - # reaper never even kicks in? - print(f'parent sending {i}') + # mp_spawn/forkserver backends and thus the + # zombie reaper never even kicks in? try: + print(f'parent sending {i}') await stream.send(i) except ContextCancelled as ctxc: print( @@ -262,6 +203,13 @@ async def main( # TODO: is this needed or no? raise + except trio.ClosedResourceError: + # NOTE: don't send if we already broke the + # connection to avoid raising a closed-error + # such that we drop through to the ctl-c + # mashing by user. + await trio.sleep(0.01) + # timeout: int = 1 # with trio.move_on_after(timeout) as cs: async with stuff_hangin_ctlc() as timeout: diff --git a/tractor/_testing/__init__.py b/tractor/_testing/__init__.py index 876c87e8..fd79fe20 100644 --- a/tractor/_testing/__init__.py +++ b/tractor/_testing/__init__.py @@ -26,6 +26,9 @@ import tractor from .pytest import ( tractor_test as tractor_test ) +from .fault_simulation import ( + break_ipc as break_ipc, +) def repodir() -> pathlib.Path: diff --git a/tractor/_testing/fault_simulation.py b/tractor/_testing/fault_simulation.py new file mode 100644 index 00000000..fbd97bf5 --- /dev/null +++ b/tractor/_testing/fault_simulation.py @@ -0,0 +1,92 @@ +# tractor: structured concurrent "actors". 
+# Copyright 2018-eternity Tyler Goodlet. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +`pytest` utils helpers and plugins for testing `tractor`'s runtime +and applications. + +''' + +from tractor import ( + MsgStream, +) + +async def break_ipc( + stream: MsgStream, + method: str|None = None, + pre_close: bool = False, + + def_method: str = 'socket_close', + +) -> None: + ''' + XXX: close the channel right after an error is raised + purposely breaking the IPC transport to make sure the parent + doesn't get stuck in debug or hang on the connection join. + this more or less simulates an infinite msg-receive hang on + the other end. + + ''' + # close channel via IPC prot msging before + # any transport breakage + if pre_close: + await stream.aclose() + + method: str = method or def_method + print( + '#################################\n' + 'Simulating CHILD-side IPC BREAK!\n' + f'method: {method}\n' + f'pre `.aclose()`: {pre_close}\n' + '#################################\n' + ) + + match method: + case 'socket_close': + await stream._ctx.chan.transport.stream.aclose() + + case 'socket_eof': + # NOTE: `trio` does the following underneath this + # call in `src/trio/_highlevel_socket.py`: + # `Stream.socket.shutdown(tsocket.SHUT_WR)` + await stream._ctx.chan.transport.stream.send_eof() + + # TODO: remove since now this will be invalid with our + # new typed msg spec? + # case 'msg': + # await stream._ctx.chan.send(None) + + # TODO: the actual real-world simulated cases like + # transport layer hangs and/or lower layer 2-gens type + # scenarios.. 
+ # + # -[ ] already have some issues for this general testing + # area: + # - https://github.com/goodboy/tractor/issues/97 + # - https://github.com/goodboy/tractor/issues/124 + # - PR from @guille: + # https://github.com/goodboy/tractor/pull/149 + # case 'hang': + # TODO: framework research: + # + # - https://github.com/GuoTengda1993/pynetem + # - https://github.com/shopify/toxiproxy + # - https://manpages.ubuntu.com/manpages/trusty/man1/wirefilter.1.html + + case _: + raise RuntimeError( + f'IPC break method unsupported: {method}' + ) -- 2.34.1 From 5a79a17dbbd357c50b1fe596bd1122291abcc521 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 5 Apr 2024 10:53:07 -0400 Subject: [PATCH 021/305] Use `._testing.break_ipc()` in final advanced fault test child ctx --- tests/test_advanced_faults.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/test_advanced_faults.py b/tests/test_advanced_faults.py index 52db139f..dfaeb68e 100644 --- a/tests/test_advanced_faults.py +++ b/tests/test_advanced_faults.py @@ -13,6 +13,7 @@ import trio import tractor from tractor._testing import ( examples_dir, + break_ipc, ) @@ -93,7 +94,8 @@ def test_ipc_channel_break_during_stream( expect_final_exc = trio.ClosedResourceError mod: ModuleType = import_path( - examples_dir() / 'advanced_faults' / 'ipc_failure_during_stream.py', + examples_dir() / 'advanced_faults' + / 'ipc_failure_during_stream.py', root=examples_dir(), consider_namespace_packages=False, ) @@ -225,9 +227,15 @@ async def break_ipc_after_started( ) -> None: await ctx.started() async with ctx.open_stream() as stream: - await stream.aclose() - await trio.sleep(0.2) - await ctx.chan.send(None) + + # TODO: make a test which verifies the error + # for this, i.e. raises a `MsgTypeError` + # await ctx.chan.send(None) + + await break_ipc( + stream=stream, + pre_close=True, + ) print('child broke IPC and terminating') -- 2.34.1 From f1dd6474bfde0599541520111352d5b1b9186d73 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 5 Apr 2024 11:36:09 -0400 Subject: [PATCH 022/305] Extend codec test to for msg-spec parameterizing Set a diff `Msg.pld` spec per test and then send multiple types to a child actor making sure the child can only send certain types over a stream and fails with validation or decode errors ow. The test is also param-ed both with and without hooks demonstrating how a custom type, `NamespacePath`, needs them for effective use. The subactor IPC context child is passed a `expect_ipc_send: dict` which relays the values along with their expected `.send()`-ability. Deats on technical refinements: ------ - ------ - added a `iter_maybe_sends()` send-value-as-msg-auditor and predicate generator (literally) so as to be able to pre-determine if given the current codec and `send_values` which values are expected to be IPC transmittable. - as per ^, the diff value-msgs are first round-tripped inside a `Started` msg using the configured codec in the parent/root actor before bothering with using IPC primitives + a subactor; this is how the `expect_ipc_send` table is generated initially. - for serializing the specs (`Union[Type]`s as required by `msgspec`), added a pair of codec hooks: `enc/dec_type_union()` (that ideally we move into a `.msg` submod eventually) which code the type-values as a `list[str]` of names. 
- the `dec_` hook had to be modified to NOT raise an error when an
  invalid/unhandled value arrives; this is because we do NOT want the
  RPC msg handling loop to raise on the `async for msg in chan:` and
  instead prefer to ignore and warn (for now, but eventually respond
  with error msg - see notes in hook body) these msgs when sent during
  a streaming phase; `Context.started()` will however error on a bad
  input for the current msg-spec since it is part of the "cheap"
  dialog (again see notes in `._context`) wherein the `Started` msg
  is always roundtripped prior to `Channel.send()` to guarantee the
  child adheres to its own spec.
- tossed in lotsa `print()`s for console grokking of the run progress.

Further notes on typed-msging breaking cancellation:
------ - ------
- turns out the runtime's cancellation implementation, being done
  with `Actor.cancel()` methods and friends, will actually break when
  a stringent spec is applied (eg. a single type-spec) since the
  return values from said methods are generally `bool`s..
- this means we do indeed need special handling of "runtime RPC
  method invocations" since ideally a user's msg-spec choices do not
  break core functionality on them XD
  => The obvi solution is to add a/some special sub-`Msg` types for
  such cases, possibly just a `RuntimeReturn(Return)` type that will
  always include a `.pld: bool` for these cancel methods such that
  their results are always handled without msg type errors.

More to come on a (hopefully) elegant solution to that last bit!
---
 tests/test_caps_based_msging.py | 648 +++++++++++++++++++++++---------
 1 file changed, 462 insertions(+), 186 deletions(-)

diff --git a/tests/test_caps_based_msging.py b/tests/test_caps_based_msging.py
index b42d9e35..acc1f307 100644
--- a/tests/test_caps_based_msging.py
+++ b/tests/test_caps_based_msging.py
@@ -5,6 +5,7 @@ Low-level functional audits for our

 B~)

 '''
+import typing
 from typing import (
     Any,
     Type,
@@ -23,7 +24,9 @@ from msgspec import (
     ValidationError,
 )
 import pytest
+
 import tractor
+from tractor import _state
 from tractor.msg import (
     _codec,
     _ctxvar_MsgCodec,
@@ -34,12 +37,9 @@ from tractor.msg import (
     apply_codec,
     current_codec,
 )
-from tractor.msg import (
-    types,
-)
-from tractor import _state
 from tractor.msg.types import (
-    # PayloadT,
+    _payload_msgs,
+    log,
     Msg,
     Started,
     mk_msg_spec,
@@ -62,17 +62,14 @@ def test_msg_spec_xor_pld_spec():
     )


-def ex_func(*args):
-    print(f'ex_func({args})')
-
-
 def mk_custom_codec(
     pld_spec: Union[Type]|Any,
+    add_hooks: bool,

 ) -> MsgCodec:
     '''
     Create custom `msgpack` enc/dec-hooks and set a `Decoder`
-    which only loads `NamespacePath` types.
+    which only loads `pld_spec` (like `NamespacePath`) types.
''' uid: tuple[str, str] = tractor.current_actor().uid @@ -83,61 +80,75 @@ def mk_custom_codec( # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types def enc_nsp(obj: Any) -> Any: + print(f'{uid} ENC HOOK') match obj: case NamespacePath(): print( f'{uid}: `NamespacePath`-Only ENCODE?\n' - f'type: {type(obj)}\n' - f'obj: {obj}\n' + f'obj-> `{obj}`: {type(obj)}\n' ) - + # if type(obj) != NamespacePath: + # breakpoint() return str(obj) - logmsg: str = ( - f'{uid}: Encoding `{obj}: <{type(obj)}>` not supported' - f'type: {type(obj)}\n' - f'obj: {obj}\n' + print( + f'{uid}\n' + 'CUSTOM ENCODE\n' + f'obj-arg-> `{obj}`: {type(obj)}\n' + ) + logmsg: str = ( + f'{uid}\n' + 'FAILED ENCODE\n' + f'obj-> `{obj}: {type(obj)}`\n' ) - print(logmsg) raise NotImplementedError(logmsg) def dec_nsp( - type: Type, + obj_type: Type, obj: Any, ) -> Any: print( - f'{uid}: CUSTOM DECODE\n' - f'input type: {type}\n' - f'obj: {obj}\n' - f'type(obj): `{type(obj).__class__}`\n' + f'{uid}\n' + 'CUSTOM DECODE\n' + f'type-arg-> {obj_type}\n' + f'obj-arg-> `{obj}`: {type(obj)}\n' ) nsp = None - # This never seems to hit? - if isinstance(obj, Msg): - print(f'Msg type: {obj}') - if ( - type is NamespacePath + obj_type is NamespacePath and isinstance(obj, str) and ':' in obj ): nsp = NamespacePath(obj) + # TODO: we could built a generic handler using + # JUST matching the obj_type part? + # nsp = obj_type(obj) if nsp: print(f'Returning NSP instance: {nsp}') return nsp logmsg: str = ( - f'{uid}: Decoding `{obj}: <{type(obj)}>` not supported' - f'input type: {type(obj)}\n' - f'obj: {obj}\n' - f'type(obj): `{type(obj).__class__}`\n' + f'{uid}\n' + 'FAILED DECODE\n' + f'type-> {obj_type}\n' + f'obj-arg-> `{obj}`: {type(obj)}\n' ) - print(logmsg) - raise NotImplementedError(logmsg) - + # TODO: figure out the ignore subsys for this! + # -[ ] option whether to defense-relay backc the msg + # inside an `Invalid`/`Ignore` + # -[ ] how to make this handling pluggable such that a + # `Channel`/`MsgTransport` can intercept and process + # back msgs either via exception handling or some other + # signal? + log.warning(logmsg) + # NOTE: this delivers the invalid + # value up to `msgspec`'s decoding + # machinery for error raising. + return obj + # raise NotImplementedError(logmsg) nsp_codec: MsgCodec = mk_codec( ipc_pld_spec=pld_spec, @@ -151,97 +162,32 @@ def mk_custom_codec( # `Any`-decoded-pld the enc has no knowledge (by default) # how to enc `NamespacePath` (nsp), so we add a custom # hook to do that ALWAYS. - enc_hook=enc_nsp, + enc_hook=enc_nsp if add_hooks else None, # XXX NOTE: pretty sure this is mutex with the `type=` to # `Decoder`? so it won't work in tandem with the # `ipc_pld_spec` passed above? - dec_hook=dec_nsp, + dec_hook=dec_nsp if add_hooks else None, ) return nsp_codec -@tractor.context -async def send_back_nsp( - ctx: Context, - expect_debug: bool, - use_any_spec: bool, - -) -> None: - ''' - Setup up a custom codec to load instances of `NamespacePath` - and ensure we can round trip a func ref with our parent. 
- - ''' - # debug mode sanity check - assert expect_debug == _state.debug_mode() - - # task: trio.Task = trio.lowlevel.current_task() - - # TreeVar - # curr_codec = _ctxvar_MsgCodec.get_in(task) - - # ContextVar - # task_ctx: Context = task.context - # assert _ctxvar_MsgCodec not in task_ctx - - curr_codec = _ctxvar_MsgCodec.get() - assert curr_codec is _codec._def_tractor_codec - - if use_any_spec: - pld_spec = Any - else: - # NOTE: don't need the |None here since - # the parent side will never send `None` like - # we do here in the implicit return at the end of this - # `@context` body. - pld_spec = NamespacePath # |None - - nsp_codec: MsgCodec = mk_custom_codec( - pld_spec=pld_spec, - ) - with apply_codec(nsp_codec) as codec: - chk_codec_applied( - custom_codec=nsp_codec, - enter_value=codec, - ) - - # ensure roundtripping works locally - nsp = NamespacePath.from_ref(ex_func) - wire_bytes: bytes = nsp_codec.encode( - Started( - cid=ctx.cid, - pld=nsp - ) - ) - msg: Started = nsp_codec.decode(wire_bytes) - pld = msg.pld - assert pld == nsp - - await ctx.started(nsp) - async with ctx.open_stream() as ipc: - async for msg in ipc: - - if use_any_spec: - assert msg == f'{__name__}:ex_func' - - # TODO: as per below - # assert isinstance(msg, NamespacePath) - assert isinstance(msg, str) - else: - assert isinstance(msg, NamespacePath) - - await ipc.send(msg) - - def chk_codec_applied( - custom_codec: MsgCodec, - enter_value: MsgCodec, + expect_codec: MsgCodec, + enter_value: MsgCodec|None = None, + ) -> MsgCodec: + ''' + buncha sanity checks ensuring that the IPC channel's + context-vars are set to the expected codec and that are + ctx-var wrapper APIs match the same. - # task: trio.Task = trio.lowlevel.current_task() - + ''' + # TODO: play with tricyle again, bc this is supposed to work + # the way we want? + # # TreeVar + # task: trio.Task = trio.lowlevel.current_task() # curr_codec = _ctxvar_MsgCodec.get_in(task) # ContextVar @@ -249,46 +195,358 @@ def chk_codec_applied( # assert _ctxvar_MsgCodec in task_ctx # curr_codec: MsgCodec = task.context[_ctxvar_MsgCodec] + # NOTE: currently we use this! 
     # RunVar
-    curr_codec: MsgCodec = _ctxvar_MsgCodec.get()
+    curr_codec: MsgCodec = current_codec()
     last_read_codec = _ctxvar_MsgCodec.get()
-    assert curr_codec is last_read_codec
+    # assert curr_codec is last_read_codec

     assert (
+        (same_codec := expect_codec) is

         # returned from `mk_codec()`
-        custom_codec is

         # yielded value from `apply_codec()`
-        enter_value is

         # read from current task's `contextvars.Context`
         curr_codec is
-
-        # public API for all of the above
-        current_codec()
+        last_read_codec

         # the default `msgspec` settings
         is not _codec._def_msgspec_codec
         is not _codec._def_tractor_codec
     )

+    if enter_value:
+        enter_value is same_codec
+
+
+def iter_maybe_sends(
+    send_items: dict[Union[Type], Any] | list[tuple],
+    ipc_pld_spec: Union[Type] | Any,
+    add_codec_hooks: bool,
+
+    codec: MsgCodec|None = None,
+
+) -> tuple[Any, bool]:
+
+    if isinstance(send_items, dict):
+        send_items = send_items.items()
+
+    for (
+        send_type_spec,
+        send_value,
+    ) in send_items:
+
+        expect_roundtrip: bool = False
+
+        # values-to-typespec sanity
+        send_type = type(send_value)
+        assert send_type == send_type_spec or (
+            (subtypes := getattr(send_type_spec, '__args__', None))
+            and send_type in subtypes
+        )
+
+        spec_subtypes: set[Union[Type]] = (
+            getattr(
+                ipc_pld_spec,
+                '__args__',
+                {ipc_pld_spec,},
+            )
+        )
+        send_in_spec: bool = (
+            send_type == ipc_pld_spec
+            or (
+                ipc_pld_spec != Any
+                and  # presume `Union` of types
+                send_type in spec_subtypes
+            )
+            or (
+                ipc_pld_spec == Any
+                and
+                send_type != NamespacePath
+            )
+        )
+        expect_roundtrip = (
+            send_in_spec
+            # any spec should support all other
+            # builtin py values that we send
+            # except our custom nsp type which
+            # we should be able to send as long
+            # as we provide the custom codec hooks.
+            or (
+                ipc_pld_spec == Any
+                and
+                send_type == NamespacePath
+                and
+                add_codec_hooks
+            )
+        )
+
+        if codec is not None:
+            # XXX FIRST XXX ensure roundtripping works
+            # before touching any IPC primitives/APIs.
+            wire_bytes: bytes = codec.encode(
+                Started(
+                    cid='blahblah',
+                    pld=send_value,
+                )
+            )
+            # NOTE: demonstrates the decoder loading
+            # via our native SCIPP msg-spec
+            # (structured-conc-inter-proc-protocol)
+            # implemented as per,
+            try:
+                msg: Started = codec.decode(wire_bytes)
+                if not expect_roundtrip:
+                    pytest.fail(
+                        f'NOT-EXPECTED able to roundtrip value given spec:\n'
+                        f'ipc_pld_spec -> {ipc_pld_spec}\n'
+                        f'value -> {send_value}: {send_type}\n'
+                    )
+
+                pld = msg.pld
+                assert pld == send_value
+
+            except ValidationError:
+                if expect_roundtrip:
+                    pytest.fail(
+                        f'EXPECTED to roundtrip value given spec:\n'
+                        f'ipc_pld_spec -> {ipc_pld_spec}\n'
+                        f'value -> {send_value}: {send_type}\n'
+                    )
+
+        yield (
+            str(send_type),
+            send_value,
+            expect_roundtrip,
+        )
+
+
+def dec_type_union(
+    type_names: list[str],
+) -> Type:
+    '''
+    Look up types by name, compile into a list and then create and
+    return a `typing.Union` from the full set.
+
+    '''
+    import importlib
+    types: list[Type] = []
+    for type_name in type_names:
+        for ns in [
+            typing,
+            importlib.import_module(__name__),
+        ]:
+            if type_ref := getattr(
+                ns,
+                type_name,
+                False,
+            ):
+                types.append(type_ref)
+
+    # special case handling only..
+    # ipc_pld_spec: Union[Type] = eval(
+    #     pld_spec_str,
+    #     {},  # globals
+    #     {'typing': typing},  # locals
+    # )
+
+    return Union[*types]
+
+
+def enc_type_union(
+    union_or_type: Union[Type]|Type,
+) -> list[str]:
+    '''
+    Encode a type-union or single type to a list of type-name-strings
+    ready for IPC interchange.
+ + ''' + type_strs: list[str] = [] + for typ in getattr( + union_or_type, + '__args__', + {union_or_type,}, + ): + type_strs.append(typ.__qualname__) + + return type_strs + + +@tractor.context +async def send_back_nsp( + ctx: Context, + expect_debug: bool, + pld_spec_type_strs: list[str], + add_hooks: bool, + started_msg_bytes: bytes, + expect_ipc_send: dict[str, tuple[Any, bool]], + +) -> None: + ''' + Setup up a custom codec to load instances of `NamespacePath` + and ensure we can round trip a func ref with our parent. + + ''' + # debug mode sanity check (prolly superfluous but, meh) + assert expect_debug == _state.debug_mode() + + # init state in sub-actor should be default + chk_codec_applied( + expect_codec=_codec._def_tractor_codec, + ) + + # load pld spec from input str + ipc_pld_spec = dec_type_union( + pld_spec_type_strs, + ) + pld_spec_str = str(ipc_pld_spec) + + # same as on parent side config. + nsp_codec: MsgCodec = mk_custom_codec( + pld_spec=ipc_pld_spec, + add_hooks=add_hooks, + ) + with apply_codec(nsp_codec) as codec: + chk_codec_applied( + expect_codec=nsp_codec, + enter_value=codec, + ) + + print( + 'CHILD attempting `Started`-bytes DECODE..\n' + ) + try: + msg: Started = nsp_codec.decode(started_msg_bytes) + expected_pld_spec_str: str = msg.pld + assert pld_spec_str == expected_pld_spec_str + + # TODO: maybe we should add our own wrapper error so as to + # be interchange-lib agnostic? + # -[ ] the error type is wtv is raised from the hook so we + # could also require a type-class of errors for + # indicating whether the hook-failure can be handled by + # a nasty-dialog-unprot sub-sys? + except ValidationError: + + # NOTE: only in the `Any` spec case do we expect this to + # work since otherwise no spec covers a plain-ol' + # `.pld: str` + if pld_spec_str == 'Any': + raise + else: + print( + 'CHILD (correctly) unable to DECODE `Started`-bytes\n' + f'{started_msg_bytes}\n' + ) + + iter_send_val_items = iter(expect_ipc_send.values()) + sent: list[Any] = [] + for send_value, expect_send in iter_send_val_items: + try: + print( + f'CHILD attempting to `.started({send_value})`\n' + f'=> expect_send: {expect_send}\n' + f'SINCE, ipc_pld_spec: {ipc_pld_spec}\n' + f'AND, codec: {codec}\n' + ) + await ctx.started(send_value) + sent.append(send_value) + if not expect_send: + + # XXX NOTE XXX THIS WON'T WORK WITHOUT SPECIAL + # `str` handling! or special debug mode IPC + # msgs! + # await tractor.pause() + + raise RuntimeError( + # pytest.fail( + f'NOT-EXPECTED able to roundtrip value given spec:\n' + f'ipc_pld_spec -> {ipc_pld_spec}\n' + f'value -> {send_value}: {type(send_value)}\n' + ) + + break # move on to streaming block.. 
+ + except NotImplementedError: + print('FAILED ENCODE!') + + except tractor.MsgTypeError: + # await tractor.pause() + if expect_send: + pytest.fail( + f'EXPECTED to `.started()` value given spec:\n' + f'ipc_pld_spec -> {ipc_pld_spec}\n' + f'value -> {send_value}: {type(send_value)}\n' + ) + + async with ctx.open_stream() as ipc: + for send_value, expect_send in iter_send_val_items: + send_type: Type = type(send_value) + print( + 'CHILD report on send value\n' + f'ipc_pld_spec: {ipc_pld_spec}\n' + f'expect_send: {expect_send}\n' + f'val: {send_value}\n' + ) + try: + await ipc.send(send_value) + sent.append(send_value) + if not expect_send: + pytest.fail( + f'NOT-EXPECTED able to roundtrip value given spec:\n' + f'ipc_pld_spec -> {ipc_pld_spec}\n' + f'value -> {send_value}: {send_type}\n' + ) + except ValidationError: + if expect_send: + pytest.fail( + f'EXPECTED to roundtrip value given spec:\n' + f'ipc_pld_spec -> {ipc_pld_spec}\n' + f'value -> {send_value}: {send_type}\n' + ) + continue + + assert ( + len(sent) + == + len([val + for val, expect in + expect_ipc_send.values() + if expect is True]) + ) + + +def ex_func(*args): + print(f'ex_func({args})') + @pytest.mark.parametrize( 'ipc_pld_spec', [ - # _codec._def_msgspec_codec, Any, - # _codec._def_tractor_codec, - NamespacePath|None, + NamespacePath, + NamespacePath|None, # the "maybe" spec Bo ], ids=[ 'any_type', 'nsp_type', + 'maybe_nsp_type', ] ) +@pytest.mark.parametrize( + 'add_codec_hooks', + [ + True, + False, + ], + ids=['use_codec_hooks', 'no_codec_hooks'], +) def test_codec_hooks_mod( debug_mode: bool, ipc_pld_spec: Union[Type]|Any, + # send_value: None|str|NamespacePath, + add_codec_hooks: bool, ): ''' Audit the `.msg.MsgCodec` override apis details given our impl @@ -297,17 +555,17 @@ def test_codec_hooks_mod( ''' async def main(): + nsp = NamespacePath.from_ref(ex_func) + send_items: dict[Union, Any] = { + Union[None]: None, + Union[NamespacePath]: nsp, + Union[str]: str(nsp), + } - # task: trio.Task = trio.lowlevel.current_task() - - # ContextVar - # task_ctx: Context = task.context - # assert _ctxvar_MsgCodec not in task_ctx - - # TreeVar - # def_codec: MsgCodec = _ctxvar_MsgCodec.get_in(task) - def_codec = _ctxvar_MsgCodec.get() - assert def_codec is _codec._def_tractor_codec + # init default state for actor + chk_codec_applied( + expect_codec=_codec._def_tractor_codec, + ) async with tractor.open_nursery( debug_mode=debug_mode, @@ -323,79 +581,97 @@ def test_codec_hooks_mod( # `NamespacePath` nsp_codec: MsgCodec = mk_custom_codec( pld_spec=ipc_pld_spec, + add_hooks=add_codec_hooks, ) with apply_codec(nsp_codec) as codec: chk_codec_applied( - custom_codec=nsp_codec, + expect_codec=nsp_codec, enter_value=codec, ) + expect_ipc_send: dict[str, tuple[Any, bool]] = {} + + report: str = ( + 'Parent report on send values with\n' + f'ipc_pld_spec: {ipc_pld_spec}\n' + ' ------ - ------\n' + ) + for val_type_str, val, expect_send in iter_maybe_sends( + send_items, + ipc_pld_spec, + add_codec_hooks=add_codec_hooks, + ): + report += ( + f'send_value: {val}: {type(val)} ' + f'=> expect_send: {expect_send}\n' + ) + expect_ipc_send[val_type_str] = (val, expect_send) + + print( + report + + ' ------ - ------\n' + ) + assert len(expect_ipc_send) == len(send_items) + # now try over real IPC with a the subactor + # expect_ipc_rountrip: bool = True + expected_started = Started( + cid='cid', + pld=str(ipc_pld_spec), + ) + # build list of values we expect to receive from + # the subactor. 
+ expect_to_send: list[Any] = [ + val + for val, expect_send in expect_ipc_send.values() + if expect_send + ] + + pld_spec_type_strs: list[str] = enc_type_union(ipc_pld_spec) + + # TODO: send the original nsp here and + # test with `limit_msg_spec()` above? + # await tractor.pause() + print('PARENT opening IPC ctx!\n') async with ( + p.open_context( send_back_nsp, - # TODO: send the original nsp here and - # test with `limit_msg_spec()` above? expect_debug=debug_mode, - use_any_spec=(ipc_pld_spec==Any), - + pld_spec_type_strs=pld_spec_type_strs, + add_hooks=add_codec_hooks, + started_msg_bytes=nsp_codec.encode(expected_started), + expect_ipc_send=expect_ipc_send, ) as (ctx, first), + ctx.open_stream() as ipc, ): - if ipc_pld_spec is NamespacePath: - assert isinstance(first, NamespacePath) - + # ensure codec is still applied across + # `tractor.Context` + its embedded nursery. + chk_codec_applied( + expect_codec=nsp_codec, + enter_value=codec, + ) print( 'root: ENTERING CONTEXT BLOCK\n' f'type(first): {type(first)}\n' f'first: {first}\n' ) - # ensure codec is still applied across - # `tractor.Context` + its embedded nursery. - chk_codec_applied( - custom_codec=nsp_codec, - enter_value=codec, - ) + expect_to_send.remove(first) - first_nsp = NamespacePath(first) + # TODO: explicit values we expect depending on + # codec config! + # assert first == first_val + # assert first == f'{__name__}:ex_func' - # ensure roundtripping works - wire_bytes: bytes = nsp_codec.encode( - Started( - cid=ctx.cid, - pld=first_nsp + async for next_sent in ipc: + print( + 'Child sent next value\n' + f'{next_sent}: {type(next_sent)}\n' ) - ) - msg: Started = nsp_codec.decode(wire_bytes) - pld = msg.pld - assert pld == first_nsp + expect_to_send.remove(next_sent) - # try a manual decode of the started msg+pld - - # TODO: actually get the decoder loading - # to native once we spec our SCIPP msgspec - # (structurred-conc-inter-proc-protocol) - # implemented as per, - # https://github.com/goodboy/tractor/issues/36 - # - if ipc_pld_spec is NamespacePath: - assert isinstance(first, NamespacePath) - - # `Any`-payload-spec case - else: - assert isinstance(first, str) - assert first == f'{__name__}:ex_func' - - await ipc.send(first) - - with trio.move_on_after(.6): - async for msg in ipc: - print(msg) - - # TODO: as per above - # assert isinstance(msg, NamespacePath) - assert isinstance(msg, str) - await ipc.send(msg) - await trio.sleep(0.1) + # all sent values should have arrived! + assert not expect_to_send await p.cancel_actor() @@ -467,7 +743,7 @@ def chk_pld_type( roundtrip: bool|None = None pld_spec_msg_names: list[str] = [ - td.__name__ for td in types._payload_spec_msgs + td.__name__ for td in _payload_msgs ] for typedef in msg_types: -- 2.34.1 From fc6419251b059a97b304dde9423700840391e69c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 5 Apr 2024 13:59:43 -0400 Subject: [PATCH 023/305] Add buncha notes on `Start` field for "params" Such that the current `kwargs: dict` field can eventually be strictly msg-typed (eventually directly from a `@context` def) using modern typed python's hippest syntactical approach B) Also proto a new `CancelAck(Return)` subtype msg for supporting msg-spec agnostic `Actor.cancel_xx()` method calls in the runtime such that a user can't break cancellation (and thus SC) by dynamically setting a codec that doesn't allow `bool` results (as an eg. in this case). Note that the msg isn't used yet in `._rpc` but that's a comin! 
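
As a quick feel for why such a sub-type sidesteps the applied codec's
payload limits, a rough standalone sketch (plain `msgspec` with
simplified stand-in types, NOT the real `.msg.types` defs): since the
sub-type pins `.pld: bool` it always decodes, even when a `Return`
whose payload spec excludes `bool` would fail validation.

    import msgspec

    class Return(msgspec.Struct, tag=True):
        cid: str
        pld: str  # pretend the app limited `.pld` to `str`

    class CancelAck(Return, tag=True):
        pld: bool  # sub-type re-pins the payload field type

    enc = msgspec.msgpack.Encoder()
    dec = msgspec.msgpack.Decoder(Return | CancelAck)

    # a cancel method's `bool` result always round-trips,
    ack = dec.decode(enc.encode(CancelAck(cid='1', pld=True)))
    assert ack.pld is True

    # whereas a plain `Return` carrying a `bool` payload does not.
    try:
        dec.decode(enc.encode(Return(cid='1', pld=True)))
    except msgspec.ValidationError:
        print('`bool` rejected by the applied `.pld` spec')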
--- tractor/msg/types.py | 124 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 113 insertions(+), 11 deletions(-) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index b246cb61..3e7a2d7a 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -45,6 +45,10 @@ from msgspec import ( from tractor.msg import ( pretty_struct, ) +from tractor.log import get_logger + + +log = get_logger('tractor.msgspec') # type variable for the boxed payload field `.pld` PayloadT = TypeVar('PayloadT') @@ -185,7 +189,47 @@ class SpawnSpec( # | Union[DebugLock, DebugLocked, DebugRelease] # ) +# class Params( +# Struct, +# Generic[PayloadT], +# ): +# spec: PayloadT|ParamSpec +# inputs: InputsT|dict[str, Any] + # TODO: for eg. we could stringently check the target + # task-func's type sig and enforce it? + # as an example for an IPTC, + # @tractor.context + # async def send_back_nsp( + # ctx: Context, + # expect_debug: bool, + # pld_spec_str: str, + # add_hooks: bool, + # started_msg_dict: dict, + # ) -> : + + # TODO: figure out which of the `typing` feats we want to + # support: + # - plain ol `ParamSpec`: + # https://docs.python.org/3/library/typing.html#typing.ParamSpec + # - new in 3.12 type parameter lists Bo + # |_ https://docs.python.org/3/reference/compound_stmts.html#type-params + # |_ historical pep 695: https://peps.python.org/pep-0695/ + # |_ full lang spec: https://typing.readthedocs.io/en/latest/spec/ + # |_ on annotation scopes: + # https://docs.python.org/3/reference/executionmodel.html#annotation-scopes + # spec: ParamSpec[ + # expect_debug: bool, + # pld_spec_str: str, + # add_hooks: bool, + # started_msg_dict: dict, + # ] + + +# TODO: possibly sub-type for runtime method requests? +# -[ ] `Runtime(Start)` with a `.ns: str = 'self' or +# we can just enforce any such method as having a strict +# ns for calling funcs, namely the `Actor` instance? class Start( Struct, tag=True, @@ -212,9 +256,45 @@ class Start( ns: str func: str - kwargs: dict + # TODO: make this a sub-struct which can be further + # type-limited, maybe `Inputs`? + # => SEE ABOVE <= + kwargs: dict[str, Any] uid: tuple[str, str] # (calling) actor-id + # TODO: enforcing a msg-spec in terms `Msg.pld` + # parameterizable msgs to be used in the appls IPC dialog. + # + # -[ ] both as part of the `.open_context()` call AND as part of the + # immediate ack-reponse (see similar below) + # we should do spec matching and fail if anything is awry? + # + # -[ ] eventually spec should be generated/parsed from the + # type-annots as # desired in GH issue: + # https://github.com/goodboy/tractor/issues/365 + # + # -[ ] semantics of the mismatch case + # - when caller-callee specs we should raise + # a `MsgTypeError` or `MsgSpecError` or similar? + # + # -[ ] wrapper types for both spec types such that we can easily + # IPC transport them? + # - `TypeSpec: Union[Type]` + # * also a `.__contains__()` for doing `None in + # TypeSpec[None|int]` since rn you need to do it on + # `.__args__` for unions.. 
+    #  - `MsgSpec: Union[Type[Msg]]`
+    #
+    # -[ ] auto-genning this from new (in 3.12) type parameter lists Bo
+    # |_ https://docs.python.org/3/reference/compound_stmts.html#type-params
+    # |_ historical pep 695: https://peps.python.org/pep-0695/
+    # |_ full lang spec: https://typing.readthedocs.io/en/latest/spec/
+    # |_ on annotation scopes:
+    #    https://docs.python.org/3/reference/executionmodel.html#annotation-scopes
+    # |_ 3.13 will have subscriptable funcs Bo
+    #    https://peps.python.org/pep-0718/
+    pld_spec: str = str(Any)


 class StartAck(
     Struct,
@@ -235,14 +315,10 @@ class StartAck(
         'context',  # TODO: the only one eventually?
     ]

-    # TODO: as part of the reponse we should report our allowed
-    # msg spec which should be generated from the type-annots as
-    # desired in # https://github.com/goodboy/tractor/issues/365
-    # When this does not match what the starter/caller side
-    # expects we of course raise a `TypeError` just like if
-    # a function had been called using an invalid signature.
-    #
-    # msgspec: MsgSpec
+    # import typing
+    # eval(str(Any), {}, {'typing': typing})
+    # started_spec: str = str(Any)
+    # return_spec


 class Started(
@@ -290,6 +366,7 @@ class Stop(
     # pld: UnsetType = UNSET


+# TODO: is `Result` or `Out[come]` a better name?
 class Return(
     Msg,
     Generic[PayloadT],
@@ -302,6 +379,27 @@ class Return(
     pld: PayloadT


+class CancelAck(
+    Return,
+):
+    '''
+    Deliver the `bool` return-value from a cancellation `Actor`
+    method scheduled via a prior RPC request.
+
+    - `Actor.cancel()`
+    `|_.cancel_soon()`
+    `|_.cancel_rpc_tasks()`
+    `|_._cancel_task()`
+    `|_.cancel_server()`
+
+    RPCs to these methods must **always** be able to deliver a result
+    despite the currently configured IPC msg spec such that graceful
+    cancellation is always functional in the runtime.
+ + ''' + pld: bool + + class Error( Struct, tag=True, @@ -530,9 +628,13 @@ def mk_msg_spec( pld_spec: Union[Type] = specs[spec_build_method] runtime_spec: Union[Type] = Union[*ipc_msg_types] - + ipc_spec = pld_spec | runtime_spec + log.runtime( + 'Generating new IPC msg-spec\n' + f'{ipc_spec}\n' + ) return ( - pld_spec | runtime_spec, + ipc_spec, msgtypes_table[spec_build_method] + ipc_msg_types, ) -- 2.34.1 From 1544849bbf870d35bca3e24e529f9a91ac8a7e83 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 5 Apr 2024 14:04:32 -0400 Subject: [PATCH 024/305] Factor boxed-err formatting into new `pformat_boxed_tb()` helper for use elsewhere --- tractor/_exceptions.py | 78 +++++++++++++++++++++++++++++++----------- 1 file changed, 58 insertions(+), 20 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 9c1dc36d..28c61628 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -104,6 +104,57 @@ def get_err_type(type_name: str) -> BaseException|None: return type_ref +def pformat_boxed_tb( + tb_str: str, + fields_str: str|None = None, + field_prefix: str = ' |_', + indent: str = ' '*2 +) -> str: + if ( + fields_str + and + field_prefix + ): + fields: str = textwrap.indent( + fields_str, + # prefix=' '*2, + # prefix=' |_', + prefix=field_prefix, + ) + else: + fields = fields_str or '' + + # body_indent: str = len(field_prefix) * ' ' + body: str = ( + + # orig + # f' |\n' + # f' ------ - ------\n\n' + # f'{tb_str}\n' + # f' ------ - ------\n' + # f' _|\n' + + f'|\n' + f' ------ - ------\n\n' + f'{tb_str}\n' + f' ------ - ------\n' + f'_|\n' + ) + if len(indent): + body: str = textwrap.indent( + body, + # prefix=body_indent, + prefix=indent, + ) + + return ( + fields + + + body + ) + # return body + + # TODO: rename to just `RemoteError`? class RemoteActorError(Exception): ''' @@ -117,7 +168,7 @@ class RemoteActorError(Exception): ''' reprol_fields: list[str] = [ 'src_uid', - 'relay_path', + # 'relay_path', ] def __init__( @@ -249,7 +300,7 @@ class RemoteActorError(Exception): @property def tb_str( self, - indent: str = ' '*3, + indent: str = ' ', ) -> str: if remote_tb := self.msgdata.get('tb_str'): return textwrap.indent( @@ -309,25 +360,12 @@ class RemoteActorError(Exception): fields: str = self._mk_fields_str( _body_fields, ) - fields: str = textwrap.indent( - fields, - # prefix=' '*2, - prefix=' |_', + body: str = pformat_boxed_tb( + tb_str=self.tb_str, + fields_str=fields, + field_prefix=' |_', + indent=' ', # no indent? ) - indent: str = ''*1 - body: str = ( - f'{fields}' - f' |\n' - f' ------ - ------\n\n' - f'{self.tb_str}\n' - f' ------ - ------\n' - f' _|\n' - ) - if indent: - body: str = textwrap.indent( - body, - prefix=indent, - ) return ( f'<{type(self).__name__}(\n' f'{body}' -- 2.34.1 From abc9e68f33c8e70cbcd4f77e10a6705e705e29cc Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 5 Apr 2024 16:00:32 -0400 Subject: [PATCH 025/305] Make `Context.started()` a type checked IPC send As detailed in the surrounding notes, it's pretty advantageous to always have the child context task ensure the first msg it relays back is msg-type checked against the current spec and thus `MsgCodec`. Implement the check via a simple codec-roundtrip of the `Started` msg such that the `.pld` payload is always validated before transit. This ensures the child will fail early and notify the parent before any streaming takes place (i.e. the "nasty" dialog protocol phase). 
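
For a concrete feel, a boiled-down sketch of the check (standalone
`msgspec` code with a stand-in `Started` type, NOT the exact
`Context.started()` body):

    import msgspec

    class Started(msgspec.Struct, tag=True):
        cid: str
        pld: str  # stand-in for the currently applied `pld_spec`

    enc = msgspec.msgpack.Encoder()
    dec = msgspec.msgpack.Decoder(Started)

    def checked_started_bytes(cid: str, value) -> bytes:
        wire: bytes = enc.encode(Started(cid=cid, pld=value))
        # the pre-transit roundtrip: decoding with the same codec
        # raises right here in the child if `value` violates the
        # spec, before any actual IPC send happens.
        dec.decode(wire)
        return wire

    checked_started_bytes('1', 'ok')  # validates fine
    try:
        checked_started_bytes('1', 10)  # wrong `.pld` type
    except msgspec.ValidationError:
        print('caught child-side; parent never sees the bad msg')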
The main motivation here is to avoid inter-actor task syncing bugs
that are hard(er) to recover from, such as when an invalid typed msg
is sent to the parent, who then ignores it (depending on config),
while the child assumes the parent is in some presumed state even
though the parent still thinks a first msg has yet to arrive. Doing
the stringent check on the sender side (i.e. the child is sending the
"first" application msg via `.started()`) avoids/sidesteps dealing
with such syncing/coordinated-state problems by keeping the entire
IPC dialog in a "cheap" or "control" style transaction up until
a stream is opened.

Iow, the parent task's `.open_context()` block entry can't occur
until the child side is definitely (as much as is possible with IPC
msg type checking) in a correct state, spec-wise. During any
streaming phase in the dialog the msg-type-checking is NOT done for
performance (the "nasty" protocol phase) and instead any type errors
are relayed back from the receiving side. I'm still unsure whether to
take the same approach on the `Return` msg, since at that point
erroring early doesn't benefit the parent task if/when a msg-type
error occurs? Definitely more to ponder and tinker out here..

Impl notes:
- a gotcha with the roundtrip-codec-ed msg is that it often won't
  match the input `value` bc in the `msgpack` case many native python
  sequence/collection types will map to a common array type due to
  the surjection that `msgpack`'s type-sys imposes.
  - so we can't assert that `started == rt_started` but it may be
    useful to at least report the diff of the type-reduced payload so
    that the caller can at least be notified how the input `value`
    might be better type-casted prior to call, for ex. pre-casting to
    `list`s.
- added a `._strict_started: bool` that could provide the stringent
  checking if desired in the future.
- on any validation error raise our `MsgTypeError` from it.
- ALSO change over the lingering `.send_yield()` deprecated meth body
  to use a `Yield()`.
---
 tractor/_context.py | 79 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 67 insertions(+), 12 deletions(-)

diff --git a/tractor/_context.py b/tractor/_context.py
index 02dcac39..b4e207a4 100644
--- a/tractor/_context.py
+++ b/tractor/_context.py
@@ -32,6 +32,7 @@ from dataclasses import (
 )
 from functools import partial
 import inspect
+import msgspec
 from pprint import pformat
 from typing import (
     Any,
@@ -60,6 +61,9 @@ from .msg import (
     Started,
     Stop,
     Yield,
+    current_codec,
+    MsgCodec,
+    pretty_struct,
 )
 from ._ipc import Channel
 from ._streaming import MsgStream
@@ -505,6 +509,8 @@ class Context:
     _in_overrun: bool = False
     _allow_overruns: bool = False

+    # TODO: figure out how we can enforce this without losing our minds..
+    _strict_started: bool = False

     def __str__(self) -> str:
         ds: str = '='
@@ -727,7 +733,13 @@ class Context:
             DeprecationWarning,
             stacklevel=2,
         )
-        await self.chan.send({'yield': data, 'cid': self.cid})
+        # await self.chan.send({'yield': data, 'cid': self.cid})
+        await self.chan.send(
+            Yield(
+                cid=self.cid,
+                pld=data,
+            )
+        )

     async def send_stop(self) -> None:
         # await pause()
@@ -1643,18 +1655,61 @@ class Context:
                 f'called `.started()` twice on context with {self.chan.uid}'
             )

-        # await self.chan.send(
-        #     {
-        #         'started': value,
-        #         'cid': self.cid,
-        #     }
-        # )
-        await self.chan.send(
-            Started(
-                cid=self.cid,
-                pld=value,
-            )
+        started = Started(
+            cid=self.cid,
+            pld=value,
         )
+        # XXX MEGA NOTE XXX: ONLY on the first msg sent with
+        # `Context.started()` do we STRINGENTLY roundtrip-check
+        # the first payload such that the child side can't send an
+        # incorrect value according to the currently applied
+        # msg-spec!
+        #
+        # HOWEVER, once a stream is opened via
+        # `Context.open_stream()` then this check is NEVER done on
+        # `MsgStream.send()` and instead both the parent and child
+        # sides are expected to relay back msg-type errors when
+        # decode failures exhibit on `MsgStream.receive()` calls thus
+        # enabling a so-called (by the holy 0mq lords)
+        # "cheap-or-nasty pattern" un-protocol design Bo
+        #
+        # https://zguide.zeromq.org/docs/chapter7/#The-Cheap-or-Nasty-Pattern
+        #
+        codec: MsgCodec = current_codec()
+        msg_bytes: bytes = codec.encode(started)
+        try:
+            # be a "cheap" dialog (see above!)
+            rt_started = codec.decode(msg_bytes)
+            if rt_started != started:
+
+                # TODO: break these methods out from the struct subtype?
+                diff = pretty_struct.Struct.__sub__(rt_started, started)
+
+                complaint: str = (
+                    'Started value does not match after codec roundtrip?\n\n'
+                    f'{diff}'
+                )
+                # TODO: rn this will pretty much always fail with
+                # any other sequence type embedded in the
+                # payload...
+                if self._strict_started:
+                    raise ValueError(complaint)
+                else:
+                    log.warning(complaint)

+            await self.chan.send(rt_started)
+
+        # raise any msg type error NO MATTER WHAT!
+ except msgspec.ValidationError as verr: + from tractor._ipc import _raise_msg_type_err + _raise_msg_type_err( + msg=msg_bytes, + codec=codec, + validation_err=verr, + verb_header='Trying to send payload' + # > 'invalid `Started IPC msgs\n' + ) + self._started_called = True async def _drain_overflows( -- 2.34.1 From dbb5e7dc789624c3dbd6e0067bf419b3f8ac68c9 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 5 Apr 2024 16:32:15 -0400 Subject: [PATCH 026/305] Expose `MsgTypeError` from pkg --- tractor/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tractor/__init__.py b/tractor/__init__.py index 31f59598..bd9b8610 100644 --- a/tractor/__init__.py +++ b/tractor/__init__.py @@ -45,9 +45,10 @@ from ._state import ( is_root_process as is_root_process, ) from ._exceptions import ( - RemoteActorError as RemoteActorError, - ModuleNotExposed as ModuleNotExposed, ContextCancelled as ContextCancelled, + ModuleNotExposed as ModuleNotExposed, + MsgTypeError as MsgTypeError, + RemoteActorError as RemoteActorError, ) from .devx import ( breakpoint as breakpoint, -- 2.34.1 From 4e769e45e4d33ddcf339941a2363d48d554503ea Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 5 Apr 2024 16:34:07 -0400 Subject: [PATCH 027/305] Factor `MsgpackTCPStream` msg-type checks Add both the `.send()` and `.recv()` handling blocks to a common `_raise_msg_type_err()` which includes detailed error msg formatting: - the `.recv()` side case does introspection of the `Msg` fields and attempting to report the exact (field type related) issue - `.send()` side does some boxed-error style tb formatting like `RemoteActorError`. - add a `strict_types: bool` to `.send()` to allow for just warning on bad inputs versus raising, but always raise from any `Encoder` type error. --- tractor/_ipc.py | 174 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 123 insertions(+), 51 deletions(-) diff --git a/tractor/_ipc.py b/tractor/_ipc.py index 6168c77c..9af28e5a 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -54,7 +54,8 @@ from tractor.msg import ( _ctxvar_MsgCodec, _codec, MsgCodec, - types, + types as msgtypes, + pretty_struct, ) log = get_logger(__name__) @@ -72,6 +73,7 @@ def get_stream_addrs(stream: trio.SocketStream) -> tuple: ) +# TODO: this should be our `Union[*msgtypes.__spec__]` now right? MsgType = TypeVar("MsgType") # TODO: consider using a generic def and indexing with our eventual @@ -116,6 +118,73 @@ class MsgTransport(Protocol[MsgType]): ... +def _raise_msg_type_err( + msg: Any|bytes, + codec: MsgCodec, + validation_err: msgspec.ValidationError|None = None, + verb_header: str = '', + +) -> None: + + # if side == 'send': + if validation_err is None: # send-side + + import traceback + from tractor._exceptions import pformat_boxed_tb + + fmt_spec: str = '\n'.join( + map(str, codec.msg_spec.__args__) + ) + fmt_stack: str = ( + '\n'.join(traceback.format_stack(limit=3)) + ) + tb_fmt: str = pformat_boxed_tb( + tb_str=fmt_stack, + # fields_str=header, + field_prefix=' ', + indent='', + ) + raise MsgTypeError( + f'invalid msg -> {msg}: {type(msg)}\n\n' + f'{tb_fmt}\n' + f'Valid IPC msgs are:\n\n' + # f' ------ - ------\n' + f'{fmt_spec}\n' + ) + + else: + # decode the msg-bytes using the std msgpack + # interchange-prot (i.e. without any + # `msgspec.Struct` handling) so that we can + # determine what `.msg.types.Msg` is the culprit + # by reporting the received value. 
+ msg_dict: dict = msgspec.msgpack.decode(msg) + msg_type_name: str = msg_dict['msg_type'] + msg_type = getattr(msgtypes, msg_type_name) + errmsg: str = ( + f'invalid `{msg_type_name}` IPC msg\n\n' + ) + if verb_header: + errmsg = f'{verb_header} ' + errmsg + + # XXX see if we can determine the exact invalid field + # such that we can comprehensively report the + # specific field's type problem + msgspec_msg: str = validation_err.args[0].rstrip('`') + msg, _, maybe_field = msgspec_msg.rpartition('$.') + if field_val := msg_dict.get(maybe_field): + field_type: Union[Type] = msg_type.__signature__.parameters[ + maybe_field + ].annotation + errmsg += ( + f'{msg.rstrip("`")}\n\n' + f'{msg_type}\n' + f' |_.{maybe_field}: {field_type} = {field_val!r}\n' + ) + + raise MsgTypeError(errmsg) from validation_err + + # TODO: not sure why we have to inherit here, but it seems to be an # issue with ``get_msg_transport()`` returning a ``Type[Protocol]``; # probably should make a `mypy` issue? @@ -175,9 +244,10 @@ class MsgpackTCPStream(MsgTransport): or _codec._ctxvar_MsgCodec.get() ) - log.critical( - '!?!: USING STD `tractor` CODEC !?!?\n' - f'{self._codec}\n' + # TODO: mask out before release? + log.runtime( + f'New {self} created with codec\n' + f'codec: {self._codec}\n' ) async def _iter_packets(self) -> AsyncGenerator[dict, None]: @@ -221,16 +291,18 @@ class MsgpackTCPStream(MsgTransport): # NOTE: lookup the `trio.Task.context`'s var for # the current `MsgCodec`. codec: MsgCodec = _ctxvar_MsgCodec.get() + + # TODO: mask out before release? if self._codec.pld_spec != codec.pld_spec: # assert ( # task := trio.lowlevel.current_task() # ) is not self._task # self._task = task self._codec = codec - log.critical( - '.recv() USING NEW CODEC !?!?\n' - f'{self._codec}\n\n' - f'msg_bytes -> {msg_bytes}\n' + log.runtime( + 'Using new codec in {self}.recv()\n' + f'codec: {self._codec}\n\n' + f'msg_bytes: {msg_bytes}\n' ) yield codec.decode(msg_bytes) @@ -252,36 +324,13 @@ class MsgpackTCPStream(MsgTransport): # and always raise such that spec violations # are never allowed to be caught silently! except msgspec.ValidationError as verr: - - # decode the msg-bytes using the std msgpack - # interchange-prot (i.e. without any - # `msgspec.Struct` handling) so that we can - # determine what `.msg.types.Msg` is the culprit - # by reporting the received value. - msg_dict: dict = msgspec.msgpack.decode(msg_bytes) - msg_type_name: str = msg_dict['msg_type'] - msg_type = getattr(types, msg_type_name) - errmsg: str = ( - f'Received invalid IPC `{msg_type_name}` msg\n\n' + # re-raise as type error + _raise_msg_type_err( + msg=msg_bytes, + codec=codec, + validation_err=verr, ) - # XXX see if we can determine the exact invalid field - # such that we can comprehensively report the - # specific field's type problem - msgspec_msg: str = verr.args[0].rstrip('`') - msg, _, maybe_field = msgspec_msg.rpartition('$.') - if field_val := msg_dict.get(maybe_field): - field_type: Union[Type] = msg_type.__signature__.parameters[ - maybe_field - ].annotation - errmsg += ( - f'{msg.rstrip("`")}\n\n' - f'{msg_type}\n' - f' |_.{maybe_field}: {field_type} = {field_val}\n' - ) - - raise MsgTypeError(errmsg) from verr - except ( msgspec.DecodeError, UnicodeDecodeError, @@ -307,12 +356,16 @@ class MsgpackTCPStream(MsgTransport): async def send( self, - msg: Any, + msg: msgtypes.Msg, + strict_types: bool = True, # hide_tb: bool = False, ) -> None: ''' - Send a msgpack coded blob-as-msg over TCP. 
+        Send a msgpack encoded py-object-blob-as-msg over TCP.
+
+        If `strict_types == True` then a `MsgTypeError` will be raised on
+        any invalid msg type.

         '''
         # __tracebackhide__: bool = hide_tb
@@ -321,25 +374,40 @@ class MsgpackTCPStream(MsgTransport):
             # NOTE: lookup the `trio.Task.context`'s var for
             # the current `MsgCodec`.
             codec: MsgCodec = _ctxvar_MsgCodec.get()
-            # if self._codec != codec:
+
+            # TODO: mask out before release?
             if self._codec.pld_spec != codec.pld_spec:
                 self._codec = codec
-                log.critical(
-                    '.send() using NEW CODEC !?!?\n'
-                    f'{self._codec}\n\n'
-                    f'OBJ -> {msg}\n'
+                log.runtime(
+                    'Using new codec in {self}.send()\n'
+                    f'codec: {self._codec}\n\n'
+                    f'msg: {msg}\n'
                 )
-            if type(msg) not in types.__spec__:
-                log.warning(
-                    'Sending non-`Msg`-spec msg?\n\n'
-                    f'{msg}\n'
-                )
-            bytes_data: bytes = codec.encode(msg)
+
+            if type(msg) not in msgtypes.__msg_types__:
+                if strict_types:
+                    _raise_msg_type_err(
+                        msg,
+                        codec=codec,
+                    )
+                else:
+                    log.warning(
+                        'Sending non-`Msg`-spec msg?\n\n'
+                        f'{msg}\n'
+                    )
+
+            try:
+                bytes_data: bytes = codec.encode(msg)
+            except TypeError as typerr:
+                raise MsgTypeError(
+                    'A msg field violates the current spec\n'
+                    f'{codec.pld_spec}\n\n'
+                    f'{pretty_struct.Struct.pformat(msg)}'
+                ) from typerr

         # supposedly the fastest says,
         # https://stackoverflow.com/a/54027962
         size: bytes = struct.pack("<I", len(bytes_data))
         return await self.stream.send_all(size + bytes_data)

+    # TODO: auto-reconnect features like 0mq/nanomsg?
+    # -[ ] implement it manually with nods to SC prot
+    #      possibly on multiple transport backends?
+    #  -> seems like that might be re-inventing scalability
+    #     prots tho no?
    #     try:
    #         return await self._transport.recv()
    #     except trio.BrokenResourceError:
-- 
2.34.1


From 78b08e2a915541cbbf56fcb2f215ed10fecf3719 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Fri, 5 Apr 2024 19:07:12 -0400
Subject: [PATCH 028/305] Drop `None`-sentinel cancels RPC loop mechanism

Pretty sure we haven't *needed it* for a while, it was always
generally hazardous in terms of IPC msg types, AND it's definitely
incompatible with a dynamically applied typed msg spec: you can't
just expect a `None` to be willy nilly handled all the time XD

For now I'm masking out all the code and leaving very detailed
surrounding notes but am not removing it quite yet in case for some
strange reason it is needed by some edge case (though I haven't found
one according to the test suite).

Backstory:
------ - ------
Originally (i'm pretty sure anyway) it was added as a super naive
"remote cancellation" mechanism (back before there were specific
`Actor` methods for such things) that was mostly (only?) used before
IPC `Channel` closures to "more gracefully cancel" the connection's
parented RPC tasks. Since we now have explicit runtime-RPC endpoints
for conducting remote cancellation of both tasks and full actors, it
should really be removed anyway, because:
- a `None`-msg sentinel is inconsistent with other RPC endpoint
  handling input patterns which (even prior to typed msging) had
  specific msg-value triggers.
- the IPC endpoint's (block) implementation should use
  `Actor.cancel_rpc_tasks(parent_chan=chan)` instead of a manual loop
  through an `Actor._rpc_tasks.copy()`..

Deats:
- mask the `Channel.send(None)` calls from both the
  `Actor._stream_handler()` tail as well as from the
  `._portal.open_portal()` `was_connected` block.
- mask the msg loop endpoint block and toss in lotsa notes.

Unrelated tweaks:
- drop `Actor._debug_mode`; unused.
- make `Actor.cancel_server()` return a `bool`.
- use `.msg.pretty_struct.Struct.pformat()` to show any msg that is
  ignored (bc invalid) in `._push_result()`.
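
As a tiny standalone demo of *why* the sentinel can't survive typed
msging (simplified stand-in types, not the real `.msg.types` set):
a spec'd decoder flat out rejects a bare `None` while an explicit
runtime request msg round-trips fine.

    import msgspec

    class Start(msgspec.Struct, tag=True):
        cid: str
        ns: str
        func: str

    class Stop(msgspec.Struct, tag=True):
        cid: str

    enc = msgspec.msgpack.Encoder()
    dec = msgspec.msgpack.Decoder(Start | Stop)

    # an explicit runtime-cancel request decodes under the spec,
    req = dec.decode(enc.encode(
        Start(cid='1', ns='self', func='cancel_rpc_tasks')
    ))
    assert req.func == 'cancel_rpc_tasks'

    # but an in-band `None` sentinel is a flat spec violation.
    try:
        dec.decode(enc.encode(None))
    except msgspec.ValidationError:
        print('`None` sentinel rejected by the msg-spec')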
--- tractor/_portal.py | 6 ++-- tractor/_rpc.py | 70 +++++++++++++++++++++++------------- tractor/_runtime.py | 88 ++++++++++++++++++++++++++------------------- 3 files changed, 100 insertions(+), 64 deletions(-) diff --git a/tractor/_portal.py b/tractor/_portal.py index cc9052ba..957eae59 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -502,7 +502,7 @@ async def open_portal( ''' actor = current_actor() assert actor - was_connected = False + was_connected: bool = False async with maybe_open_nursery(nursery, shield=shield) as nursery: @@ -533,9 +533,7 @@ async def open_portal( await portal.aclose() if was_connected: - # gracefully signal remote channel-msg loop - await channel.send(None) - # await channel.aclose() + await channel.aclose() # cancel background msg loop task if msg_loop_cs: diff --git a/tractor/_rpc.py b/tractor/_rpc.py index 9b179524..a765d666 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -424,8 +424,6 @@ async def _invoke( # XXX for .pause_from_sync()` usage we need to make sure # `greenback` is boostrapped in the subactor! await _debug.maybe_init_greenback() - # else: - # await pause() # TODO: possibly a specially formatted traceback # (not sure what typing is for this..)? @@ -850,30 +848,54 @@ async def process_messages( match msg: + # NOTE: this *was a dedicated + # "graceful-terminate-loop" mechanism using + # a `None`-msg-sentinel which would cancel all RPC + # tasks parented by this loop's IPC channel; that + # is all rpc-scheduled-tasks started over the + # connection were explicitly per-task cancelled + # normally prior to the `Channel`'s underlying + # transport being later closed. + # + # * all `.send(None)`s were # removed as part of + # typed-msging requirements + # + # TODO: if this mechanism is still desired going + # forward it should be implemented as part of the + # normal runtime-cancel-RPC endpoints with either, + # - a special `msg.types.Msg` to trigger the loop endpoint + # (like `None` was used prior) or, + # - it should just be accomplished using A + # `Start(ns='self', func='cancel_rpc_tasks())` + # request instead? + # # if msg is None: - # dedicated loop terminate sentinel - case None: + # case None: + # tasks: dict[ + # tuple[Channel, str], + # tuple[Context, Callable, trio.Event] + # ] = actor._rpc_tasks.copy() + # log.cancel( + # f'Peer IPC channel terminated via `None` setinel msg?\n' + # f'=> Cancelling all {len(tasks)} local RPC tasks..\n' + # f'peer: {chan.uid}\n' + # f'|_{chan}\n' + # ) + # # TODO: why aren't we just calling + # # `.cancel_rpc_tasks()` with the parent + # # chan as input instead? + # for (channel, cid) in tasks: + # if channel is chan: + # await actor._cancel_task( + # cid, + # channel, + # requesting_uid=channel.uid, - tasks: dict[ - tuple[Channel, str], - tuple[Context, Callable, trio.Event] - ] = actor._rpc_tasks.copy() - log.cancel( - f'Peer IPC channel terminated via `None` setinel msg?\n' - f'=> Cancelling all {len(tasks)} local RPC tasks..\n' - f'peer: {chan.uid}\n' - f'|_{chan}\n' - ) - for (channel, cid) in tasks: - if channel is chan: - await actor._cancel_task( - cid, - channel, - requesting_uid=channel.uid, + # ipc_msg=msg, + # ) - ipc_msg=msg, - ) - break + # # immediately break out of this loop! + # break # cid = msg.get('cid') # if cid: @@ -911,7 +933,7 @@ async def process_messages( cid=cid, ns=ns, func=funcname, - kwargs=kwargs, + kwargs=kwargs, # type-spec this? 
see `msg.types` uid=actorid, ): # try: diff --git a/tractor/_runtime.py b/tractor/_runtime.py index eee78973..66a1db62 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -65,7 +65,11 @@ from trio import ( TaskStatus, ) -from .msg import NamespacePath +from tractor.msg import ( + pretty_struct, + NamespacePath, + types as msgtypes, +) from ._ipc import Channel from ._context import ( mk_context, @@ -87,10 +91,6 @@ from ._rpc import ( process_messages, try_ship_error_to_remote, ) -from tractor.msg import ( - types as msgtypes, - pretty_struct, -) # from tractor.msg.types import ( # Aid, # SpawnSpec, @@ -160,18 +160,15 @@ class Actor: # Information about `__main__` from parent _parent_main_data: dict[str, str] _parent_chan_cs: CancelScope|None = None - _spawn_spec: SpawnSpec|None = None + _spawn_spec: msgtypes.SpawnSpec|None = None # syncs for setup/teardown sequences _server_down: trio.Event|None = None - # user toggled crash handling (including monkey-patched in - # `trio.open_nursery()` via `.trionics._supervisor` B) - _debug_mode: bool = False - # if started on ``asycio`` running ``trio`` in guest mode _infected_aio: bool = False + # TODO: nursery tracking like `trio` does? # _ans: dict[ # tuple[str, str], # list[ActorNursery], @@ -718,35 +715,50 @@ class Actor: # TODO: figure out why this breaks tests.. db_cs.cancel() - # XXX: is this necessary (GC should do it)? + # XXX TODO XXX: DO WE NEED THIS? + # -[ ] is it necessary any more (GC should do it) now + # that we have strict(er) graceful cancellation + # semantics? # XXX WARNING XXX # Be AWARE OF THE INDENT LEVEL HERE # -> ONLY ENTER THIS BLOCK WHEN ._peers IS # EMPTY!!!! - if ( - not self._peers - and chan.connected() - ): - # if the channel is still connected it may mean the far - # end has not closed and we may have gotten here due to - # an error and so we should at least try to terminate - # the channel from this end gracefully. - log.runtime( - 'Terminating channel with `None` setinel msg\n' - f'|_{chan}\n' - ) - try: - # send msg loop terminate sentinel which - # triggers cancellation of all remotely - # started tasks. - await chan.send(None) + # + # if the channel is still connected it may mean the far + # end has not closed and we may have gotten here due to + # an error and so we should at least try to terminate + # the channel from this end gracefully. + #if ( + # not self._peers + # and chan.connected() + #): + # log.runtime( + # 'Terminating channel with `None` setinel msg\n' + # f'|_{chan}\n' + # ) + # try: + # # ORIGINALLY we sent a msg loop terminate + # # sentinel (`None`) which triggers + # # cancellation of all remotely started + # # tasks. + # # + # # HOWEVER, after we added typed msging, + # # you can't just willy nilly send `None` + # # wherever since it might be invalid given + # # the currently configured msg-spec. + # # + # # SO, this was all removed and I'm pretty + # # confident we don't need it replaced with + # # a manual RPC to + # # a `Actor.cancel_rpc_tasks()` right? + # await chan.send(None) - # XXX: do we want this? no right? - # causes "[104] connection reset by peer" on other end - # await chan.aclose() + # # XXX: do we want this? NO RIGHT? 
+ # # causes "[104] connection reset by peer" on other end + # # await chan.aclose() - except trio.BrokenResourceError: - log.runtime(f"Channel {chan.uid} was already closed") + # except trio.BrokenResourceError: + # log.runtime(f"Channel {chan.uid} was already closed") # TODO: rename to `._deliver_payload()` since this handles # more then just `result` msgs now obvi XD @@ -776,9 +788,10 @@ class Actor: log.warning( 'Ignoring invalid IPC ctx msg!\n\n' f'<= sender: {uid}\n' - f'=> cid: {cid}\n\n' + # XXX don't need right since it's always in msg? + # f'=> cid: {cid}\n\n' - f'{msg}\n' + f'{pretty_struct.Struct.pformat(msg)}\n' ) return @@ -1439,7 +1452,7 @@ class Actor: ) await self._ongoing_rpc_tasks.wait() - def cancel_server(self) -> None: + def cancel_server(self) -> bool: ''' Cancel the internal IPC transport server nursery thereby preventing any new inbound IPC connections establishing. @@ -1448,6 +1461,9 @@ class Actor: if self._server_n: log.runtime("Shutting down channel server") self._server_n.cancel_scope.cancel() + return True + + return False @property def accept_addrs(self) -> list[tuple[str, int]]: -- 2.34.1 From 344d8ebc0c803ff2e9e25f33266e6d8282236530 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 7 Apr 2024 10:40:01 -0400 Subject: [PATCH 029/305] Flatten out RPC loop with `match:`/`case:` Mainly expanding out the runtime endpoints for cancellation to separate cases and flattening them with the main RPC-request-invoke block, moving the non-cancel runtime case (where we call `getattr(actor, funcname)`) inside the main `Start` case (for now) which branches on `ns=="self"`. Also, add a new IPC msg `class CancelAck(Return):` which is always included in the default msg-spec such that runtime cancellation (and eventually all) endpoints return that msg (instead of a `Return`) and thus sidestep any currently applied `MsgCodec` such that the results (`bool`s for most cancel methods) are never violating the current type limit(s) on `Msg.pld`. To support this expose a new variable `return_msg: Return|CancelAck` param from `_invoke()`/`_invoke_non_context)()` and set it to `CancelAck` in the appropriate endpoint case-blocks of the msg loop. Clean out all the lingering legacy `chan.send()` commented codez from the invoker funcs, with more cleaning likely to come B) --- tractor/_rpc.py | 308 ++++++++++++++++--------------------------- tractor/msg/types.py | 4 + 2 files changed, 119 insertions(+), 193 deletions(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index a765d666..b7638335 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -58,13 +58,15 @@ from .devx import _debug from . import _state from .log import get_logger from tractor.msg.types import ( + CancelAck, + Error, + Msg, + Return, Start, StartAck, Started, Stop, Yield, - Return, - Error, ) if TYPE_CHECKING: @@ -85,6 +87,7 @@ async def _invoke_non_context( treat_as_gen: bool, is_rpc: bool, + return_msg: Return|CancelAck = Return, task_status: TaskStatus[ Context | BaseException @@ -93,7 +96,6 @@ async def _invoke_non_context( # TODO: can we unify this with the `context=True` impl below? 
if inspect.isasyncgen(coro): - # await chan.send({ await chan.send( StartAck( cid=cid, @@ -119,11 +121,6 @@ async def _invoke_non_context( # to_send = await chan.recv_nowait() # if to_send is not None: # to_yield = await coro.asend(to_send) - # await chan.send({ - # # Yield() - # 'cid': cid, - # 'yield': item, - # }) await chan.send( Yield( cid=cid, @@ -138,11 +135,6 @@ async def _invoke_non_context( await chan.send( Stop(cid=cid) ) - # await chan.send({ - # # Stop( - # 'cid': cid, - # 'stop': True, - # }) # one way @stream func that gets treated like an async gen # TODO: can we unify this with the `context=True` impl below? @@ -153,11 +145,6 @@ async def _invoke_non_context( functype='asyncgen', ) ) - # await chan.send({ - # # StartAck() - # 'cid': cid, - # 'functype': 'asyncgen', - # }) # XXX: the async-func may spawn further tasks which push # back values like an async-generator would but must # manualy construct the response dict-packet-responses as @@ -173,11 +160,6 @@ async def _invoke_non_context( await chan.send( Stop(cid=cid) ) - # await chan.send({ - # # Stop( - # 'cid': cid, - # 'stop': True, - # }) else: # regular async function/method # XXX: possibly just a scheduled `Actor._cancel_task()` @@ -195,11 +177,6 @@ async def _invoke_non_context( functype='asyncfunc', ) ) - # await chan.send({ - # # StartAck() - # 'cid': cid, - # 'functype': 'asyncfunc', - # }) except ( trio.ClosedResourceError, trio.BrokenResourceError, @@ -233,13 +210,8 @@ async def _invoke_non_context( and chan.connected() ): try: - # await chan.send({ - # # Return() - # 'cid': cid, - # 'return': result, - # }) await chan.send( - Return( + return_msg( cid=cid, pld=result, ) @@ -404,6 +376,7 @@ async def _invoke( is_rpc: bool = True, hide_tb: bool = True, + return_msg: Return|CancelAck = Return, task_status: TaskStatus[ Context | BaseException @@ -513,6 +486,7 @@ async def _invoke( kwargs, treat_as_gen, is_rpc, + return_msg, task_status, ) # below is only for `@context` funcs @@ -543,11 +517,6 @@ async def _invoke( functype='context', ) ) - # await chan.send({ - # # StartAck() - # 'cid': cid, - # 'functype': 'context', - # }) # TODO: should we also use an `.open_context()` equiv # for this callee side by factoring the impl from @@ -572,16 +541,11 @@ async def _invoke( # deliver final result to caller side. await chan.send( - Return( + return_msg( cid=cid, pld=res, ) ) - # await chan.send({ - # # Return() - # 'cid': cid, - # 'return': res, - # }) # NOTE: this happens IFF `ctx._scope.cancel()` is # called by any of, @@ -670,7 +634,6 @@ async def _invoke( ctxc = ContextCancelled( msg, boxed_type=trio.Cancelled, - # boxed_type_str='Cancelled', canceller=canceller, ) # assign local error so that the `.outcome` @@ -771,12 +734,12 @@ async def try_ship_error_to_remote( trio.BrokenResourceError, BrokenPipeError, ): - # err_msg: dict = msg['error']['tb_str'] log.critical( 'IPC transport failure -> ' f'failed to ship error to {remote_descr}!\n\n' f'X=> {channel.uid}\n\n' - # f'{err_msg}\n' + + # TODO: use `.msg.preetty_struct` for this! f'{msg}\n' ) @@ -818,6 +781,8 @@ async def process_messages( ''' + assert actor._service_n # state sanity + # TODO: once `trio` get's an "obvious way" for req/resp we # should use it? 
# https://github.com/python-trio/trio/issues/467 @@ -827,7 +792,7 @@ async def process_messages( f'|_{chan}\n' ) nursery_cancelled_before_task: bool = False - msg: dict | None = None + msg: Msg|None = None try: # NOTE: this internal scope allows for keeping this # message loop running despite the current task having @@ -836,6 +801,7 @@ async def process_messages( # using ``scope = Nursery.start()`` with CancelScope(shield=shield) as loop_cs: task_status.started(loop_cs) + async for msg in chan: log.transport( # type: ignore f'<= IPC msg from peer: {chan.uid}\n\n' @@ -890,21 +856,18 @@ async def process_messages( # cid, # channel, # requesting_uid=channel.uid, - # ipc_msg=msg, # ) - # # immediately break out of this loop! # break - # cid = msg.get('cid') - # if cid: case ( StartAck(cid=cid) | Started(cid=cid) | Yield(cid=cid) | Stop(cid=cid) | Return(cid=cid) + | CancelAck(cid=cid) | Error(cid=cid) ): # deliver response to local caller/waiter @@ -914,17 +877,85 @@ async def process_messages( cid, msg, ) + # TODO: can remove right? + # continue + + # runtime-internal cancellation endpoints + case Start( + ns='self', + func='cancel', + cid=cid, + kwargs=kwargs, + ): + kwargs |= {'req_chan': chan} + + # XXX NOTE XXX don't start entire actor + # runtime cancellation if this actor is + # currently in debug mode! + pdb_complete: trio.Event|None = _debug.Lock.local_pdb_complete + if pdb_complete: + await pdb_complete.wait() + + # Either of `Actor.cancel()`/`.cancel_soon()` + # was called, so terminate this IPC msg + # loop, exit back out into `async_main()`, + # and immediately start the core runtime + # machinery shutdown! + with CancelScope(shield=True): + await _invoke( + actor, + cid, + chan, + actor.cancel, + kwargs, + is_rpc=False, + return_msg=CancelAck, + ) log.runtime( - 'Waiting on next IPC msg from\n' - f'peer: {chan.uid}:\n' + 'Cancelling IPC transport msg-loop with peer:\n' f'|_{chan}\n' - - # f'last msg: {msg}\n' ) - continue + loop_cs.cancel() + break - # process a 'cmd' request-msg upack + case Start( + ns='self', + func='_cancel_task', + cid=cid, + kwargs=kwargs, + ): + target_cid: str = kwargs['cid'] + kwargs |= { + 'requesting_uid': chan.uid, + 'ipc_msg': msg, + + # XXX NOTE! ONLY the rpc-task-owning + # parent IPC channel should be able to + # cancel it! + 'parent_chan': chan, + } + try: + await _invoke( + actor, + cid, + chan, + actor._cancel_task, + kwargs, + is_rpc=False, + return_msg=CancelAck, + ) + except BaseException: + log.exception( + 'Failed to cancel task?\n' + f'<= canceller: {chan.uid}\n' + f' |_{chan}\n\n' + f'=> {actor}\n' + f' |_cid: {target_cid}\n' + ) + + # the "MAIN" RPC endpoint to schedule-a-`trio.Task` + # # TODO: impl with native `msgspec.Struct` support !! # -[ ] implement with ``match:`` syntax? # -[ ] discard un-authed msgs as per, @@ -936,139 +967,29 @@ async def process_messages( kwargs=kwargs, # type-spec this? see `msg.types` uid=actorid, ): - # try: - # ( - # ns, - # funcname, - # kwargs, - # actorid, - # cid, - # ) = msg['cmd'] - - # # TODO: put in `case Error():` right? - # except KeyError: - # # This is the non-rpc error case, that is, an - # # error **not** raised inside a call to ``_invoke()`` - # # (i.e. no cid was provided in the msg - see above). 
- # # Push this error to all local channel consumers - # # (normally portals) by marking the channel as errored - # assert chan.uid - # exc = unpack_error(msg, chan=chan) - # chan._exc = exc - # raise exc - log.runtime( 'Handling RPC `Start` request from\n' f'peer: {actorid}\n' '\n' f'=> {ns}.{funcname}({kwargs})\n' ) - # case Start( - # ns='self', - # funcname='cancel', - # ): + + # runtime-internal endpoint: `Actor.` + # only registry methods exist now yah, + # like ``.register_actor()`` etc. ? if ns == 'self': - if funcname == 'cancel': - func: Callable = actor.cancel - kwargs |= { - 'req_chan': chan, - } + func: Callable = getattr(actor, funcname) - # don't start entire actor runtime cancellation - # if this actor is currently in debug mode! - pdb_complete: trio.Event|None = _debug.Lock.local_pdb_complete - if pdb_complete: - await pdb_complete.wait() - - # Either of `Actor.cancel()`/`.cancel_soon()` - # was called, so terminate this IPC msg - # loop, exit back out into `async_main()`, - # and immediately start the core runtime - # machinery shutdown! - with CancelScope(shield=True): - await _invoke( - actor, - cid, - chan, - func, - kwargs, - is_rpc=False, - ) - - log.runtime( - 'Cancelling IPC transport msg-loop with peer:\n' - f'|_{chan}\n' - ) - loop_cs.cancel() - break - - # case Start( - # ns='self', - # funcname='_cancel_task', - # ): - if funcname == '_cancel_task': - func: Callable = actor._cancel_task - - # we immediately start the runtime machinery - # shutdown - # with CancelScope(shield=True): - target_cid: str = kwargs['cid'] - kwargs |= { - # NOTE: ONLY the rpc-task-owning - # parent IPC channel should be able to - # cancel it! - 'parent_chan': chan, - 'requesting_uid': chan.uid, - 'ipc_msg': msg, - } - # TODO: remove? already have emit in meth. - # log.runtime( - # f'Rx RPC task cancel request\n' - # f'<= canceller: {chan.uid}\n' - # f' |_{chan}\n\n' - # f'=> {actor}\n' - # f' |_cid: {target_cid}\n' - # ) - try: - await _invoke( - actor, - cid, - chan, - func, - kwargs, - is_rpc=False, - ) - except BaseException: - log.exception( - 'Failed to cancel task?\n' - f'<= canceller: {chan.uid}\n' - f' |_{chan}\n\n' - f'=> {actor}\n' - f' |_cid: {target_cid}\n' - ) - continue - - # case Start( - # ns='self', - # funcname='register_actor', - # ): - else: - # normally registry methods, eg. - # ``.register_actor()`` etc. - func: Callable = getattr(actor, funcname) - - # case Start( - # ns=str(), - # funcname=funcname, - # ): + # application RPC endpoint else: - # complain to client about restricted modules try: - func = actor._get_rpc_func(ns, funcname) + func: Callable = actor._get_rpc_func(ns, funcname) except ( ModuleNotExposed, AttributeError, ) as err: + # always complain to requester + # client about un-enabled modules err_msg: dict[str, dict] = pack_error( err, cid=cid, @@ -1078,6 +999,7 @@ async def process_messages( # schedule a task for the requested RPC function # in the actor's main "service nursery". + # # TODO: possibly a service-tn per IPC channel for # supervision isolation? would avoid having to # manage RPC tasks individually in `._rpc_tasks` @@ -1086,7 +1008,7 @@ async def process_messages( f'Spawning task for RPC request\n' f'<= caller: {chan.uid}\n' f' |_{chan}\n\n' - # TODO: maddr style repr? + # ^-TODO-^ maddr style repr? # f' |_@ /ipv4/{chan.raddr}/tcp/{chan.rport}/' # f'cid="{cid[-16:]} .."\n\n' @@ -1094,7 +1016,6 @@ async def process_messages( f' |_cid: {cid}\n' f' |>> {func}()\n' ) - assert actor._service_n # wait why? do it at top? 
try: ctx: Context = await actor._service_n.start( partial( @@ -1124,13 +1045,12 @@ async def process_messages( log.warning( 'Task for RPC failed?' f'|_ {func}()\n\n' - f'{err}' ) continue else: - # mark that we have ongoing rpc tasks + # mark our global state with ongoing rpc tasks actor._ongoing_rpc_tasks = trio.Event() # store cancel scope such that the rpc task can be @@ -1141,23 +1061,24 @@ async def process_messages( trio.Event(), ) - case Error()|_: - # This is the non-rpc error case, that is, an - # error **not** raised inside a call to ``_invoke()`` - # (i.e. no cid was provided in the msg - see above). - # Push this error to all local channel consumers - # (normally portals) by marking the channel as errored + case Error() | _: + # NOTE: this is the non-rpc error case, + # that is, an error **not** raised inside + # a call to ``_invoke()`` (i.e. no cid was + # provided in the msg - see above). Push + # this error to all local channel + # consumers (normally portals) by marking + # the channel as errored log.exception( f'Unhandled IPC msg:\n\n' f'{msg}\n' ) - assert chan.uid - exc = unpack_error( + # assert chan.uid + chan._exc: Exception = unpack_error( msg, chan=chan, ) - chan._exc = exc - raise exc + raise chan._exc log.runtime( 'Waiting on next IPC msg from\n' @@ -1168,7 +1089,8 @@ async def process_messages( # end of async for, channel disconnect vis # ``trio.EndOfChannel`` log.runtime( - f"{chan} for {chan.uid} disconnected, cancelling tasks" + f'channel for {chan.uid} disconnected, cancelling RPC tasks\n' + f'|_{chan}\n' ) await actor.cancel_rpc_tasks( req_uid=actor.uid, diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 3e7a2d7a..7355a610 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -454,6 +454,10 @@ _runtime_msgs: list[Msg] = [ # emission from `MsgStream.aclose()` Stop, + # `Return` sub-type that we always accept from + # runtime-internal cancel endpoints + CancelAck, + # box remote errors, normally subtypes # of `RemoteActorError`. Error, -- 2.34.1 From 6c672a67e246a926d898c1495b82b67899eedd6f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 7 Apr 2024 16:29:21 -0400 Subject: [PATCH 030/305] Use `object()` when checking for error field value Since the field value could be `None` or some other type with truthy-ness evaluating to `False`.. 
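
For reference, the sentinel pattern in isolation (a standalone sketch
with made-up field names, not the actual diff below):

    msg_dict: dict = {'pld': None}  # a falsy-but-present field value

    # BAD: a `None` (or `0`, `''`, ..) value is indistinguishable
    # from a missing key when using a plain truthiness check,
    if val := msg_dict.get('pld'):
        ...  # never runs for falsy values!

    # GOOD: a fresh `object()` instance is identical only to itself,
    # so key-presence vs. absence is unambiguous,
    sentinel = object()
    if (val := msg_dict.get('pld', sentinel)) is not sentinel:
        print(f'field is present with value {val!r}')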
--- tractor/_ipc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tractor/_ipc.py b/tractor/_ipc.py index 9af28e5a..694eaf9e 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -172,7 +172,8 @@ def _raise_msg_type_err( # specific field's type problem msgspec_msg: str = validation_err.args[0].rstrip('`') msg, _, maybe_field = msgspec_msg.rpartition('$.') - if field_val := msg_dict.get(maybe_field): + obj = object() + if (field_val := msg_dict.get(maybe_field, obj)) is not obj: field_type: Union[Type] = msg_type.__signature__.parameters[ maybe_field ].annotation -- 2.34.1 From e9f1d8e8bebc412ccced803be8b2d55d13cae04f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 7 Apr 2024 16:35:00 -0400 Subject: [PATCH 031/305] Detail out EoC-by-self log msg --- tractor/_streaming.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tractor/_streaming.py b/tractor/_streaming.py index 941cfe8d..dc30ac6e 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -46,7 +46,6 @@ from .trionics import ( from tractor.msg import ( Stop, Yield, - Error, ) if TYPE_CHECKING: @@ -391,11 +390,11 @@ class MsgStream(trio.abc.Channel): if not self._eoc: log.cancel( - 'Stream closed before it received an EoC?\n' + 'Stream closed by self before it received an EoC?\n' 'Setting eoc manually..\n..' ) self._eoc: bool = trio.EndOfChannel( - f'Context stream closed by {self._ctx.side}\n' + f'Context stream closed by self({self._ctx.side})\n' f'|_{self}\n' ) # ?XXX WAIT, why do we not close the local mem chan `._rx_chan` XXX? -- 2.34.1 From a13160d920ed8526020820944f4bab25db57060e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 7 Apr 2024 18:54:03 -0400 Subject: [PATCH 032/305] Finally drop masked `chan.send(None)` related code blocks --- tractor/_rpc.py | 89 ++++++++++++--------------------------------- tractor/_runtime.py | 48 +----------------------- 2 files changed, 25 insertions(+), 112 deletions(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index b7638335..de76e3cf 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -806,61 +806,15 @@ async def process_messages( log.transport( # type: ignore f'<= IPC msg from peer: {chan.uid}\n\n' - # TODO: conditionally avoid fmting depending - # on log level (for perf)? - # => specifically `pformat()` sub-call..? + # TODO: avoid fmting depending on loglevel for perf? + # -[ ] specifically `pformat()` sub-call..? + # -[ ] use `.msg.pretty_struct` here now instead! f'{pformat(msg)}\n' ) match msg: - - # NOTE: this *was a dedicated - # "graceful-terminate-loop" mechanism using - # a `None`-msg-sentinel which would cancel all RPC - # tasks parented by this loop's IPC channel; that - # is all rpc-scheduled-tasks started over the - # connection were explicitly per-task cancelled - # normally prior to the `Channel`'s underlying - # transport being later closed. - # - # * all `.send(None)`s were # removed as part of - # typed-msging requirements - # - # TODO: if this mechanism is still desired going - # forward it should be implemented as part of the - # normal runtime-cancel-RPC endpoints with either, - # - a special `msg.types.Msg` to trigger the loop endpoint - # (like `None` was used prior) or, - # - it should just be accomplished using A - # `Start(ns='self', func='cancel_rpc_tasks())` - # request instead? 
- # - # if msg is None: - # case None: - # tasks: dict[ - # tuple[Channel, str], - # tuple[Context, Callable, trio.Event] - # ] = actor._rpc_tasks.copy() - # log.cancel( - # f'Peer IPC channel terminated via `None` setinel msg?\n' - # f'=> Cancelling all {len(tasks)} local RPC tasks..\n' - # f'peer: {chan.uid}\n' - # f'|_{chan}\n' - # ) - # # TODO: why aren't we just calling - # # `.cancel_rpc_tasks()` with the parent - # # chan as input instead? - # for (channel, cid) in tasks: - # if channel is chan: - # await actor._cancel_task( - # cid, - # channel, - # requesting_uid=channel.uid, - # ipc_msg=msg, - # ) - # # immediately break out of this loop! - # break - + # msg for an ongoing IPC ctx session, deliver msg to + # local task. case ( StartAck(cid=cid) | Started(cid=cid) @@ -868,7 +822,7 @@ async def process_messages( | Stop(cid=cid) | Return(cid=cid) | CancelAck(cid=cid) - | Error(cid=cid) + | Error(cid=cid) # RPC-task ctx specific ): # deliver response to local caller/waiter # via its per-remote-context memory channel. @@ -877,10 +831,8 @@ async def process_messages( cid, msg, ) - # TODO: can remove right? - # continue - # runtime-internal cancellation endpoints + # `Actor`(-internal) runtime cancel requests case Start( ns='self', func='cancel', @@ -955,11 +907,9 @@ async def process_messages( ) # the "MAIN" RPC endpoint to schedule-a-`trio.Task` - # - # TODO: impl with native `msgspec.Struct` support !! - # -[ ] implement with ``match:`` syntax? - # -[ ] discard un-authed msgs as per, - # + # ------ - ------ + # -[x] discard un-authed msgs as per, + # case Start( cid=cid, ns=ns, @@ -983,7 +933,10 @@ async def process_messages( # application RPC endpoint else: try: - func: Callable = actor._get_rpc_func(ns, funcname) + func: Callable = actor._get_rpc_func( + ns, + funcname, + ) except ( ModuleNotExposed, AttributeError, @@ -1061,6 +1014,8 @@ async def process_messages( trio.Event(), ) + # XXX remote (runtime scoped) error or uknown + # msg (type). case Error() | _: # NOTE: this is the non-rpc error case, # that is, an error **not** raised inside @@ -1086,8 +1041,9 @@ async def process_messages( f'|_{chan}\n' ) - # end of async for, channel disconnect vis - # ``trio.EndOfChannel`` + # END-OF `async for`: + # IPC disconnected via `trio.EndOfChannel`, likely + # due to a (graceful) `Channel.aclose()`. log.runtime( f'channel for {chan.uid} disconnected, cancelling RPC tasks\n' f'|_{chan}\n' @@ -1107,9 +1063,10 @@ async def process_messages( # connection-reset) is ok since we don't have a teardown # handshake for them (yet) and instead we simply bail out of # the message loop and expect the teardown sequence to clean - # up. - # TODO: don't show this msg if it's an emphemeral - # discovery ep call? + # up.. + # TODO: add a teardown handshake? and, + # -[ ] don't show this msg if it's an ephemeral discovery ep call? + # -[ ] figure out how this will break with other transports? log.runtime( f'channel closed abruptly with\n' f'peer: {chan.uid}\n' diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 66a1db62..0b00f747 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -389,8 +389,9 @@ class Actor: raise mne + # TODO: maybe change to mod-func and rename for implied + # multi-transport semantics? async def _stream_handler( - self, stream: trio.SocketStream, @@ -715,51 +716,6 @@ class Actor: # TODO: figure out why this breaks tests.. db_cs.cancel() - # XXX TODO XXX: DO WE NEED THIS? 
- # -[ ] is it necessary any more (GC should do it) now
- # that we have strict(er) graceful cancellation
- # semantics?
- # XXX WARNING XXX
- # Be AWARE OF THE INDENT LEVEL HERE
- # -> ONLY ENTER THIS BLOCK WHEN ._peers IS
- # EMPTY!!!!
- #
- # if the channel is still connected it may mean the far
- # end has not closed and we may have gotten here due to
- # an error and so we should at least try to terminate
- # the channel from this end gracefully.
- #if (
- # not self._peers
- # and chan.connected()
- #):
- # log.runtime(
- # 'Terminating channel with `None` setinel msg\n'
- # f'|_{chan}\n'
- # )
- # try:
- # # ORIGINALLY we sent a msg loop terminate
- # # sentinel (`None`) which triggers
- # # cancellation of all remotely started
- # # tasks.
- # #
- # # HOWEVER, after we added typed msging,
- # # you can't just willy nilly send `None`
- # # wherever since it might be invalid given
- # # the currently configured msg-spec.
- # #
- # # SO, this was all removed and I'm pretty
- # # confident we don't need it replaced with
- # # a manual RPC to
- # # a `Actor.cancel_rpc_tasks()` right?
- # await chan.send(None)
 
- # # XXX: do we want this? NO RIGHT?
- # # causes "[104] connection reset by peer" on other end
- # # await chan.aclose()
 
- # except trio.BrokenResourceError:
- # log.runtime(f"Channel {chan.uid} was already closed")
 
 # TODO: rename to `._deliver_payload()` since this handles
 # more then just `result` msgs now obvi XD
 async def _push_result(
-- 
2.34.1


From c9d29933381fc92ef85ece1e03a7b229eeaa83f4 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Mon, 8 Apr 2024 10:13:14 -0400
Subject: [PATCH 033/305] Caps-msging test tweaks to get correct failures

These are likely temporary changes but still needed to actually see the
desired/correct failures (of which 5 of 6 tests are supposed to fail rn)
mostly to do with `Start` and `Return` msgs which are invalid under each
test's applied msg-spec.

Tweak set here:
- bit more `print()`s in root and sub for grokin test flow.
- never use `pytest.fail()` in a subactor.. should know this by now XD
- comment out some bits that can't ever pass rn and make the underlying
  expected failures harder to grok:
  - the sub's child-side-of-ctx task doing sends should only fail
    for certain msg types like `Started` + `Return`, `Yield`s are
    processed receiver/parent side.
  - don't expect the `sent` list to match the predicate set for the
    same reason as the last bullet.

The outstanding msg-type-semantic validation questions are:
- how to handle `.open_context()` with an input `kwargs` set that
  doesn't adhere to the currently applied msg-spec?
  - should the initial `@acm` entry fail before sending to the child
    side?
- where should received `MsgTypeError`s be raised, at the `MsgStream`
  `.receive()` or lower in the stack?
  - i'm thinking we should mk `MsgTypeError` derive from
    `RemoteActorError` and then have it be delivered as an error to the
    `Context`/`MsgStream` for per-ctx-task handling; would lead to more
    flexible/modular policy overrides in user code outside any defaults
    we provide.
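
As a sketch of that last idea (a purely hypothetical user-code policy
helper, NOT part of this change set), the "ignore invalid msgs" policy
could be just an async gen wrapping `MsgStream.receive()`:

    import trio
    import tractor

    async def iter_valid_msgs(stream: tractor.MsgStream):
        # relay only well-typed msgs, log-and-skip invalid ones;
        # assumes `MsgTypeError` is exposed at the pkg top level and
        # raised up via `.receive()` as mused above.
        while True:
            try:
                yield await stream.receive()
            except tractor.MsgTypeError as mte:
                print(f'Skipping invalid msg!\n{mte}\n')
            except trio.EndOfChannel:
                break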
--- tests/test_caps_based_msging.py | 85 ++++++++++++++++++++++----------- 1 file changed, 56 insertions(+), 29 deletions(-) diff --git a/tests/test_caps_based_msging.py b/tests/test_caps_based_msging.py index acc1f307..f7cab2a5 100644 --- a/tests/test_caps_based_msging.py +++ b/tests/test_caps_based_msging.py @@ -374,7 +374,7 @@ def enc_type_union( @tractor.context -async def send_back_nsp( +async def send_back_values( ctx: Context, expect_debug: bool, pld_spec_type_strs: list[str], @@ -388,6 +388,8 @@ async def send_back_nsp( and ensure we can round trip a func ref with our parent. ''' + uid: tuple = tractor.current_actor().uid + # debug mode sanity check (prolly superfluous but, meh) assert expect_debug == _state.debug_mode() @@ -414,7 +416,7 @@ async def send_back_nsp( ) print( - 'CHILD attempting `Started`-bytes DECODE..\n' + f'{uid}: attempting `Started`-bytes DECODE..\n' ) try: msg: Started = nsp_codec.decode(started_msg_bytes) @@ -436,7 +438,7 @@ async def send_back_nsp( raise else: print( - 'CHILD (correctly) unable to DECODE `Started`-bytes\n' + f'{uid}: (correctly) unable to DECODE `Started`-bytes\n' f'{started_msg_bytes}\n' ) @@ -445,7 +447,7 @@ async def send_back_nsp( for send_value, expect_send in iter_send_val_items: try: print( - f'CHILD attempting to `.started({send_value})`\n' + f'{uid}: attempting to `.started({send_value})`\n' f'=> expect_send: {expect_send}\n' f'SINCE, ipc_pld_spec: {ipc_pld_spec}\n' f'AND, codec: {codec}\n' @@ -460,7 +462,6 @@ async def send_back_nsp( # await tractor.pause() raise RuntimeError( - # pytest.fail( f'NOT-EXPECTED able to roundtrip value given spec:\n' f'ipc_pld_spec -> {ipc_pld_spec}\n' f'value -> {send_value}: {type(send_value)}\n' @@ -468,53 +469,76 @@ async def send_back_nsp( break # move on to streaming block.. - except NotImplementedError: - print('FAILED ENCODE!') - except tractor.MsgTypeError: # await tractor.pause() if expect_send: - pytest.fail( + raise RuntimeError( f'EXPECTED to `.started()` value given spec:\n' f'ipc_pld_spec -> {ipc_pld_spec}\n' f'value -> {send_value}: {type(send_value)}\n' ) async with ctx.open_stream() as ipc: + print( + f'{uid}: Entering streaming block to send remaining values..' 
+ ) + for send_value, expect_send in iter_send_val_items: send_type: Type = type(send_value) print( - 'CHILD report on send value\n' + '------ - ------\n' + f'{uid}: SENDING NEXT VALUE\n' f'ipc_pld_spec: {ipc_pld_spec}\n' f'expect_send: {expect_send}\n' f'val: {send_value}\n' + '------ - ------\n' ) try: await ipc.send(send_value) + print(f'***\n{uid}-CHILD sent {send_value!r}\n***\n') sent.append(send_value) - if not expect_send: - pytest.fail( - f'NOT-EXPECTED able to roundtrip value given spec:\n' - f'ipc_pld_spec -> {ipc_pld_spec}\n' - f'value -> {send_value}: {send_type}\n' - ) + + # NOTE: should only raise above on + # `.started()` or a `Return` + # if not expect_send: + # raise RuntimeError( + # f'NOT-EXPECTED able to roundtrip value given spec:\n' + # f'ipc_pld_spec -> {ipc_pld_spec}\n' + # f'value -> {send_value}: {send_type}\n' + # ) + except ValidationError: + print(f'{uid} FAILED TO SEND {send_value}!') + + # await tractor.pause() if expect_send: - pytest.fail( + raise RuntimeError( f'EXPECTED to roundtrip value given spec:\n' f'ipc_pld_spec -> {ipc_pld_spec}\n' f'value -> {send_value}: {send_type}\n' ) - continue + # continue - assert ( - len(sent) - == - len([val - for val, expect in - expect_ipc_send.values() - if expect is True]) - ) + else: + print( + f'{uid}: finished sending all values\n' + 'Should be exiting stream block!\n' + ) + + print(f'{uid}: exited streaming block!') + + # TODO: this won't be true bc in streaming phase we DO NOT + # msgspec check outbound msgs! + # -[ ] once we implement the receiver side `InvalidMsg` + # then we can expect it here? + # assert ( + # len(sent) + # == + # len([val + # for val, expect in + # expect_ipc_send.values() + # if expect is True]) + # ) def ex_func(*args): @@ -635,7 +659,7 @@ def test_codec_hooks_mod( async with ( p.open_context( - send_back_nsp, + send_back_values, expect_debug=debug_mode, pld_spec_type_strs=pld_spec_type_strs, add_hooks=add_codec_hooks, @@ -665,10 +689,13 @@ def test_codec_hooks_mod( async for next_sent in ipc: print( - 'Child sent next value\n' + 'Parent: child sent next value\n' f'{next_sent}: {type(next_sent)}\n' ) - expect_to_send.remove(next_sent) + if expect_to_send: + expect_to_send.remove(next_sent) + else: + print('PARENT should terminate stream loop + block!') # all sent values should have arrived! assert not expect_to_send -- 2.34.1 From 28a8d1507168fef0e82ea3d9d448645b1432baa6 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 8 Apr 2024 10:25:57 -0400 Subject: [PATCH 034/305] Rename `Actor._push_result()` -> `._deliver_ctx_payload()` Better describes the internal RPC impl/latest-architecture with the msgs delivered being those which either define a `.pld: PayloadT` that gets passed up to user code, or the error-msg subset that similarly is raised in a ctx-linked task. 
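
For orientation, the "payload msg" shape in question boils down to
roughly the following (hand-waved sketch only, see the real defs in
`tractor.msg.types`):

    from typing import Any
    from msgspec import Struct

    class Yield(Struct, tag=True, tag_field='msg_type'):
        cid: str  # the IPC ctx id this msg is routed with
        pld: Any  # the `PayloadT` passed up to user code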
--- tractor/_context.py | 10 +++++----- tractor/_rpc.py | 2 +- tractor/_runtime.py | 15 ++++++++++----- tractor/_streaming.py | 2 +- 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index b4e207a4..e0f62ec8 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -1210,7 +1210,7 @@ class Context: # XXX: (MEGA IMPORTANT) if this is a root opened process we # wait for any immediate child in debug before popping the # context from the runtime msg loop otherwise inside - # ``Actor._push_result()`` the msg will be discarded and in + # ``Actor._deliver_ctx_payload()`` the msg will be discarded and in # the case where that msg is global debugger unlock (via # a "stop" msg for a stream), this can result in a deadlock # where the root is waiting on the lock to clear but the @@ -1701,11 +1701,11 @@ class Context: # raise any msg type error NO MATTER WHAT! except msgspec.ValidationError as verr: - from tractor._ipc import _raise_msg_type_err - _raise_msg_type_err( + from tractor._ipc import _mk_msg_type_err + raise _mk_msg_type_err( msg=msg_bytes, codec=codec, - validation_err=verr, + src_validation_error=verr, verb_header='Trying to send payload' # > 'invalid `Started IPC msgs\n' ) @@ -2418,7 +2418,7 @@ async def open_context_from_portal( # XXX: (MEGA IMPORTANT) if this is a root opened process we # wait for any immediate child in debug before popping the # context from the runtime msg loop otherwise inside - # ``Actor._push_result()`` the msg will be discarded and in + # ``Actor._deliver_ctx_payload()`` the msg will be discarded and in # the case where that msg is global debugger unlock (via # a "stop" msg for a stream), this can result in a deadlock # where the root is waiting on the lock to clear but the diff --git a/tractor/_rpc.py b/tractor/_rpc.py index de76e3cf..b494af2b 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -826,7 +826,7 @@ async def process_messages( ): # deliver response to local caller/waiter # via its per-remote-context memory channel. 
- await actor._push_result( + await actor._deliver_ctx_payload( chan, cid, msg, diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 0b00f747..435464be 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -69,6 +69,7 @@ from tractor.msg import ( pretty_struct, NamespacePath, types as msgtypes, + Msg, ) from ._ipc import Channel from ._context import ( @@ -77,9 +78,10 @@ from ._context import ( ) from .log import get_logger from ._exceptions import ( - unpack_error, - ModuleNotExposed, ContextCancelled, + ModuleNotExposed, + MsgTypeError, + unpack_error, TransportClosed, ) from .devx import _debug @@ -559,7 +561,7 @@ class Actor: cid: str|None = msg.cid if cid: # deliver response to local caller/waiter - await self._push_result( + await self._deliver_ctx_payload( chan, cid, msg, @@ -718,11 +720,11 @@ class Actor: # TODO: rename to `._deliver_payload()` since this handles # more then just `result` msgs now obvi XD - async def _push_result( + async def _deliver_ctx_payload( self, chan: Channel, cid: str, - msg: dict[str, Any], + msg: Msg|MsgTypeError, ) -> None|bool: ''' @@ -751,6 +753,9 @@ class Actor: ) return + # if isinstance(msg, MsgTypeError): + # return await ctx._deliver_bad_msg() + return await ctx._deliver_msg(msg) def get_context( diff --git a/tractor/_streaming.py b/tractor/_streaming.py index dc30ac6e..fcf8dafc 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -183,7 +183,7 @@ class MsgStream(trio.abc.Channel): # - via a received `{'stop': ...}` msg from remote side. # |_ NOTE: previously this was triggered by calling # ``._rx_chan.aclose()`` on the send side of the channel inside - # `Actor._push_result()`, but now the 'stop' message handling + # `Actor._deliver_ctx_payload()`, but now the 'stop' message handling # has been put just above inside `_raise_from_no_key_in_msg()`. except ( trio.EndOfChannel, -- 2.34.1 From 6e72f2ef13d68fc8007a55a5abb7d23e3fadbfcb Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 9 Apr 2024 08:44:06 -0400 Subject: [PATCH 035/305] Unify `MsgTypeError` as a `RemoteActorError` subtype Since in the receive-side error case the source of the exception is the sender side (normally causing a local `TypeError` at decode time), might as well bundle the error in remote-capture-style using boxing semantics around the causing local type error raised from the `msgspec.msgpack.Decoder.decode()` and with a traceback packed from `msgspec`-specific knowledge of any field-type spec matching failure. Deats on new `MsgTypeError` interface: - includes a `.msg_dict` to get access to any `Decoder.type`-applied load of the original (underlying and offending) IPC msg into a `dict` form using a vanilla decoder which is normally packed into the instance as a `._msg_dict`. - a public getter to the "supposed offending msg" via `.payload_msg` which attempts to take the above `.msg_dict` and load it manually into the corresponding `.msg.types.MsgType` struct. - a constructor `.from_decode()` to make it simple to build out error instances from a failed decode scope where the aforementioned `msgdict: dict` from the vanilla decode can be provided directly. - ALSO, we now pack into `MsgTypeError` directly just like ctxc in `unpack_error()` This also completes the while-standing todo for `RemoteActorError` to contain a ref to the underlying `Error` msg as `._ipc_msg` with public `@property` access that `defstruct()`-creates a pretty struct version via `.ipc_msg`. 
Internal tweaks for this include:
- `._ipc_msg` is the internal literal `Error`-msg instance if provided
  with `.ipc_msg` the dynamic wrapper as mentioned above.
- `.__init__()` now can still take variable `**extra_msgdata` (similar
  to the `dict`-msgdata as before) to maintain support for subtypes
  which are constructed manually (not only by `pack_error()`) and
  insert their own attrs which get placed in a `._extra_msgdata: dict`
  if no `ipc_msg: Error` is provided as input.
- the `.msgdata` is now a merge of any `._extra_msgdata` and
  a `dict`-casted form of any `._ipc_msg`.
- adjust all previous `.msgdata` field lookups to try equivalent field
  reads on `._ipc_msg: Error`.
- drop default single ws indent from `.tb_str` and do a failover lookup
  to `.msgdata` when `._ipc_msg is None` for the manually constructed
  subtype-instance case.
- add a new class attr `.extra_body_fields: list[str]` to allow
  subtypes to declare attrs they want shown in the `.__repr__()`
  output, eg. `ContextCancelled.canceller`, `StreamOverrun.sender`
  and `MsgTypeError.payload_msg`.
- ^-rework defaults pertaining to-^ with rename from `_msgdata_keys` ->
  `_ipcmsg_keys` with the latter now just loading directly from the
  `Error` fields def and `_body_fields: list[str]` just taking that
  value and removing the not-so-useful-in-REPL or already shown
  (i.e. `.tb_str: str`) field names.
- add a new mod level `.pack_from_raise()` helper for auto-boxing RAE
  subtypes constructed manually into `Error`s which is normally how
  `StreamOverrun` and `MsgTypeError` get created in the runtime.
- in support of the above expose a `src_uid: tuple` override to
  `pack_error()` such that the runtime can provide any remote actor id
  when packing a locally-created yet remotely-caused RAE subtype.
- adjust all typing to expect `Error`s over `dict`-msgs.

Adjust some tests to match these changes:
- context and inter-peer-cancel tests to make their `.msgdata` related
  checks against the new `.ipc_msg` as well as `.tb_str` directly.
- toss in an extra sleep to `sleep_a_bit_then_cancel_peer()` to keep
  the 'canceller' ctx child task cancelled by its parent in the 'root'
  for the rte-raised-during-ctxc-handling case (apparently now it's
  returning too fast, cool?).
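
Usage sketch of the new constructor + getters (msg-dict contents
invented purely for illustration):

    from tractor._exceptions import MsgTypeError

    # a "vanilla" (non-spec) decode of some offending wire-msg,
    msgdict: dict = {
        'msg_type': 'Started',
        'cid': 'deadbeef',
        'pld': 'not-the-expected-type',
    }
    mte = MsgTypeError.from_decode(
        message='invalid `Started` IPC msg\n',
        msgdict=msgdict,
    )
    # the raw dict is stashed for introspection,
    assert mte.msg_dict['cid'] == 'deadbeef'
    # and can be re-loaded as the original payload-msg subtype:
    started = mte.payload_msg  # -> Started(cid='deadbeef', ..)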
--- tests/test_context_stream_semantics.py | 7 +- tests/test_inter_peer_cancellation.py | 10 + tractor/_exceptions.py | 418 +++++++++++++++++++------ 3 files changed, 333 insertions(+), 102 deletions(-) diff --git a/tests/test_context_stream_semantics.py b/tests/test_context_stream_semantics.py index d2b572cc..cedddf73 100644 --- a/tests/test_context_stream_semantics.py +++ b/tests/test_context_stream_semantics.py @@ -796,10 +796,12 @@ async def test_callee_cancels_before_started( # raises a special cancel signal except tractor.ContextCancelled as ce: + _ce = ce # for debug on crash ce.boxed_type == trio.Cancelled # the traceback should be informative - assert 'itself' in ce.msgdata['tb_str'] + assert 'itself' in ce.tb_str + assert ce.tb_str == ce.msgdata['tb_str'] # teardown the actor await portal.cancel_actor() @@ -1157,7 +1159,8 @@ def test_maybe_allow_overruns_stream( elif slow_side == 'parent': assert err.boxed_type == tractor.RemoteActorError - assert 'StreamOverrun' in err.msgdata['tb_str'] + assert 'StreamOverrun' in err.tb_str + assert err.tb_str == err.msgdata['tb_str'] else: # if this hits the logic blocks from above are not diff --git a/tests/test_inter_peer_cancellation.py b/tests/test_inter_peer_cancellation.py index 470287fb..aa05e3c8 100644 --- a/tests/test_inter_peer_cancellation.py +++ b/tests/test_inter_peer_cancellation.py @@ -185,6 +185,10 @@ async def sleep_a_bit_then_cancel_peer( await trio.sleep(cancel_after) await peer.cancel_actor() + # such that we're cancelled by our rent ctx-task + await trio.sleep(3) + print('CANCELLER RETURNING!') + @tractor.context async def stream_ints( @@ -245,6 +249,12 @@ async def stream_from_peer( assert peer_ctx._remote_error is ctxerr assert peer_ctx._remote_error.msgdata == ctxerr.msgdata + # XXX YES, bc exact same msg instances + assert peer_ctx._remote_error._ipc_msg is ctxerr._ipc_msg + + # XXX NO, bc new one always created for property accesss + assert peer_ctx._remote_error.ipc_msg != ctxerr.ipc_msg + # the peer ctx is the canceller even though it's canceller # is the "canceller" XD assert peer_name in peer_ctx.canceller diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 28c61628..a31aa11e 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -31,7 +31,10 @@ import textwrap import traceback import trio -from msgspec import structs +from msgspec import ( + structs, + defstruct, +) from tractor._state import current_actor from tractor.log import get_logger @@ -40,6 +43,8 @@ from tractor.msg import ( Msg, Stop, Yield, + pretty_struct, + types as msgtypes, ) if TYPE_CHECKING: @@ -64,21 +69,38 @@ class InternalError(RuntimeError): ''' -_body_fields: list[str] = [ - 'boxed_type', - 'src_type', - # TODO: format this better if we're going to include it. - # 'relay_path', - 'src_uid', - # only in sub-types - 'canceller', - 'sender', +# NOTE: more or less should be close to these: +# 'boxed_type', +# 'src_type', +# 'src_uid', +# 'canceller', +# 'sender', +# TODO: format this better if we're going to include it. +# 'relay_path', +# +_ipcmsg_keys: list[str] = [ + fi.name + for fi, k, v + in pretty_struct.iter_fields(Error) + ] -_msgdata_keys: list[str] = [ - 'boxed_type_str', -] + _body_fields +_body_fields: list[str] = list( + set(_ipcmsg_keys) + + # NOTE: don't show fields that either don't provide + # any extra useful info or that are already shown + # as part of `.__repr__()` output. 
+ - { + 'src_type_str', + 'boxed_type_str', + 'tb_str', + 'relay_path', + '_msg_dict', + 'cid', + } +) def get_err_type(type_name: str) -> BaseException|None: @@ -137,7 +159,7 @@ def pformat_boxed_tb( f'|\n' f' ------ - ------\n\n' f'{tb_str}\n' - f' ------ - ------\n' + f' ------ - ------\n' f'_|\n' ) if len(indent): @@ -152,10 +174,40 @@ def pformat_boxed_tb( + body ) - # return body -# TODO: rename to just `RemoteError`? +def pack_from_raise( + local_err: ( + ContextCancelled + |StreamOverrun + |MsgTypeError + ), + cid: str, + + **rae_fields, + +) -> Error: + ''' + Raise the provided `RemoteActorError` subtype exception + instance locally to get a traceback and pack it into an IPC + `Error`-msg using `pack_error()` to extract the tb info. + + ''' + try: + raise local_err + except type(local_err) as local_err: + err_msg: dict[str, dict] = pack_error( + local_err, + cid=cid, + **rae_fields, + ) + return err_msg + + +# TODO: better compat with IPC msg structs? +# -[ ] rename to just `RemoteError` like in `mp.manager`? +# -[ ] make a `Struct`-subtype by using the .__post_init__()`? +# https://jcristharif.com/msgspec/structs.html#post-init-processing class RemoteActorError(Exception): ''' A box(ing) type which bundles a remote actor `BaseException` for @@ -170,12 +222,28 @@ class RemoteActorError(Exception): 'src_uid', # 'relay_path', ] + extra_body_fields: list[str] = [ + 'cid', + 'boxed_type', + ] def __init__( self, message: str, + ipc_msg: Error|None = None, boxed_type: Type[BaseException]|None = None, - **msgdata + + # NOTE: only provided by subtypes (ctxc and overruns) + # wishing to both manually instantiate and add field + # values defined on `Error` without having to construct an + # `Error()` before the exception is processed by + # `pack_error()`. + # + # TODO: a better way to support this without the extra + # private `._extra_msgdata`? + # -[ ] ctxc constructed inside `._rpc._invoke()` L:638 + # -[ ] overrun @ `._context.Context._deliver_msg()` L:1958 + **extra_msgdata, ) -> None: super().__init__(message) @@ -188,14 +256,24 @@ class RemoteActorError(Exception): # - .remote_type # also pertains to our long long oustanding issue XD # https://github.com/goodboy/tractor/issues/5 - # - # TODO: always set ._boxed_type` as `None` by default - # and instead render if from `.boxed_type_str`? self._boxed_type: BaseException = boxed_type self._src_type: BaseException|None = None + self._ipc_msg: Error|None = ipc_msg - # TODO: make this a `.errmsg: Error` throughout? - self.msgdata: dict[str, Any] = msgdata + if ( + extra_msgdata + and ipc_msg + ): + # XXX mutate the orig msg directly from + # manually provided input params. + for k, v in extra_msgdata.items(): + setattr( + self._ipc_msg, + k, + v, + ) + else: + self._extra_msgdata = extra_msgdata # TODO: mask out eventually or place in `pack_error()` # pre-`return` lines? @@ -214,14 +292,56 @@ class RemoteActorError(Exception): # either by customizing `ContextCancelled.__init__()` or # through a special factor func? 
elif boxed_type: - if not self.msgdata.get('boxed_type_str'): - self.msgdata['boxed_type_str'] = str( - type(boxed_type).__name__ - ) + boxed_type_str: str = type(boxed_type).__name__ + if ( + ipc_msg + and not self._ipc_msg.boxed_type_str + ): + self._ipc_msg.boxed_type_str = boxed_type_str + assert self.boxed_type_str == self._ipc_msg.boxed_type_str + + else: + self._extra_msgdata['boxed_type_str'] = boxed_type_str - assert self.boxed_type_str == self.msgdata['boxed_type_str'] assert self.boxed_type is boxed_type + @property + def ipc_msg(self) -> pretty_struct.Struct: + ''' + Re-render the underlying `._ipc_msg: Msg` as + a `pretty_struct.Struct` for introspection such that the + returned value is a read-only copy of the original. + + ''' + if self._ipc_msg is None: + return None + + msg_type: Msg = type(self._ipc_msg) + fields: dict[str, Any] = { + k: v for _, k, v in + pretty_struct.iter_fields(self._ipc_msg) + } + return defstruct( + msg_type.__name__, + fields=fields.keys(), + bases=(msg_type, pretty_struct.Struct), + )(**fields) + + @property + def msgdata(self) -> dict[str, Any]: + ''' + The (remote) error data provided by a merge of the + `._ipc_msg: Error` msg and any input `._extra_msgdata: dict` + (provided by subtypes via `.__init__()`). + + ''' + msgdata: dict = ( + structs.asdict(self._ipc_msg) + if self._ipc_msg + else {} + ) + return self._extra_msgdata | msgdata + @property def src_type_str(self) -> str: ''' @@ -231,7 +351,7 @@ class RemoteActorError(Exception): at the first relay/hop's receiving actor. ''' - return self.msgdata['src_type_str'] + return self._ipc_msg.src_type_str @property def src_type(self) -> str: @@ -241,7 +361,7 @@ class RemoteActorError(Exception): ''' if self._src_type is None: self._src_type = get_err_type( - self.msgdata['src_type_str'] + self._ipc_msg.src_type_str ) return self._src_type @@ -252,7 +372,7 @@ class RemoteActorError(Exception): String-name of the (last hop's) boxed error type. ''' - return self.msgdata['boxed_type_str'] + return self._ipc_msg.boxed_type_str @property def boxed_type(self) -> str: @@ -262,7 +382,7 @@ class RemoteActorError(Exception): ''' if self._boxed_type is None: self._boxed_type = get_err_type( - self.msgdata['boxed_type_str'] + self._ipc_msg.boxed_type_str ) return self._boxed_type @@ -275,40 +395,44 @@ class RemoteActorError(Exception): actor's hop. NOTE: a `list` field with the same name is expected to be - passed/updated in `.msgdata`. + passed/updated in `.ipc_msg`. ''' - return self.msgdata['relay_path'] + return self._ipc_msg.relay_path @property def relay_uid(self) -> tuple[str, str]|None: return tuple( - self.msgdata['relay_path'][-1] + self._ipc_msg.relay_path[-1] ) @property def src_uid(self) -> tuple[str, str]|None: if src_uid := ( - self.msgdata.get('src_uid') + self._ipc_msg.src_uid ): return tuple(src_uid) # TODO: use path lookup instead? 
# return tuple( - # self.msgdata['relay_path'][0] + # self._ipc_msg.relay_path[0] # ) @property def tb_str( self, - indent: str = ' ', + indent: str = '', ) -> str: - if remote_tb := self.msgdata.get('tb_str'): - return textwrap.indent( - remote_tb, - prefix=indent, - ) + remote_tb: str = '' - return '' + if self._ipc_msg: + remote_tb: str = self._ipc_msg.tb_str + else: + remote_tb = self.msgdata.get('tb_str') + + return textwrap.indent( + remote_tb or '', + prefix=indent, + ) def _mk_fields_str( self, @@ -320,14 +444,17 @@ class RemoteActorError(Exception): val: Any|None = ( getattr(self, key, None) or - self.msgdata.get(key) + getattr( + self._ipc_msg, + key, + None, + ) ) # TODO: for `.relay_path` on multiline? # if not isinstance(val, str): # val_str = pformat(val) # else: val_str: str = repr(val) - if val: _repr += f'{key}={val_str}{end_char}' @@ -358,7 +485,9 @@ class RemoteActorError(Exception): ''' fields: str = self._mk_fields_str( - _body_fields, + _body_fields + + + self.extra_body_fields, ) body: str = pformat_boxed_tb( tb_str=self.tb_str, @@ -415,15 +544,6 @@ class RemoteActorError(Exception): # raise NotImplementedError -class InternalActorError(RemoteActorError): - ''' - (Remote) internal `tractor` error indicating failure of some - primitive, machinery state or lowlevel task that should never - occur. - - ''' - - class ContextCancelled(RemoteActorError): ''' Inter-actor task context was cancelled by either a call to @@ -433,6 +553,10 @@ class ContextCancelled(RemoteActorError): reprol_fields: list[str] = [ 'canceller', ] + extra_body_fields: list[str] = [ + 'cid', + 'canceller', + ] @property def canceller(self) -> tuple[str, str]|None: ''' @@ -454,7 +578,7 @@ class ContextCancelled(RemoteActorError): |_`._cancel_task()` ''' - value = self.msgdata.get('canceller') + value: tuple[str, str]|None = self._ipc_msg.canceller if value: return tuple(value) @@ -468,6 +592,132 @@ class ContextCancelled(RemoteActorError): # src_actor_uid = canceller +class MsgTypeError( + RemoteActorError, +): + ''' + Equivalent of a runtime `TypeError` for IPC dialogs. + + Raise when any IPC wire-message is decoded to have invalid + field values (due to type) or for other `MsgCodec` related + violations such as having no extension-type for a field with + a custom type but no `enc/dec_hook()` support. + + Can be raised on the send or recv side of an IPC `Channel` + depending on the particular msg. + + Msgs which cause this to be raised on the `.send()` side (aka + in the "ctl" dialog phase) include: + - `Start` + - `Started` + - `Return` + + Those which cause it on on the `.recv()` side (aka the "nasty + streaming" dialog phase) are: + - `Yield` + - TODO: any embedded `.pld` type defined by user code? + + Normally the source of an error is re-raised from some `.msg._codec` + decode which itself raises in a backend interchange + lib (eg. a `msgspec.ValidationError`). + + ''' + reprol_fields: list[str] = [ + 'ipc_msg', + ] + extra_body_fields: list[str] = [ + 'cid', + 'payload_msg', + ] + + @property + def msg_dict(self) -> dict[str, Any]: + ''' + If the underlying IPC `Msg` was received from a remote + actor but was unable to be decoded to a native + `Yield`|`Started`|`Return` struct, the interchange backend + native format decoder can be used to stash a `dict` + version for introspection by the invalidating RPC task. 
+ + ''' + return self.msgdata.get('_msg_dict') + + @property + def payload_msg(self) -> Msg|None: + ''' + Attempt to construct what would have been the original + `Msg`-with-payload subtype (i.e. an instance from the set + of msgs in `.msg.types._payload_msgs`) which failed + validation. + + ''' + msg_dict: dict = self.msg_dict.copy() + name: str = msg_dict.pop('msg_type') + msg_type: Msg = getattr( + msgtypes, + name, + Msg, + ) + return msg_type(**msg_dict) + + @property + def cid(self) -> str: + # pre-packed using `.from_decode()` constructor + return self.msgdata.get('cid') + + @classmethod + def from_decode( + cls, + message: str, + msgdict: dict, + + ) -> MsgTypeError: + return cls( + message=message, + + # NOTE: original "vanilla decode" of the msg-bytes + # is placed inside a value readable from + # `.msgdata['_msg_dict']` + _msg_dict=msgdict, + + # expand and pack all RAE compat fields + # into the `._extra_msgdata` aux `dict`. + **{ + k: v + for k, v in msgdict.items() + if k in _ipcmsg_keys + }, + ) + + +class StreamOverrun( + RemoteActorError, + trio.TooSlowError, +): + reprol_fields: list[str] = [ + 'sender', + ] + ''' + This stream was overrun by its sender and can be optionally + handled by app code using `MsgStream.send()/.receive()`. + + ''' + @property + def sender(self) -> tuple[str, str] | None: + value = self._ipc_msg.sender + if value: + return tuple(value) + + +# class InternalActorError(RemoteActorError): +# ''' +# Boxed (Remote) internal `tractor` error indicating failure of some +# primitive, machinery state or lowlevel task that should never +# occur. + +# ''' + + class TransportClosed(trio.ClosedResourceError): "Underlying channel transport was closed prior to use" @@ -484,23 +734,6 @@ class NoRuntime(RuntimeError): "The root actor has not been initialized yet" -class StreamOverrun( - RemoteActorError, - trio.TooSlowError, -): - reprol_fields: list[str] = [ - 'sender', - ] - ''' - This stream was overrun by sender - - ''' - @property - def sender(self) -> tuple[str, str] | None: - value = self.msgdata.get('sender') - if value: - return tuple(value) - class AsyncioCancelled(Exception): ''' @@ -518,23 +751,12 @@ class MessagingError(Exception): ''' -class MsgTypeError(MessagingError): - ''' - Equivalent of a `TypeError` for an IPC wire-message - due to an invalid field value (type). - - Normally this is re-raised from some `.msg._codec` - decode error raised by a backend interchange lib - like `msgspec` or `pycapnproto`. - - ''' - - def pack_error( exc: BaseException|RemoteActorError, tb: str|None = None, cid: str|None = None, + src_uid: tuple[str, str]|None = None, ) -> Error: ''' @@ -560,7 +782,8 @@ def pack_error( ): error_msg.update(exc.msgdata) - # an onion/inception we need to pack + # an onion/inception we need to pack as a nested and relayed + # remotely boxed error. if ( type(exc) is RemoteActorError and (boxed := exc.boxed_type) @@ -584,7 +807,7 @@ def pack_error( error_msg['boxed_type_str'] = 'RemoteActorError' else: - error_msg['src_uid'] = our_uid + error_msg['src_uid'] = src_uid or our_uid error_msg['src_type_str'] = type(exc).__name__ error_msg['boxed_type_str'] = type(exc).__name__ @@ -596,7 +819,7 @@ def pack_error( # XXX NOTE: always ensure the traceback-str is from the # locally raised error (**not** the prior relay's boxed - # content's `.msgdata`). + # content's in `._ipc_msg.tb_str`). 
error_msg['tb_str'] = tb_str if cid is not None: @@ -606,7 +829,7 @@ def pack_error( def unpack_error( - msg: dict[str, Any]|Error, + msg: Error, chan: Channel|None = None, box_type: RemoteActorError = RemoteActorError, @@ -624,16 +847,10 @@ def unpack_error( ''' __tracebackhide__: bool = hide_tb - error_dict: dict[str, dict]|None if not isinstance(msg, Error): - # if ( - # error_dict := msg.get('error') - # ) is None: - # no error field, nothing to unpack. return None - # retrieve the remote error's msg encoded details - # tb_str: str = error_dict.get('tb_str', '') + # retrieve the remote error's encoded details from fields tb_str: str = msg.tb_str message: str = ( f'{chan.uid}\n' @@ -651,6 +868,10 @@ def unpack_error( box_type = ContextCancelled assert boxed_type is box_type + elif boxed_type_str == 'MsgTypeError': + box_type = MsgTypeError + assert boxed_type is box_type + # TODO: already included by `_this_mod` in else loop right? # # we have an inception/onion-error so ensure @@ -661,12 +882,9 @@ def unpack_error( # assert len(error_dict['relay_path']) >= 1 assert len(msg.relay_path) >= 1 - # TODO: mk RAE just take the `Error` instance directly? - error_dict: dict = structs.asdict(msg) - exc = box_type( message, - **error_dict, + ipc_msg=msg, ) return exc -- 2.34.1 From 7a050e5edbb546cd74964b92668e9588be1a8214 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 9 Apr 2024 10:09:05 -0400 Subject: [PATCH 036/305] Expose `MsgType` and extend `MsgCodec` API a bit Make a new `MsgType: TypeAlias` for the union of all msg types such that it can be used in annots throughout the code base; just make `.msg.__msg_spec__` delegate to it. Add some new codec methods: - `pld_spec_str`: for the `str`-casted value of the payload spec, generally useful in logging content. - `msg_spec_items()`: to render a `dict` of msg types to their `str()`-casted values with support for singling out a specific `MsgType`, type by input `msg` instance. - `pformat_msg_spec()`: for rendering the (partial) `.msg_spec` as a formatted `str` useful in logging. Oh right, add a `Error._msg_dict: dict` in support of the previous commit (for `MsgTypeError` packing as RAEs) such that our error msg type can house a non-type-spec decoded wire-bytes for error reporting/analysis purposes. --- tractor/msg/__init__.py | 10 ++++----- tractor/msg/_codec.py | 46 +++++++++++++++++++++++++++++++++++++---- tractor/msg/types.py | 25 +++++++++++++++++++++- 3 files changed, 71 insertions(+), 10 deletions(-) diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py index fe965e0b..443b781b 100644 --- a/tractor/msg/__init__.py +++ b/tractor/msg/__init__.py @@ -19,7 +19,6 @@ Built-in messaging patterns, types, APIs and helpers. 
''' from typing import ( - Union, TypeAlias, ) from .ptr import ( @@ -56,8 +55,9 @@ from .types import ( # full msg class set from above as list __msg_types__ as __msg_types__, + + # type-alias for union of all msgs + MsgType as MsgType, ) -# TODO: use new type declaration syntax for msg-type-spec -# https://docs.python.org/3/library/typing.html#type-aliases -# https://docs.python.org/3/reference/simple_stmts.html#type -__msg_spec__: TypeAlias = Union[*__msg_types__] + +__msg_spec__: TypeAlias = MsgType diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index 56f24d62..de3316c8 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -57,7 +57,7 @@ from trio.lowlevel import ( from tractor.msg.pretty_struct import Struct from tractor.msg.types import ( mk_msg_spec, - Msg, + MsgType, ) @@ -87,12 +87,50 @@ class MsgCodec(Struct): pld_spec: Union[Type[Struct]]|None + @property + def pld_spec_str(self) -> str: + spec: Union[Type]|Type = self.pld_spec + + # TODO: could also use match: instead? + if getattr(spec, '__args__', False): + # `typing.Union` case + return str(spec) + else: + return spec.__name__ + # struct type unions # https://jcristharif.com/msgspec/structs.html#tagged-unions @property def msg_spec(self) -> Union[Type[Struct]]: return self._dec.type + def msg_spec_items( + self, + msg: MsgType|None = None, + + ) -> dict[str, MsgType]|str: + + msgt_table: dict[str, MsgType] = { + msgt: str(msgt) + for msgt in self.msg_spec.__args__ + } + if msg: + msgt: MsgType = type(msg) + str_repr: str = msgt_table[msgt] + return {msgt: str_repr} + + return msgt_table + + # TODO: some way to make `pretty_struct.Struct` use this + # wrapped field over the `.msg_spec` one? + def pformat_msg_spec( + self, + msg: MsgType|None = None, + ) -> str: + return '\n'.join( + self.msg_spec_items(msg=msg).values() + ) + lib: ModuleType = msgspec # TODO: a sub-decoder system as well? @@ -108,7 +146,7 @@ class MsgCodec(Struct): # OR # ) = { # # pre-seed decoders for std-py-type-set for use when - # # `Msg.pld == None|Any`. + # # `MsgType.pld == None|Any`. # None: msgpack.Decoder(Any), # Any: msgpack.Decoder(Any), # } @@ -303,7 +341,7 @@ def mk_codec( # by `tag_field: str` value key? # payload_msg_specs: dict[ # str, # tag_field value as sub-decoder key - # Union[Type[Struct]] # `Msg.pld` type spec + # Union[Type[Struct]] # `MsgType.pld` type spec # ]|None = None, libname: str = 'msgspec', @@ -336,7 +374,7 @@ def mk_codec( raise RuntimeError( f'If a payload spec is provided,\n' "the builtin SC-shuttle-protocol's msg set\n" - f'(i.e. `{Msg}`) MUST be used!\n\n' + f'(i.e. a `{MsgType}`) MUST be used!\n\n' f'However both values were passed as => mk_codec(\n' f' ipc_msg_spec={ipc_msg_spec}`\n' f' ipc_pld_spec={ipc_pld_spec}`\n)\n' diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 7355a610..14db09cd 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -31,6 +31,7 @@ from typing import ( Literal, Type, TypeVar, + TypeAlias, Union, ) @@ -400,16 +401,29 @@ class CancelAck( pld: bool +# TODO: unify this with `._exceptions.RemoteActorError` +# such that we can have a msg which is both raisable and +# IPC-wire ready? +# B~o class Error( Struct, tag=True, tag_field='msg_type', + + # TODO may omit defaults? + # https://jcristharif.com/msgspec/structs.html#omitting-default-values + # omit_defaults=True, ): ''' A pkt that wraps `RemoteActorError`s for relay and raising. Fields are 1-to-1 meta-data as needed originally by - `RemoteActorError.msgdata: dict`. 
+ `RemoteActorError.msgdata: dict` but now are defined here.
+
+ Note: this msg shuttles `ContextCancelled` and `StreamOverrun`
+ as well as being used to rewrap any `MsgTypeError` for relay-response
+ to bad `Yield.pld` senders during an IPC ctx's streaming dialog
+ phase.
 
 '''
 src_uid: tuple[str, str]
@@ -428,6 +442,10 @@ class Error(
 # `StreamOverrun`
 sender: tuple[str, str]|None = None
 
+ # for the `MsgTypeError` case where the receiver side
+ # decodes the underlying original `Msg`-subtype
+ _msg_dict: dict|None = None
+
 
 # TODO: should be make a msg version of `ContextCancelled?`
 # and/or with a scope field or a full `ActorCancelled`?
@@ -486,6 +504,11 @@ __msg_types__: list[Msg] = (
 _payload_msgs
 )
 
+# TODO: use new type declaration syntax for msg-type-spec
+# https://docs.python.org/3/library/typing.html#type-aliases
+# https://docs.python.org/3/reference/simple_stmts.html#type
+MsgType: TypeAlias = Union[*__msg_types__]
+
 
 def mk_msg_spec(
 payload_type_union: Union[Type] = Any,
-- 
2.34.1


From 6628fa00d965c66e7e0732ae6aea0b96c265bb42 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Tue, 9 Apr 2024 10:36:25 -0400
Subject: [PATCH 037/305] Refine `MsgTypeError` handling to relay-up-on-`.recv()`

Such that `Channel.recv()` + `MsgpackTCPStream.recv()` originating
msg-type-errors are not raised at the IPC transport layer but instead
relayed up the runtime stack for eventual handling by user-app code via
the `Context`/`MsgStream` layer APIs.

This design choice leads to a substantial amount of flexibility and
modularity, and keeps `MsgTypeError` handling policies from being
coupled to a particular backend IPC transport layer:
- receive-side msg-type errors, as can be raised and handled in the
  `.open_stream()` "nasty" phase of a ctx, whilst being packed at the
  `MsgCodec`/transport layer (keeping the underlying src decode error
  coupled to the specific transport + interchange lib) and then relayed
  upward to app code for custom handling like a normal `Error` msg.
- the policy options for handling such cases could be implemented as
  `@acm` wrappers around `.open_context()`/`.open_stream()` blocks (and
  their respective delivered primitives) OR just plain old async
  generators around `MsgStream.receive()` such that both built-in policy
  handling and custom user-app solutions can be swapped without touching
  any `tractor` internals or providing specialized "registry APIs".
  -> eg. the ignore and relay-invalid-msg-to-sender approach can be
  more easily implemented as embedded `try: except MsgTypeError:`
  blocks around `MsgStream.receive()` possibly applied as either of an
  injected wrapper type around a stream or an async gen that `async
  for`s from the stream.
- any performance based AOT-lang extensions used to implement a policy
  for handling recv-side errors can avoid knowledge of the lower level
  IPC `Channel` (and-downward) primitives.
- `Context` consuming code can choose to let all msg-type-errs bubble
  and handle them manually (like any other remote `Error` shuttled
  exception).
- we can keep (as before) send-side msg type checks which are raised
  locally and cause offending senders to error and adjust before the
  streaming phase of an IPC ctx.

Impl (related) deats:
- obvi make `MsgpackTCPStream.recv()` yield up any `MsgTypeError`
  constructed by `_mk_msg_type_err()` such that the exception will
  eventually be relayed up to `._rpc.process_messages()` and from
  there delivered to the corresponding ctx-task.
- in support of ^, make `Channel.recv()` detect said mtes and use the new `pack_from_raise()` to inject the far end `Actor.uid` for the `Error.src_uid`. - keep raising the send side equivalent (when strict enabled) errors inline immediately with no upward `Error` packing or relay. - improve `_mk_msg_type_err()` cases handling with far more detailed `MsgTypeError` "message" contents pertaining to `msgspec` specific failure-fixing-tips and type-spec mismatch info: * use `.from_decode()` constructor in recv-side case to inject the non-spec decoded `msg_dict: dict` and use the new `MsgCodec.pld_spec_str: str` when clarifying the type discrepancy with the offending field. * on send-side, if we detect that an unsupported field type was described in the original `src_type_error`, AND there is no `msgpack.Encoder.enc_hook()` set, that the real issue is likely that the user needs to extend the codec to support the non-std/custom type with a hook and link to `msgspec` docs. * if one of a `src_type/validation_error` is provided, set that error as the `.__cause__` in the new mte. --- tractor/_ipc.py | 163 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 115 insertions(+), 48 deletions(-) diff --git a/tractor/_ipc.py b/tractor/_ipc.py index 694eaf9e..7713811c 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -38,7 +38,6 @@ from typing import ( Protocol, Type, TypeVar, - Union, ) import msgspec @@ -47,8 +46,9 @@ import trio from tractor.log import get_logger from tractor._exceptions import ( - TransportClosed, MsgTypeError, + pack_from_raise, + TransportClosed, ) from tractor.msg import ( _ctxvar_MsgCodec, @@ -118,40 +118,75 @@ class MsgTransport(Protocol[MsgType]): ... -def _raise_msg_type_err( +def _mk_msg_type_err( msg: Any|bytes, codec: MsgCodec, - validation_err: msgspec.ValidationError|None = None, + + message: str|None = None, verb_header: str = '', -) -> None: + src_validation_error: msgspec.ValidationError|None = None, + src_type_error: TypeError|None = None, - # if side == 'send': - if validation_err is None: # send-side +) -> MsgTypeError: - import traceback - from tractor._exceptions import pformat_boxed_tb + # `Channel.send()` case + if src_validation_error is None: # send-side - fmt_spec: str = '\n'.join( - map(str, codec.msg_spec.__args__) - ) - fmt_stack: str = ( - '\n'.join(traceback.format_stack(limit=3)) - ) - tb_fmt: str = pformat_boxed_tb( - tb_str=fmt_stack, - # fields_str=header, - field_prefix=' ', - indent='', - ) - raise MsgTypeError( - f'invalid msg -> {msg}: {type(msg)}\n\n' - f'{tb_fmt}\n' - f'Valid IPC msgs are:\n\n' - # f' ------ - ------\n' - f'{fmt_spec}\n' - ) + # no src error from `msgspec.msgpack.Decoder.decode()` so + # prolly a manual type-check on our part. 
+ if message is None: + import traceback + from tractor._exceptions import pformat_boxed_tb + fmt_spec: str = '\n'.join( + map(str, codec.msg_spec.__args__) + ) + fmt_stack: str = ( + '\n'.join(traceback.format_stack(limit=3)) + ) + tb_fmt: str = pformat_boxed_tb( + tb_str=fmt_stack, + # fields_str=header, + field_prefix=' ', + indent='', + ) + message: str = ( + f'invalid msg -> {msg}: {type(msg)}\n\n' + f'{tb_fmt}\n' + f'Valid IPC msgs are:\n\n' + # f' ------ - ------\n' + f'{fmt_spec}\n' + ) + elif src_type_error: + src_message: str = str(src_type_error) + patt: str = 'type ' + type_idx: int = src_message.find('type ') + invalid_type: str = src_message[type_idx + len(patt):].split()[0] + + enc_hook: Callable|None = codec.enc.enc_hook + if enc_hook is None: + message += ( + '\n\n' + + f"The current IPC-msg codec can't encode type `{invalid_type}` !\n" + f'Maybe a `msgpack.Encoder.enc_hook()` extension is needed?\n\n' + + f'Check the `msgspec` docs for ad-hoc type extending:\n' + '|_ https://jcristharif.com/msgspec/extending.html\n' + '|_ https://jcristharif.com/msgspec/extending.html#defining-a-custom-extension-messagepack-only\n' + ) + + + msgtyperr = MsgTypeError( + message=message, + ipc_msg=msg, + ) + # ya, might be `None` + msgtyperr.__cause__ = src_type_error + return msgtyperr + + # `Channel.recv()` case else: # decode the msg-bytes using the std msgpack # interchange-prot (i.e. without any @@ -161,29 +196,31 @@ def _raise_msg_type_err( msg_dict: dict = msgspec.msgpack.decode(msg) msg_type_name: str = msg_dict['msg_type'] msg_type = getattr(msgtypes, msg_type_name) - errmsg: str = ( + message: str = ( f'invalid `{msg_type_name}` IPC msg\n\n' ) if verb_header: - errmsg = f'{verb_header} ' + errmsg + message = f'{verb_header} ' + message # XXX see if we can determine the exact invalid field # such that we can comprehensively report the # specific field's type problem - msgspec_msg: str = validation_err.args[0].rstrip('`') + msgspec_msg: str = src_validation_error.args[0].rstrip('`') msg, _, maybe_field = msgspec_msg.rpartition('$.') obj = object() if (field_val := msg_dict.get(maybe_field, obj)) is not obj: - field_type: Union[Type] = msg_type.__signature__.parameters[ - maybe_field - ].annotation - errmsg += ( + message += ( f'{msg.rstrip("`")}\n\n' f'{msg_type}\n' - f' |_.{maybe_field}: {field_type} = {field_val!r}\n' + f' |_.{maybe_field}: {codec.pld_spec_str} = {field_val!r}\n' ) - raise MsgTypeError(errmsg) from validation_err + msgtyperr = MsgTypeError.from_decode( + message=message, + msgdict=msg_dict, + ) + msgtyperr.__cause__ = src_validation_error + return msgtyperr # TODO: not sure why we have to inherit here, but it seems to be an @@ -325,12 +362,15 @@ class MsgpackTCPStream(MsgTransport): # and always raise such that spec violations # are never allowed to be caught silently! except msgspec.ValidationError as verr: - # re-raise as type error - _raise_msg_type_err( + msgtyperr: MsgTypeError = _mk_msg_type_err( msg=msg_bytes, codec=codec, - validation_err=verr, + src_validation_error=verr, ) + # XXX deliver up to `Channel.recv()` where + # a re-raise and `Error`-pack can inject the far + # end actor `.uid`.
+ yield msgtyperr except ( msgspec.DecodeError, @@ -387,7 +427,7 @@ class MsgpackTCPStream(MsgTransport): if type(msg) not in msgtypes.__msg_types__: if strict_types: - _raise_msg_type_err( + raise _mk_msg_type_err( msg, codec=codec, ) @@ -400,11 +440,16 @@ class MsgpackTCPStream(MsgTransport): try: bytes_data: bytes = codec.encode(msg) except TypeError as typerr: - raise MsgTypeError( - 'A msg field violates the current spec\n' - f'{codec.pld_spec}\n\n' - f'{pretty_struct.Struct.pformat(msg)}' - ) from typerr + msgtyperr: MsgTypeError = _mk_msg_type_err( + msg, + codec=codec, + message=( + f'IPC-msg-spec violation in\n\n' + f'{pretty_struct.Struct.pformat(msg)}' + ), + src_type_error=typerr, + ) + raise msgtyperr from typerr # supposedly the fastest says, # https://stackoverflow.com/a/54027962 @@ -719,13 +764,35 @@ class Channel: assert self._transport while True: try: - async for item in self._transport: - yield item + async for msg in self._transport: + match msg: + # NOTE: if transport/interchange delivers + # a type error, we pack it with the far + # end peer `Actor.uid` and relay the + # `Error`-msg upward to the `._rpc` stack + # for normal RAE handling. + case MsgTypeError(): + yield pack_from_raise( + local_err=msg, + cid=msg.cid, + + # XXX we pack it here bc lower + # layers have no notion of an + # actor-id ;) + src_uid=self.uid, + ) + case _: + yield msg + + # TODO: if we were gonna do this it should be + # done up at the `MsgStream` layer! + # # sent = yield item # if sent is not None: # # optimization, passing None through all the # # time is pointless # await self._transport.send(sent) + except trio.BrokenResourceError: # if not self._autorecon: -- 2.34.1 From 7bb6a535819459f7c6e4cc0bb445607802bac109 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 9 Apr 2024 13:46:34 -0400 Subject: [PATCH 038/305] Start tidying up `._context`, use `pack_from_raise()` Mostly removing commented (and replaced) code blocks lingering from the ctxc semantics work and new typed-msg-spec `MsgType`s handling AND use the new `._exceptions.pack_from_raise()` helper to construct `StreamOverrun` msgs. Deaterz: - clean out the drain loop now that it's implemented to handle our struct msg types including the `dict`-msg bits left in as fallback-reminders, any notes/todos better summarized at the top of their blocks, remove any `_final_result_is_set()` related duplicate/legacy tidbits. - use a `case Error()` block in drain loop with fallthrough to `_:` always resulting in an rte raise. - move "XXX" notes into the doc-string for `._deliver_msg()` as a "rules" section. - use `match:` syntax for logging the `result_or_err: MsgType` outcome from the final `.result()` call inside `open_context_from_portal()`. - generally speaking use `MsgType` type annotations throughout! 
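Aside: the `match:`-on-`MsgType` dispatch used throughout the drain loop boils down to `msgspec`'s tagged-union decoding; here's a minimal, self-contained sketch of the pattern (the structs below are simplified stand-ins for the real `tractor.msg.types` set, NOT the actual runtime msgs):

    from typing import Any, Union
    import msgspec
    from msgspec import msgpack

    # stand-in msgs mirroring the shape of the real SC-shuttle set
    class Yield(msgspec.Struct, tag=True, tag_field='msg_type'):
        cid: str
        pld: Any

    class Return(msgspec.Struct, tag=True, tag_field='msg_type'):
        cid: str
        pld: Any

    class Stop(msgspec.Struct, tag=True, tag_field='msg_type'):
        cid: str

    MsgType = Union[Yield, Return, Stop]
    dec = msgpack.Decoder(type=MsgType)
    enc = msgpack.Encoder()

    def drain_one(raw: bytes) -> Any:
        # decoding dispatches on the `msg_type` tag field,
        # delivering a native struct instance we can `match:` on.
        match dec.decode(raw):
            case Return(pld=res):
                return res   # final result arrived!
            case Yield():
                return None  # peer still streaming; discard
            case Stop():
                return None  # stream terminated, no result yet

    assert drain_one(enc.encode(Return(cid='1', pld=420))) == 420
    assert drain_one(enc.encode(Yield(cid='1', pld='chunk'))) is None
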
--- tractor/_context.py | 226 ++++++++++++++++++-------------------------- tractor/_portal.py | 4 +- tractor/_runtime.py | 1 - 3 files changed, 95 insertions(+), 136 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index e0f62ec8..1d0f67f0 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -49,20 +49,21 @@ from ._exceptions import ( InternalError, RemoteActorError, StreamOverrun, - pack_error, + pack_from_raise, unpack_error, _raise_from_no_key_in_msg, ) from .log import get_logger from .msg import ( + Error, + MsgType, + MsgCodec, NamespacePath, - Msg, Return, Started, Stop, Yield, current_codec, - MsgCodec, pretty_struct, ) from ._ipc import Channel @@ -107,8 +108,7 @@ async def _drain_to_final_msg( # wait for a final context result by collecting (but # basically ignoring) any bi-dir-stream msgs still in transit # from the far end. - # pre_result_drained: list[dict] = [] - pre_result_drained: list[Msg] = [] + pre_result_drained: list[MsgType] = [] while not ( ctx.maybe_error and not ctx._final_result_is_set() @@ -168,7 +168,7 @@ async def _drain_to_final_msg( # pray to the `trio` gawds that we're corrent with this # msg: dict = await ctx._recv_chan.receive() - msg: Msg = await ctx._recv_chan.receive() + msg: MsgType = await ctx._recv_chan.receive() # always capture unexpected/non-result msgs pre_result_drained.append(msg) @@ -191,13 +191,12 @@ async def _drain_to_final_msg( raise match msg: + + # final result arrived! case Return( - cid=cid, + # cid=cid, pld=res, ): - # try: - # ctx._result: Any = msg['return'] - # ctx._result: Any = msg.pld ctx._result: Any = res log.runtime( 'Context delivered final draining msg:\n' @@ -210,13 +209,9 @@ async def _drain_to_final_msg( # TODO: ^ we don't need it right? break - # except KeyError: - # except AttributeError: + # far end task is still streaming to us so discard + # and report depending on local ctx state. case Yield(): - # if 'yield' in msg: - - # far end task is still streaming to us so discard - # and report per local context state. if ( (ctx._stream.closed and (reason := 'stream was already closed') @@ -257,45 +252,34 @@ async def _drain_to_final_msg( ) continue + # stream terminated, but no result yet.. + # # TODO: work out edge cases here where # a stream is open but the task also calls # this? # -[ ] should be a runtime error if a stream is open right? # Stop() case Stop(): - # elif 'stop' in msg: log.cancel( 'Remote stream terminated due to "stop" msg:\n\n' f'{pformat(msg)}\n' ) continue - # It's an internal error if any other msg type without - # a`'cid'` field arrives here! - case _: - # if not msg.get('cid'): - if not msg.cid: - raise InternalError( - 'Unexpected cid-missing msg?\n\n' - f'{msg}\n' - ) + # remote error msg, likely already handled inside + # `Context._deliver_msg()` + case Error(): - # XXX fallthrough to handle expected error XXX - # TODO: replace this with `ctx.maybe_raise()` + # TODO: can we replace this with `ctx.maybe_raise()`? + # -[ ] would this be handier for this case maybe? + # async with maybe_raise_on_exit() as raises: + # if raises: + # log.error('some msg about raising..') # - # TODO: would this be handier for this case maybe? 
- # async with maybe_raise_on_exit() as raises: - # if raises: - # log.error('some msg about raising..') - re: Exception|None = ctx._remote_error if re: - log.critical( - 'Remote ctx terminated due to "error" msg:\n' - f'{re}' - ) assert msg is ctx._cancel_msg - # NOTE: this solved a super dupe edge case XD + # NOTE: this solved a super duper edge case XD # this was THE super duper edge case of: # - local task opens a remote task, # - requests remote cancellation of far end @@ -312,9 +296,10 @@ async def _drain_to_final_msg( # does not re-raise any ctxc it receives # IFF **it** was the cancellation # requester.. - # will raise if necessary, ow break from - # loop presuming any error terminates the - # context! + # + # XXX will raise if necessary but ow break + # from loop presuming any supressed error + # (ctxc) should terminate the context! ctx._maybe_raise_remote_err( re, # NOTE: obvi we don't care if we @@ -338,6 +323,7 @@ async def _drain_to_final_msg( log.critical('SHOULD NEVER GET HERE!?') assert msg is ctx._cancel_msg assert error.msgdata == ctx._remote_error.msgdata + assert error.ipc_msg == ctx._remote_error.ipc_msg from .devx._debug import pause await pause() ctx._maybe_cancel_and_set_remote_error(error) @@ -346,6 +332,20 @@ async def _drain_to_final_msg( else: # bubble the original src key error raise + + # XXX should pretty much never get here unless someone + # overrides the default `MsgType` spec. + case _: + # It's definitely an internal error if any other + # msg type without a`'cid'` field arrives here! + if not msg.cid: + raise InternalError( + 'Unexpected cid-missing msg?\n\n' + f'{msg}\n' + ) + + raise RuntimeError('Unknown msg type: {msg}') + else: log.cancel( 'Skipping `MsgStream` drain since final outcome is set\n\n' @@ -1345,8 +1345,11 @@ class Context: # `._cancel_called == True`. not raise_overrun_from_self and isinstance(remote_error, RemoteActorError) - and remote_error.msgdata['boxed_type_str'] == 'StreamOverrun' - and tuple(remote_error.msgdata['sender']) == our_uid + + and remote_error.boxed_type_str == 'StreamOverrun' + + # and tuple(remote_error.msgdata['sender']) == our_uid + and tuple(remote_error.sender) == our_uid ): # NOTE: we set the local scope error to any "self # cancellation" error-response thus "absorbing" @@ -1415,16 +1418,11 @@ class Context: assert self._recv_chan raise_overrun: bool = not self._allow_overruns - # res_placeholder: int = id(self) if ( - # self._result == res_placeholder - # and not self._remote_error self.maybe_error is None - # not self._remote_error - # and not self._local_error - and not self._recv_chan._closed # type: ignore + and + not self._recv_chan._closed # type: ignore ): - # wait for a final context result/error by "draining" # (by more or less ignoring) any bi-dir-stream "yield" # msgs still in transit from the far end. @@ -1435,7 +1433,6 @@ class Context: for msg in drained_msgs: # TODO: mask this by default.. - # if 'return' in msg: if isinstance(msg, Return): # from .devx import pause # await pause() @@ -1451,6 +1448,9 @@ class Context: ) self.maybe_raise( + # NOTE: obvi we don't care if we + # overran the far end if we're already + # waiting on a final result (msg). 
raise_overrun_from_self=( raise_overrun and @@ -1461,34 +1461,12 @@ class Context: (not self._cancel_called) ) ) - # if ( - # (re := self._remote_error) - # # and self._result == res_placeholder - # ): - # self._maybe_raise_remote_err( - # re, - # # NOTE: obvi we don't care if we - # # overran the far end if we're already - # # waiting on a final result (msg). - # # raise_overrun_from_self=False, - # raise_overrun_from_self=( - # raise_overrun - # and - # # only when we ARE NOT the canceller - # # should we raise overruns, bc ow we're - # # raising something we know might happen - # # during cancellation ;) - # (not self._cancel_called) - # ), - # ) - # if maybe_err: - # self._result = maybe_err - return self.outcome - # TODO: switch this with above which should be named - # `.wait_for_outcome()` and instead do - # a `.outcome.Outcome.unwrap()` ? + # TODO: switch this with above! + # -[ ] should be named `.wait_for_outcome()` and instead do + # a `.outcome.Outcome.unwrap()` ? + # # @property # def result(self) -> Any|None: # if self._final_result_is_set(): @@ -1547,7 +1525,6 @@ class Context: return None def _final_result_is_set(self) -> bool: - # return not (self._result == id(self)) return self._result is not Unresolved # def get_result_nowait(self) -> Any|None: @@ -1764,8 +1741,7 @@ class Context: async def _deliver_msg( self, - # msg: dict, - msg: Msg, + msg: MsgType, ) -> bool: ''' @@ -1779,6 +1755,20 @@ class Context: `._scope_nursery: trio.Nursery`) which ensures that such messages are queued up and eventually sent if possible. + XXX RULES XXX + ------ - ------ + - NEVER raise remote errors from this method; a runtime task caller. + An error "delivered" to a ctx should always be raised by + the corresponding local task operating on the + `Portal`/`Context` APIs. + + - NEVER `return` early before delivering the msg! + bc if the error is a ctxc and there is a task waiting on + `.result()` we need the msg to be + `send_chan.send_nowait()`-ed over the `._recv_chan` so + that the error is relayed to that waiter task and thus + raised in user code! + ''' cid: str = self.cid chan: Channel = self.chan @@ -1809,28 +1799,14 @@ class Context: ) self._cancel_msg: dict = msg - # NOTE: this will not raise an error, merely set + # XXX NOTE: this will not raise an error, merely set # `._remote_error` and maybe cancel any task currently # entered in `Portal.open_context()` presuming the # error is "cancel causing" (i.e. a `ContextCancelled` # or `RemoteActorError`). self._maybe_cancel_and_set_remote_error(re) - # XXX NEVER do this XXX..!! - # bc if the error is a ctxc and there is a task - # waiting on `.result()` we need the msg to be sent - # over the `send_chan`/`._recv_chan` so that the error - # is relayed to that waiter task.. - # return True - # - # XXX ALSO NO!! XXX - # => NEVER raise remote errors from the calling - # runtime task, they should always be raised by - # consumer side tasks operating on the - # `Portal`/`Context` APIs. - # if self._remote_error: - # self._maybe_raise_remote_err(error) - + # XXX only case where returning early is fine! if self._in_overrun: log.warning( f'Queueing OVERRUN msg on caller task:\n' @@ -1949,31 +1925,27 @@ class Context: # anything different. return False else: - # txt += f'\n{msg}\n' # raise local overrun and immediately pack as IPC # msg for far end. 
- try: - raise StreamOverrun( + err_msg: Error = pack_from_raise( + local_err=StreamOverrun( txt, sender=from_uid, - ) - except StreamOverrun as err: - err_msg: dict[str, dict] = pack_error( - err, - cid=cid, - ) - try: - # relay condition to sender side remote task - await chan.send(err_msg) - return True + ), + cid=cid, + ) + try: + # relay condition to sender side remote task + await chan.send(err_msg) + return True - except trio.BrokenResourceError: - # XXX: local consumer has closed their side - # so cancel the far end streaming task - log.warning( - 'Channel for ctx is already closed?\n' - f'|_{chan}\n' - ) + # XXX: local consumer has closed their side of + # the IPC so cancel the far end streaming task + except trio.BrokenResourceError: + log.warning( + 'Channel for ctx is already closed?\n' + f'|_{chan}\n' + ) # ow, indicate unable to deliver by default return False @@ -2382,28 +2354,17 @@ async def open_context_from_portal( # an exception type boxed in a `RemoteActorError` # is returned (meaning it was obvi not raised) # that we want to log-report on. - msgdata: str|None = getattr( - result_or_err, - 'msgdata', - None - ) - match (msgdata, result_or_err): - case ( - {'tb_str': tbstr}, - ContextCancelled(), - ): - log.cancel(tbstr) + match result_or_err: + case ContextCancelled() as ctxc: + log.cancel(ctxc.tb_str) - case ( - {'tb_str': tbstr}, - RemoteActorError(), - ): + case RemoteActorError() as rae: log.exception( 'Context remotely errored!\n' f'<= peer: {uid}\n' f' |_ {nsf}()\n\n' - f'{tbstr}' + f'{rae.tb_str}' ) case (None, _): log.runtime( @@ -2413,7 +2374,6 @@ async def open_context_from_portal( f'`{result_or_err}`\n' ) - finally: # XXX: (MEGA IMPORTANT) if this is a root opened process we # wait for any immediate child in debug before popping the diff --git a/tractor/_portal.py b/tractor/_portal.py index 957eae59..e4db93a6 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -46,6 +46,7 @@ from ._state import ( from ._ipc import Channel from .log import get_logger from .msg import ( + Error, NamespacePath, Return, ) @@ -69,8 +70,7 @@ log = get_logger(__name__) # `._raise_from_no_key_in_msg()` (after tweak to # accept a `chan: Channel` arg) in key block! def _unwrap_msg( - # msg: dict[str, Any], - msg: Return, + msg: Return|Error, channel: Channel, hide_tb: bool = True, diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 435464be..f61ec80d 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -50,7 +50,6 @@ from pprint import pformat import signal import sys from typing import ( - Any, Callable, TYPE_CHECKING, ) -- 2.34.1 From 162feec6e936bf0d1211923223fdae7f8389b84e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 9 Apr 2024 13:58:10 -0400 Subject: [PATCH 039/305] Relay `MsgTypeError`s upward in RPC loop via `._deliver_ctx_payload()` --- tractor/_rpc.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index b494af2b..c9754ebc 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -47,12 +47,13 @@ from ._context import ( Context, ) from ._exceptions import ( - ModuleNotExposed, - is_multi_cancelled, ContextCancelled, + ModuleNotExposed, + MsgTypeError, + TransportClosed, + is_multi_cancelled, pack_error, unpack_error, - TransportClosed, ) from .devx import _debug from . import _state @@ -632,7 +633,7 @@ async def _invoke( # (callee) task, so relay this cancel signal to the # other side. 
ctxc = ContextCancelled( - msg, + message=msg, boxed_type=trio.Cancelled, canceller=canceller, ) @@ -822,7 +823,12 @@ async def process_messages( | Stop(cid=cid) | Return(cid=cid) | CancelAck(cid=cid) - | Error(cid=cid) # RPC-task ctx specific + + # `.cid` means RPC-ctx-task specific + | Error(cid=cid) + + # recv-side `MsgType` decode violation + | MsgTypeError(cid=cid) ): # deliver response to local caller/waiter # via its per-remote-context memory channel. -- 2.34.1 From 213e7dbb67b9129b0faa3d5698dcbd096a75234e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 11 Apr 2024 20:23:55 -0400 Subject: [PATCH 040/305] Add msg-from-dict constructor helper Handy for re-constructing a struct-`MsgType` from a `dict` decoded from wire-bytes wherein the msg failed to decode normally due to a field type error but you'd still like to show the "potential" msg in struct form, say inside a `MsgTypeError`'s meta data. Supporting deats: - add a `.msg.types.from_dict_msg()` to implement it (the helper). - also a `.msg.types._msg_table: dict[str, MsgType]` for supporting this func ^ as well as providing just a general `MsgType`-by-`str`-name lookup. Unrelated: - Drop commented idea for still supporting `dict`-msg set via `enc/dec_hook()`s that would translate to/from `MsgType`s, but that would require a duplicate impl in the runtime.. so eff that XD --- tractor/msg/pretty_struct.py | 1 + tractor/msg/types.py | 117 ++++++++++++----------------------- 2 files changed, 40 insertions(+), 78 deletions(-) diff --git a/tractor/msg/pretty_struct.py b/tractor/msg/pretty_struct.py index 412b6ed6..a67bbd26 100644 --- a/tractor/msg/pretty_struct.py +++ b/tractor/msg/pretty_struct.py @@ -140,6 +140,7 @@ class Struct( return sin_props + # TODO: make thisi a mod-func! def pformat( self, field_indent: int = 2, diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 14db09cd..9787504b 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -447,6 +447,29 @@ class Error( _msg_dict: dict|None = None +def from_dict_msg( + dict_msg: dict, + + msgT: MsgType|None = None, + tag_field: str = 'msg_type' + +) -> MsgType: + ''' + Helper to build a specific `MsgType` struct from + a "vanilla" decoded `dict`-ified equivalent of the + msg: i.e. if the `msgpack.Decoder.type == Any`. + + ''' + msg_type_tag_field: str = ( + msgT.__struct_config__.tag_field + if msgT is not None + else tag_field + ) + # XXX ensure tag field is removed + msgT_name: str = dict_msg.pop(msg_type_tag_field) + msgT: MsgType = _msg_table[msgT_name] + return msgT(**dict_msg) + # TODO: should be make a msg version of `ContextCancelled?` # and/or with a scope field or a full `ActorCancelled`? # class Cancelled(Msg): @@ -498,12 +521,18 @@ _payload_msgs: list[Msg] = [ # built-in SC shuttle protocol msg type set in # approx order of the IPC txn-state spaces. 
-__msg_types__: list[Msg] = ( +__msg_types__: list[MsgType] = ( _runtime_msgs + _payload_msgs ) + +_msg_table: dict[str, MsgType] = { + msgT.__name__: msgT + for msgT in __msg_types__ +} + # TODO: use new type declaration syntax for msg-type-spec # https://docs.python.org/3/library/typing.html#type-aliases # https://docs.python.org/3/reference/simple_stmts.html#type @@ -660,6 +689,11 @@ def mk_msg_spec( 'Generating new IPC msg-spec\n' f'{ipc_spec}\n' ) + assert ( + ipc_spec + and + ipc_spec is not Any + ) return ( ipc_spec, msgtypes_table[spec_build_method] + ipc_msg_types, @@ -669,9 +703,9 @@ def mk_msg_spec( # TODO: make something similar to this inside `._codec` such that # user can just pass a type table of some sort? # -[ ] we would need to decode all msgs to `pretty_struct.Struct` -# and then call `.to_dict()` on them? +# and then call `.to_dict()` on them? # -[ ] we're going to need to re-impl all the stuff changed in the -# runtime port such that it can handle dicts or `Msg`s? +# runtime port such that it can handle dicts or `Msg`s? # # def mk_dict_msg_codec_hooks() -> tuple[Callable, Callable]: # ''' @@ -679,88 +713,15 @@ def mk_msg_spec( # manual convertion from our above native `Msg` set # to `dict` equivalent (wire msgs) in order to keep legacy compat # with the original runtime implementation. - +# # Note: this is is/was primarly used while moving the core # runtime over to using native `Msg`-struct types wherein we # start with the send side emitting without loading # a typed-decoder and then later flipping the switch over to # load to the native struct types once all runtime usage has # been adjusted appropriately. - +# # ''' -# def enc_to_dict(msg: Any) -> Any: -# ''' -# Encode `Msg`-structs to `dict` msgs instead -# of using `msgspec.msgpack.Decoder.type`-ed -# features. - -# ''' -# match msg: -# case Start(): -# dctmsg: dict = pretty_struct.Struct.to_dict( -# msg -# )['pld'] - -# case Error(): -# dctmsg: dict = pretty_struct.Struct.to_dict( -# msg -# )['pld'] -# return {'error': dctmsg} - - -# def dec_from_dict( -# type: Type, -# obj: Any, -# ) -> Any: -# ''' -# Decode to `Msg`-structs from `dict` msgs instead -# of using `msgspec.msgpack.Decoder.type`-ed -# features. 
- -# ''' -# cid: str = obj.get('cid') -# match obj: -# case {'cmd': pld}: -# return Start( -# cid=cid, -# pld=pld, -# ) -# case {'functype': pld}: -# return StartAck( -# cid=cid, -# functype=pld, -# # pld=IpcCtxSpec( -# # functype=pld, -# # ), -# ) -# case {'started': pld}: -# return Started( -# cid=cid, -# pld=pld, -# ) -# case {'yield': pld}: -# return Yield( -# cid=obj['cid'], -# pld=pld, -# ) -# case {'stop': pld}: -# return Stop( -# cid=cid, -# ) -# case {'return': pld}: -# return Return( -# cid=cid, -# pld=pld, -# ) - -# case {'error': pld}: -# return Error( -# cid=cid, -# pld=ErrorData( -# **pld -# ), -# ) - # return ( # # enc_to_dict, # dec_from_dict, -- 2.34.1 From d4d1dca81296e119e09fac15ad5ca59b58bf7159 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 11 Apr 2024 20:42:54 -0400 Subject: [PATCH 041/305] Expose `tractor.msg.PayloadT` from subpkg --- tractor/msg/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py index 443b781b..8f13f5f8 100644 --- a/tractor/msg/__init__.py +++ b/tractor/msg/__init__.py @@ -53,6 +53,9 @@ from .types import ( Error as Error, + # type-var for `.pld` field + PayloadT as PayloadT, + # full msg class set from above as list __msg_types__ as __msg_types__, -- 2.34.1 From 797f7f6d63e981c13ce1402f7bf878aff0780243 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 11 Apr 2024 21:04:48 -0400 Subject: [PATCH 042/305] Add custom `MsgCodec.__repr__()` Sure makes console grokability a lot better by showing only the customizeable fields. Further, clean up `mk_codec()` a bunch by removing the `ipc_msg_spec` param since we don't plan to support another msg-set (for now) which allows cleaning out a buncha logic that was mostly just a source of bugs.. Also, - add temporary `log.info()` around codec application. - throw in some sanity `assert`s to `limit_msg_spec()`. - add but mask out the `extend_msg_spec()` idea since it seems `msgspec` won't allow `Decoder.type` extensions when using a custom `dec_hook()` for some extension type.. (not sure what approach to take here yet). --- tractor/msg/_codec.py | 137 +++++++++++++++++++++++++----------------- 1 file changed, 83 insertions(+), 54 deletions(-) diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index de3316c8..e117457f 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -37,6 +37,7 @@ from contextlib import ( # ContextVar, # Token, # ) +import textwrap from typing import ( Any, Callable, @@ -59,7 +60,9 @@ from tractor.msg.types import ( mk_msg_spec, MsgType, ) +from tractor.log import get_logger +log = get_logger(__name__) # TODO: overall IPC msg-spec features (i.e. in this mod)! # @@ -87,6 +90,27 @@ class MsgCodec(Struct): pld_spec: Union[Type[Struct]]|None + def __repr__(self) -> str: + speclines: str = textwrap.indent( + self.pformat_msg_spec(), + prefix=' '*3, + ) + body: str = textwrap.indent( + f'|_lib = {self.lib.__name__!r}\n' + f'|_enc_hook: {self.enc.enc_hook}\n' + f'|_dec_hook: {self.dec.dec_hook}\n' + f'|_pld_spec: {self.pld_spec_str}\n' + # f'|\n' + f'|__msg_spec__:\n' + f'{speclines}\n', + prefix=' '*2, + ) + return ( + f'<{type(self).__name__}(\n' + f'{body}' + ')>' + ) + @property def pld_spec_str(self) -> str: spec: Union[Type]|Type = self.pld_spec @@ -163,8 +187,8 @@ class MsgCodec(Struct): ) -> bytes: ''' - Encode input python objects to `msgpack` bytes for transfer - on a tranport protocol connection. + Encode input python objects to `msgpack` bytes for + transfer on a tranport protocol connection. 
''' return self._enc.encode(py_obj) @@ -325,15 +349,9 @@ class MsgCodec(Struct): def mk_codec( - ipc_msg_spec: Union[Type[Struct]]|Any|None = None, - # - # ^TODO^: in the long run, do we want to allow using a diff IPC `Msg`-set? - # it would break the runtime, but maybe say if you wanted - # to add some kinda field-specific or wholesale `.pld` ecryption? - # struct type unions set for `Decoder` # https://jcristharif.com/msgspec/structs.html#tagged-unions - ipc_pld_spec: Union[Type[Struct]]|Any|None = None, + ipc_pld_spec: Union[Type[Struct]]|Any = Any, # TODO: offering a per-msg(-field) type-spec such that # the fields can be dynamically NOT decoded and left as `Raw` @@ -352,7 +370,6 @@ def mk_codec( dec_hook: Callable|None = None, enc_hook: Callable|None = None, # ------ - ------ - **kwargs, # # Encoder: # write_buffer_size=write_buffer_size, @@ -367,44 +384,19 @@ def mk_codec( `msgspec` ;). ''' - if ( - ipc_msg_spec is not None - and ipc_pld_spec - ): - raise RuntimeError( - f'If a payload spec is provided,\n' - "the builtin SC-shuttle-protocol's msg set\n" - f'(i.e. a `{MsgType}`) MUST be used!\n\n' - f'However both values were passed as => mk_codec(\n' - f' ipc_msg_spec={ipc_msg_spec}`\n' - f' ipc_pld_spec={ipc_pld_spec}`\n)\n' - ) - - elif ( - ipc_pld_spec - and - - # XXX required for now (or maybe forever?) until - # we can dream up a way to allow parameterizing and/or - # custom overrides to the `Msg`-spec protocol itself? - ipc_msg_spec is None - ): - # (manually) generate a msg-payload-spec for all relevant - # god-boxing-msg subtypes, parameterizing the `Msg.pld: PayloadT` - # for the decoder such that all sub-type msgs in our SCIPP - # will automatically decode to a type-"limited" payload (`Struct`) - # object (set). - ( - ipc_msg_spec, - msg_types, - ) = mk_msg_spec( - payload_type_union=ipc_pld_spec, - ) - assert len(ipc_msg_spec.__args__) == len(msg_types) - assert ipc_msg_spec - - else: - ipc_msg_spec = ipc_msg_spec or Any + # (manually) generate a msg-payload-spec for all relevant + # god-boxing-msg subtypes, parameterizing the `Msg.pld: PayloadT` + # for the decoder such that all sub-type msgs in our SCIPP + # will automatically decode to a type-"limited" payload (`Struct`) + # object (set). + ( + ipc_msg_spec, + msg_types, + ) = mk_msg_spec( + payload_type_union=ipc_pld_spec, + ) + assert len(ipc_msg_spec.__args__) == len(msg_types) + assert ipc_msg_spec enc = msgpack.Encoder( enc_hook=enc_hook, @@ -418,8 +410,6 @@ def mk_codec( _enc=enc, _dec=dec, pld_spec=ipc_pld_spec, - # payload_msg_specs=payload_msg_specs, - # **kwargs, ) # sanity on expected backend support @@ -500,8 +490,16 @@ def apply_codec( - https://github.com/oremanj/tricycle/blob/master/tricycle/_tests/test_tree_var.py ''' + __tracebackhide__: bool = True orig: MsgCodec = _ctxvar_MsgCodec.get() assert orig is not codec + if codec.pld_spec is None: + breakpoint() + + log.info( + 'Applying new msg-spec codec\n\n' + f'{codec}\n' + ) token: RunVarToken = _ctxvar_MsgCodec.set(codec) # TODO: for TreeVar approach, see docs for @cm `.being()` API: @@ -518,7 +516,10 @@ def apply_codec( _ctxvar_MsgCodec.reset(token) assert _ctxvar_MsgCodec.get() is orig - + log.info( + 'Reverted to last msg-spec codec\n\n' + f'{orig}\n' + ) def current_codec() -> MsgCodec: ''' @@ -532,14 +533,15 @@ def current_codec() -> MsgCodec: @cm def limit_msg_spec( - payload_types: Union[Type[Struct]], + payload_spec: Union[Type[Struct]], # TODO: don't need this approach right? # -> related to the `MsgCodec._payload_decs` stuff above.. 
# tagged_structs: list[Struct]|None = None, **codec_kwargs, -): + +) -> MsgCodec: ''' Apply a `MsgCodec` that will natively decode the SC-msg set's `Msg.pld: Union[Type[Struct]]` payload fields using @@ -547,10 +549,37 @@ def limit_msg_spec( for all IPC contexts in use by the current `trio.Task`. ''' + __tracebackhide__: bool = True + curr_codec = current_codec() msgspec_codec: MsgCodec = mk_codec( - payload_types=payload_types, + ipc_pld_spec=payload_spec, **codec_kwargs, ) with apply_codec(msgspec_codec) as applied_codec: assert applied_codec is msgspec_codec yield msgspec_codec + + assert curr_codec is current_codec() + + +# XXX: msgspec won't allow this with non-struct custom types +# like `NamespacePath`!@! +# @cm +# def extend_msg_spec( +# payload_spec: Union[Type[Struct]], + +# ) -> MsgCodec: +# ''' +# Extend the current `MsgCodec.pld_spec` (type set) by extending +# the payload spec to **include** the types specified by +# `payload_spec`. + +# ''' +# codec: MsgCodec = current_codec() +# pld_spec: Union[Type] = codec.pld_spec +# extended_spec: Union[Type] = pld_spec|payload_spec + +# with limit_msg_spec(payload_types=extended_spec) as ext_codec: +# # import pdbp; pdbp.set_trace() +# assert ext_codec.pld_spec == extended_spec +# yield ext_codec -- 2.34.1 From 304590abaa4624122758bed678f38e477c2e1ec7 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 11 Apr 2024 21:24:02 -0400 Subject: [PATCH 043/305] Tweak some `pformat_boxed_tb()` indent inputs - add some `tb_str: str` indent-prefix args for diff indent levels for the body vs. the surrounding "ascii box". - ^-use it-^ from `RemoteActorError.__repr()__` obvi. - use new `msg.types.from_dict_msg()` in impl of `MsgTypeError.payload_msg`, handy for showing what the message "would have looked like in `Struct` form" had it not failed it's type constraints. --- tractor/_exceptions.py | 73 ++++++++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 28 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index a31aa11e..31b7b36e 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -40,7 +40,7 @@ from tractor._state import current_actor from tractor.log import get_logger from tractor.msg import ( Error, - Msg, + MsgType, Stop, Yield, pretty_struct, @@ -130,7 +130,10 @@ def pformat_boxed_tb( tb_str: str, fields_str: str|None = None, field_prefix: str = ' |_', - indent: str = ' '*2 + + tb_box_indent: int|None = None, + tb_body_indent: int = 1, + ) -> str: if ( fields_str @@ -139,15 +142,19 @@ def pformat_boxed_tb( ): fields: str = textwrap.indent( fields_str, - # prefix=' '*2, - # prefix=' |_', prefix=field_prefix, ) else: fields = fields_str or '' - # body_indent: str = len(field_prefix) * ' ' - body: str = ( + tb_body = tb_str + if tb_body_indent: + tb_body: str = textwrap.indent( + tb_str, + prefix=tb_body_indent * ' ', + ) + + tb_box: str = ( # orig # f' |\n' @@ -158,21 +165,29 @@ def pformat_boxed_tb( f'|\n' f' ------ - ------\n\n' - f'{tb_str}\n' + # f'{tb_str}\n' + f'{tb_body}' f' ------ - ------\n' f'_|\n' ) - if len(indent): - body: str = textwrap.indent( - body, - # prefix=body_indent, - prefix=indent, + tb_box_indent: str = ( + tb_box_indent + or + 1 + + # (len(field_prefix)) + # ? ^-TODO-^ ? 
if you wanted another indent level + ) + if tb_box_indent > 0: + tb_box: str = textwrap.indent( + tb_box, + prefix=tb_box_indent * ' ', ) return ( fields + - body + tb_box ) @@ -316,7 +331,7 @@ class RemoteActorError(Exception): if self._ipc_msg is None: return None - msg_type: Msg = type(self._ipc_msg) + msg_type: MsgType = type(self._ipc_msg) fields: dict[str, Any] = { k: v for _, k, v in pretty_struct.iter_fields(self._ipc_msg) @@ -493,7 +508,10 @@ class RemoteActorError(Exception): tb_str=self.tb_str, fields_str=fields, field_prefix=' |_', - indent=' ', # no indent? + # ^- is so that it's placed like so, + # just after dict[str, Any]: ''' - If the underlying IPC `Msg` was received from a remote + If the underlying IPC `MsgType` was received from a remote actor but was unable to be decoded to a native `Yield`|`Started`|`Return` struct, the interchange backend native format decoder can be used to stash a `dict` @@ -643,22 +661,21 @@ class MsgTypeError( return self.msgdata.get('_msg_dict') @property - def payload_msg(self) -> Msg|None: + def payload_msg( + self, + ) -> MsgType|None: ''' Attempt to construct what would have been the original - `Msg`-with-payload subtype (i.e. an instance from the set + `MsgType`-with-payload subtype (i.e. an instance from the set of msgs in `.msg.types._payload_msgs`) which failed validation. ''' - msg_dict: dict = self.msg_dict.copy() - name: str = msg_dict.pop('msg_type') - msg_type: Msg = getattr( - msgtypes, - name, - Msg, - ) - return msg_type(**msg_dict) + if msg_dict := self.msg_dict.copy(): + return msgtypes.from_dict_msg( + dict_msg=msg_dict, + ) + return None @property def cid(self) -> str: @@ -908,7 +925,7 @@ def is_multi_cancelled(exc: BaseException) -> bool: def _raise_from_no_key_in_msg( ctx: Context, - msg: Msg, + msg: MsgType, src_err: KeyError, log: StackLevelAdapter, # caller specific `log` obj -- 2.34.1 From 9ea5aa1cde0e2cab4844eec2ee158e58a2782d35 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 12 Apr 2024 11:47:10 -0400 Subject: [PATCH 044/305] TOSQUASH 322e015d Fix `mk_codec()` input arg --- tractor/msg/_codec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index e117457f..82fd2011 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -420,7 +420,7 @@ def mk_codec( # instance of the default `msgspec.msgpack` codec settings, i.e. # no custom structs, hooks or other special types. -_def_msgspec_codec: MsgCodec = mk_codec(ipc_msg_spec=Any) +_def_msgspec_codec: MsgCodec = mk_codec(ipc_pld_spec=Any) # The built-in IPC `Msg` spec. 
# Our composing "shuttle" protocol which allows `tractor`-app code -- 2.34.1 From 9381d21281da85c9e9a25bd6caa9da71c7c24f61 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 12 Apr 2024 11:49:50 -0400 Subject: [PATCH 045/305] Extend recv-side `MsgTypeError` default message Display the new `MsgCodec.pld_spec_str` and format the incorrect field value to be placed entirely (txt block wise) right of the "type annot" part of the line: Iow if you had a bad `dict` value where something else should be it'd look something like this: MsgTypeError: ... --- diff --git a/tractor/_ipc.py b/tractor/_ipc.py @@ ... @@ def _mk_msg_type_err( + import textwrap + # `Channel.send()` case if src_validation_error is None: # send-side @@ -209,10 +211,24 @@ def _mk_msg_type_err( msg, _, maybe_field = msgspec_msg.rpartition('$.') obj = object() if (field_val := msg_dict.get(maybe_field, obj)) is not obj: + field_name_expr: str = ( + f' |_{maybe_field}: {codec.pld_spec_str} = ' + ) + fmt_val_lines: list[str] = pformat(field_val).splitlines() + fmt_val: str = ( + f'{fmt_val_lines[0]}\n' + + + textwrap.indent( + '\n'.join(fmt_val_lines[1:]), + prefix=' '*len(field_name_expr), + ) + ) message += ( f'{msg.rstrip("`")}\n\n' - f'{msg_type}\n' - f' |_.{maybe_field}: {codec.pld_spec_str} = {field_val!r}\n' + f'<{msg_type.__qualname__}(\n' + # f'{".".join([msg_type.__module__, msg_type.__qualname__])}\n' + f'{field_name_expr}{fmt_val}\n' + f')>' ) msgtyperr = MsgTypeError.from_decode( @@ -338,7 +354,7 @@ class MsgpackTCPStream(MsgTransport): # self._task = task self._codec = codec log.runtime( - 'Using new codec in {self}.recv()\n' + f'Using new codec in {self}.recv()\n' f'codec: {self._codec}\n\n' f'msg_bytes: {msg_bytes}\n' ) @@ -420,7 +436,7 @@ class MsgpackTCPStream(MsgTransport): if self._codec.pld_spec != codec.pld_spec: self._codec = codec log.runtime( - 'Using new codec in {self}.send()\n' + f'Using new codec in {self}.send()\n' f'codec: {self._codec}\n\n' f'msg: {msg}\n' ) -- 2.34.1 From 2995a6afb78e370f3f64cd390cbe07623971be72 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 13 Apr 2024 15:19:08 -0400 Subject: [PATCH 046/305] IPC ctx refinements around `MsgTypeError` awareness Add a bit of special handling for msg-type-errors with a dedicated log-msg detailing which `.side: str` is the sender/causer and avoiding a `._scope.cancel()` call in such cases since the local task might be written to handle and tolerate the badly (typed) IPC msg. As part of ^, change the ctx task-pair "side" semantics from "caller" -> "callee" to be "parent" -> "child" which better matches the cross-process SC-linked-task supervision hierarchy, and `trio.Nursery.parent_task`; in `trio` the task that opens a nursery is also named the "parent". Impl deats / fixes around the `.side` semantics: - ensure that `._portal: Portal` is set ASAP after `Actor.start_remote_task()` such that if the `Started` transaction fails, the parent-vs.-child sides are still denoted correctly (since `._portal` being set is the predicate for that). - add a helper func `Context.peer_side(side: str) -> str:` which inverts from "child" to "parent" and vice versa, useful for logging info. Other tweaks: - make `_drain_to_final_msg()` return a tuple of a maybe-`Return` and the list of other `pre_result_drained: list[MsgType]` such that we don't ever have to warn about the return msg getting captured as a pre-"result" msg. - Add some strictness flags to `.started()` which allow for toggling whether to error or warn log about mismatching roundtripped `Started` msgs prior to IPC transit.
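Relatedly, the "tolerate bad msgs" policy floated in the patch 037 notes now becomes expressible in pure user code; a rough sketch, assuming (as described in that patch) that a peer's msg-type violation is relayed upward and re-raised by `MsgStream.receive()`, with `stream` being an already-opened `MsgStream`:

    from tractor._exceptions import MsgTypeError

    async def tolerant_recv(stream) -> object|None:
        '''
        Absorb a peer's badly-typed payload instead of letting
        it tear down the whole IPC ctx.

        '''
        try:
            return await stream.receive()
        except MsgTypeError as mte:
            # since `._scope` is no longer auto-cancelled for
            # mtes, the dialog stays alive and we can just skip
            # the offending msg.
            print(f'Ignoring invalid msg from peer:\n{mte}')
            return None
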
--- tractor/_context.py | 179 ++++++++++++++++++++++++++++++++------------ 1 file changed, 132 insertions(+), 47 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 1d0f67f0..2d9e6363 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -47,6 +47,7 @@ import trio from ._exceptions import ( ContextCancelled, InternalError, + MsgTypeError, RemoteActorError, StreamOverrun, pack_from_raise, @@ -59,12 +60,14 @@ from .msg import ( MsgType, MsgCodec, NamespacePath, + PayloadT, Return, Started, Stop, Yield, current_codec, pretty_struct, + types as msgtypes, ) from ._ipc import Channel from ._streaming import MsgStream @@ -88,7 +91,10 @@ async def _drain_to_final_msg( hide_tb: bool = True, msg_limit: int = 6, -) -> list[dict]: +) -> tuple[ + Return|None, + list[MsgType] +]: ''' Drain IPC msgs delivered to the underlying rx-mem-chan `Context._recv_chan` from the runtime in search for a final @@ -109,6 +115,7 @@ async def _drain_to_final_msg( # basically ignoring) any bi-dir-stream msgs still in transit # from the far end. pre_result_drained: list[MsgType] = [] + return_msg: Return|None = None while not ( ctx.maybe_error and not ctx._final_result_is_set() @@ -169,8 +176,6 @@ async def _drain_to_final_msg( # pray to the `trio` gawds that we're corrent with this # msg: dict = await ctx._recv_chan.receive() msg: MsgType = await ctx._recv_chan.receive() - # always capture unexpected/non-result msgs - pre_result_drained.append(msg) # NOTE: we get here if the far end was # `ContextCancelled` in 2 cases: @@ -207,11 +212,13 @@ async def _drain_to_final_msg( # if ctx._recv_chan: # await ctx._recv_chan.aclose() # TODO: ^ we don't need it right? + return_msg = msg break # far end task is still streaming to us so discard # and report depending on local ctx state. case Yield(): + pre_result_drained.append(msg) if ( (ctx._stream.closed and (reason := 'stream was already closed') @@ -236,7 +243,10 @@ async def _drain_to_final_msg( f'{pformat(msg)}\n' ) - return pre_result_drained + return ( + return_msg, + pre_result_drained, + ) # drain up to the `msg_limit` hoping to get # a final result or error/ctxc. @@ -260,6 +270,7 @@ async def _drain_to_final_msg( # -[ ] should be a runtime error if a stream is open right? # Stop() case Stop(): + pre_result_drained.append(msg) log.cancel( 'Remote stream terminated due to "stop" msg:\n\n' f'{pformat(msg)}\n' @@ -269,7 +280,6 @@ async def _drain_to_final_msg( # remote error msg, likely already handled inside # `Context._deliver_msg()` case Error(): - # TODO: can we replace this with `ctx.maybe_raise()`? # -[ ] would this be handier for this case maybe? # async with maybe_raise_on_exit() as raises: @@ -336,6 +346,7 @@ async def _drain_to_final_msg( # XXX should pretty much never get here unless someone # overrides the default `MsgType` spec. case _: + pre_result_drained.append(msg) # It's definitely an internal error if any other # msg type without a`'cid'` field arrives here! if not msg.cid: @@ -352,7 +363,10 @@ async def _drain_to_final_msg( f'{ctx.outcome}\n' ) - return pre_result_drained + return ( + return_msg, + pre_result_drained, + ) class Unresolved: @@ -719,21 +733,36 @@ class Context: Return string indicating which task this instance is wrapping. ''' - return 'caller' if self._portal else 'callee' + return 'parent' if self._portal else 'child' + @staticmethod + def peer_side(side: str) -> str: + match side: + case 'child': + return 'parent' + case 'parent': + return 'child' + + # TODO: remove stat! 
+ # -[ ] re-implement the `.experiemental._pubsub` stuff + # with `MsgStream` and that should be last usage? + # -[ ] remove from `tests/legacy_one_way_streaming.py`! async def send_yield( self, data: Any, - ) -> None: + ''' + Deprecated method for what now is implemented in `MsgStream`. + We need to rework / remove some stuff tho, see above. + + ''' warnings.warn( "`Context.send_yield()` is now deprecated. " "Use ``MessageStream.send()``. ", DeprecationWarning, stacklevel=2, ) - # await self.chan.send({'yield': data, 'cid': self.cid}) await self.chan.send( Yield( cid=self.cid, @@ -742,12 +771,11 @@ class Context: ) async def send_stop(self) -> None: - # await pause() - # await self.chan.send({ - # # Stop( - # 'stop': True, - # 'cid': self.cid - # }) + ''' + Terminate a `MsgStream` dialog-phase by sending the IPC + equiv of a `StopIteration`. + + ''' await self.chan.send( Stop(cid=self.cid) ) @@ -843,6 +871,7 @@ class Context: # self-cancel (ack) or, # peer propagated remote cancellation. + msgtyperr: bool = False if isinstance(error, ContextCancelled): whom: str = ( @@ -854,6 +883,16 @@ class Context: f'{error}' ) + elif isinstance(error, MsgTypeError): + msgtyperr = True + peer_side: str = self.peer_side(self.side) + log.error( + f'IPC dialog error due to msg-type caused by {peer_side!r} side\n\n' + + f'{error}\n' + f'{pformat(self)}\n' + ) + else: log.error( f'Remote context error:\n\n' @@ -894,9 +933,9 @@ class Context: # if `._cancel_called` then `.cancel_acked and .cancel_called` # always should be set. and not self._is_self_cancelled() - and not cs.cancel_called and not cs.cancelled_caught + and not msgtyperr ): # TODO: it'd sure be handy to inject our own # `trio.Cancelled` subtype here ;) @@ -1004,7 +1043,7 @@ class Context: # when the runtime finally receives it during teardown # (normally in `.result()` called from # `Portal.open_context().__aexit__()`) - if side == 'caller': + if side == 'parent': if not self._portal: raise InternalError( 'No portal found!?\n' @@ -1426,7 +1465,10 @@ class Context: # wait for a final context result/error by "draining" # (by more or less ignoring) any bi-dir-stream "yield" # msgs still in transit from the far end. - drained_msgs: list[dict] = await _drain_to_final_msg( + ( + return_msg, + drained_msgs, + ) = await _drain_to_final_msg( ctx=self, hide_tb=hide_tb, ) @@ -1444,7 +1486,10 @@ class Context: log.cancel( 'Ctx drained pre-result msgs:\n' - f'{pformat(drained_msgs)}' + f'{pformat(drained_msgs)}\n\n' + + f'Final return msg:\n' + f'{return_msg}\n' ) self.maybe_raise( @@ -1611,7 +1656,13 @@ class Context: async def started( self, - value: Any | None = None + + # TODO: how to type this so that it's the + # same as the payload type? Is this enough? + value: PayloadT|None = None, + + strict_parity: bool = False, + complain_no_parity: bool = True, ) -> None: ''' @@ -1632,7 +1683,7 @@ class Context: f'called `.started()` twice on context with {self.chan.uid}' ) - started = Started( + started_msg = Started( cid=self.cid, pld=value, ) @@ -1653,28 +1704,54 @@ class Context: # https://zguide.zeromq.org/docs/chapter7/#The-Cheap-or-Nasty-Pattern # codec: MsgCodec = current_codec() - msg_bytes: bytes = codec.encode(started) + msg_bytes: bytes = codec.encode(started_msg) try: # be a "cheap" dialog (see above!) - rt_started = codec.decode(msg_bytes) - if rt_started != started: + if ( + strict_parity + or + complain_no_parity + ): + rt_started: Started = codec.decode(msg_bytes) - # TODO: break these methods out from the struct subtype? 
- diff = pretty_struct.Struct.__sub__(rt_started, started) + # XXX something is prolly totes cucked with the + # codec state! + if isinstance(rt_started, dict): + rt_started = msgtypes.from_dict_msg( + dict_msg=rt_started, + ) + raise RuntimeError( + 'Failed to roundtrip `Started` msg?\n' + f'{pformat(rt_started)}\n' + ) - complaint: str = ( - 'Started value does not match after codec rountrip?\n\n' - f'{diff}' - ) - # TODO: rn this will pretty much always fail with - # any other sequence type embeded in the - # payload... - if self._strict_started: - raise ValueError(complaint) - else: - log.warning(complaint) + if rt_started != started_msg: + # TODO: break these methods out from the struct subtype? - await self.chan.send(rt_started) + diff = pretty_struct.Struct.__sub__( + rt_started, + started_msg, + ) + complaint: str = ( + 'Started value does not match after codec rountrip?\n\n' + f'{diff}' + ) + + # TODO: rn this will pretty much always fail with + # any other sequence type embeded in the + # payload... + if ( + self._strict_started + or + strict_parity + ): + raise ValueError(complaint) + else: + log.warning(complaint) + + # started_msg = rt_started + + await self.chan.send(started_msg) # raise any msg type error NO MATTER WHAT! except msgspec.ValidationError as verr: @@ -1685,7 +1762,7 @@ class Context: src_validation_error=verr, verb_header='Trying to send payload' # > 'invalid `Started IPC msgs\n' - ) + ) from verr self._started_called = True @@ -1786,13 +1863,17 @@ class Context: else: log_meth = log.runtime - log_meth( - f'Delivering error-msg to caller\n\n' + side: str = self.side - f'<= peer: {from_uid}\n' + peer_side: str = self.peer_side(side) + + log_meth( + f'Delivering IPC ctx error from {peer_side!r} to {side!r} task\n\n' + + f'<= peer {peer_side!r}: {from_uid}\n' f' |_ {nsf}()\n\n' - f'=> cid: {cid}\n' + f'=> {side!r} cid: {cid}\n' f' |_{self._task}\n\n' f'{pformat(re)}\n' @@ -1807,6 +1888,7 @@ class Context: self._maybe_cancel_and_set_remote_error(re) # XXX only case where returning early is fine! + structfmt = pretty_struct.Struct.pformat if self._in_overrun: log.warning( f'Queueing OVERRUN msg on caller task:\n' @@ -1816,7 +1898,7 @@ class Context: f'=> cid: {cid}\n' f' |_{self._task}\n\n' - f'{pformat(msg)}\n' + f'{structfmt(msg)}\n' ) self._overflow_q.append(msg) return False @@ -1830,7 +1912,7 @@ class Context: f'=> {self._task}\n' f' |_cid={self.cid}\n\n' - f'{pformat(msg)}\n' + f'{structfmt(msg)}\n' ) # NOTE: if an error is deteced we should always still @@ -2050,6 +2132,9 @@ async def open_context_from_portal( # place.. allow_overruns=allow_overruns, ) + # ASAP, so that `Context.side: str` can be determined for + # logging / tracing / debug! + ctx._portal: Portal = portal assert ctx._remote_func_type == 'context' msg: Started = await ctx._recv_chan.receive() @@ -2068,10 +2153,10 @@ async def open_context_from_portal( msg=msg, src_err=src_error, log=log, - expect_key='started', + expect_msg=Started, + # expect_key='started', ) - ctx._portal: Portal = portal uid: tuple = portal.channel.uid cid: str = ctx.cid -- 2.34.1 From 515d5faa0a29427575c2baca68bb8f261188b9c1 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 14 Apr 2024 16:29:21 -0400 Subject: [PATCH 047/305] Add `from_dict_msg(user_pretty: bool)` flag Allows for optionally (and dynamically) constructing the "expected" `MsgType` from a `dict` into a `pretty_struct.Struct`, mostly for logging usage. 
--- tractor/msg/types.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 9787504b..f7654f62 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -451,7 +451,8 @@ def from_dict_msg( dict_msg: dict, msgT: MsgType|None = None, - tag_field: str = 'msg_type' + tag_field: str = 'msg_type', + use_pretty: bool = False, ) -> MsgType: ''' @@ -468,6 +469,19 @@ def from_dict_msg( # XXX ensure tag field is removed msgT_name: str = dict_msg.pop(msg_type_tag_field) msgT: MsgType = _msg_table[msgT_name] + if use_pretty: + msgT = defstruct( + name=msgT_name, + fields=[ + (key, fi.type) + for fi, key, _ + in pretty_struct.iter_fields(msgT) + ], + bases=( + pretty_struct.Struct, + msgT, + ), + ) return msgT(**dict_msg) # TODO: should be make a msg version of `ContextCancelled?` -- 2.34.1 From 956ff11863fab0b943c8a2165461d253fa6ee7fb Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 14 Apr 2024 16:32:18 -0400 Subject: [PATCH 048/305] Add `MsgTypeError.expected_msg_type` Which matches with renaming `.payload_msg` -> `.expected_msg` which is the value we attempt to construct from a vanilla-msgppack decode-to-`dict` and then construct manually into a `MsgType` using `.msg.types.from_dict_msg()`. Add a todo to use new `use_pretty` flag which currently conflicts with `._exceptions.pformat_boxed_type()` prefix formatting.. --- tractor/_exceptions.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 31b7b36e..259994a1 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -43,9 +43,12 @@ from tractor.msg import ( MsgType, Stop, Yield, - pretty_struct, types as msgtypes, ) +from tractor.msg.pretty_struct import ( + iter_fields, + Struct, +) if TYPE_CHECKING: from ._context import Context @@ -82,7 +85,7 @@ class InternalError(RuntimeError): _ipcmsg_keys: list[str] = [ fi.name for fi, k, v - in pretty_struct.iter_fields(Error) + in iter_fields(Error) ] @@ -321,7 +324,7 @@ class RemoteActorError(Exception): assert self.boxed_type is boxed_type @property - def ipc_msg(self) -> pretty_struct.Struct: + def ipc_msg(self) -> Struct: ''' Re-render the underlying `._ipc_msg: Msg` as a `pretty_struct.Struct` for introspection such that the @@ -334,12 +337,12 @@ class RemoteActorError(Exception): msg_type: MsgType = type(self._ipc_msg) fields: dict[str, Any] = { k: v for _, k, v in - pretty_struct.iter_fields(self._ipc_msg) + iter_fields(self._ipc_msg) } return defstruct( msg_type.__name__, fields=fields.keys(), - bases=(msg_type, pretty_struct.Struct), + bases=(msg_type, Struct), )(**fields) @property @@ -641,11 +644,11 @@ class MsgTypeError( ''' reprol_fields: list[str] = [ - 'payload_msg', + 'expected_msg_type', ] extra_body_fields: list[str] = [ 'cid', - 'payload_msg', + 'expected_msg', ] @property @@ -661,9 +664,7 @@ class MsgTypeError( return self.msgdata.get('_msg_dict') @property - def payload_msg( - self, - ) -> MsgType|None: + def expected_msg(self) -> MsgType|None: ''' Attempt to construct what would have been the original `MsgType`-with-payload subtype (i.e. an instance from the set @@ -674,9 +675,17 @@ class MsgTypeError( if msg_dict := self.msg_dict.copy(): return msgtypes.from_dict_msg( dict_msg=msg_dict, + # use_pretty=True, + # ^-TODO-^ would luv to use this BUT then the + # `field_prefix` in `pformat_boxed_tb()` cucks it + # all up.. 
XD ) return None + @property + def expected_msg_type(self) -> Type[MsgType]|None: + return type(self.expected_msg) + @property def cid(self) -> str: # pre-packed using `.from_decode()` constructor -- 2.34.1 From 7ca746e96eb691087516c193cf996d58b6be85aa Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 14 Apr 2024 18:31:41 -0400 Subject: [PATCH 049/305] Use `_raise_from_no_key_in_msg(allow_msgs)` Instead of `allow_msg_keys` since we've fully flipped over to struct-types for msgs in the runtime. - drop the loop from `MsgStream.receive_nowait()` since `Yield/Return.pld` getting will handle both (instead of a loop of `dict`-key reads). --- tractor/_context.py | 1 - tractor/_exceptions.py | 3 +-- tractor/_streaming.py | 48 ++++++++++++++++-------------------------- 3 files changed, 19 insertions(+), 33 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 2d9e6363..29fee0b1 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -2154,7 +2154,6 @@ async def open_context_from_portal( src_err=src_error, log=log, expect_msg=Started, - # expect_key='started', ) uid: tuple = portal.channel.uid diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 259994a1..65637fb5 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -938,7 +938,6 @@ def _raise_from_no_key_in_msg( src_err: KeyError, log: StackLevelAdapter, # caller specific `log` obj - expect_key: str = 'yield', expect_msg: str = Yield, stream: MsgStream | None = None, @@ -1053,7 +1052,7 @@ def _raise_from_no_key_in_msg( # is activated above. _type: str = 'Stream' if stream else 'Context' raise MessagingError( - f"{_type} was expecting a '{expect_key.upper()}' message" + f"{_type} was expecting a {expect_msg} message" " BUT received a non-error msg:\n" f'{pformat(msg)}' ) from src_err diff --git a/tractor/_streaming.py b/tractor/_streaming.py index fcf8dafc..ac4d482e 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -44,6 +44,7 @@ from .trionics import ( BroadcastReceiver, ) from tractor.msg import ( + Return, Stop, Yield, ) @@ -82,7 +83,7 @@ class MsgStream(trio.abc.Channel): self, ctx: Context, # typing: ignore # noqa rx_chan: trio.MemoryReceiveChannel, - _broadcaster: BroadcastReceiver | None = None, + _broadcaster: BroadcastReceiver|None = None, ) -> None: self._ctx = ctx @@ -96,36 +97,26 @@ class MsgStream(trio.abc.Channel): # delegate directly to underlying mem channel def receive_nowait( self, - allow_msg_keys: list[str] = ['yield'], + allow_msgs: list[str] = Yield, ): - # msg: dict = self._rx_chan.receive_nowait() msg: Yield|Stop = self._rx_chan.receive_nowait() - for ( - i, - key, - ) in enumerate(allow_msg_keys): - try: - # return msg[key] - return msg.pld - # except KeyError as kerr: - except AttributeError as attrerr: - if i < (len(allow_msg_keys) - 1): - continue - - _raise_from_no_key_in_msg( - ctx=self._ctx, - msg=msg, - # src_err=kerr, - src_err=attrerr, - log=log, - expect_key=key, - stream=self, - ) + # TODO: replace msg equiv of this or does the `.pld` + # interface read already satisfy it? I think so, yes? 
+ try: + return msg.pld + except AttributeError as attrerr: + _raise_from_no_key_in_msg( + ctx=self._ctx, + msg=msg, + src_err=attrerr, + log=log, + stream=self, + ) async def receive( self, - hide_tb: bool = True, + hide_tb: bool = False, ): ''' Receive a single msg from the IPC transport, the next in @@ -157,10 +148,9 @@ class MsgStream(trio.abc.Channel): try: try: msg: Yield = await self._rx_chan.receive() - # return msg['yield'] return msg.pld - # except KeyError as kerr: + # TODO: implement with match: instead? except AttributeError as attrerr: # src_err = kerr src_err = attrerr @@ -170,10 +160,8 @@ class MsgStream(trio.abc.Channel): _raise_from_no_key_in_msg( ctx=self._ctx, msg=msg, - # src_err=kerr, src_err=attrerr, log=log, - expect_key='yield', stream=self, ) @@ -304,7 +292,7 @@ class MsgStream(trio.abc.Channel): while not drained: try: maybe_final_msg = self.receive_nowait( - allow_msg_keys=['yield', 'return'], + allow_msgs=[Yield, Return], ) if maybe_final_msg: log.debug( -- 2.34.1 From 36bf58887d0a8cc96074bb866b244e5764559eea Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 14 Apr 2024 19:31:50 -0400 Subject: [PATCH 050/305] Pass a `use_greenback: bool` runtime var to subs Such that the top level `maybe_enable_greenback` from `open_root_actor()` can toggle the entire actor tree's usage. Read the rtv in `._rpc` tasks and only enable if set. Also, rigor up the `._rpc.process_messages()` loop to handle `Error()` and `case _:` separately such that we now raise an explicit rte for unknown / invalid msgs. Use "parent" / "child" for side descriptions in loop comments and put a fat comment before the `StartAck` in `_invoke()`. --- tractor/_root.py | 12 +++++++-- tractor/_rpc.py | 68 ++++++++++++++++++++++++++++++++++-------------- 2 files changed, 58 insertions(+), 22 deletions(-) diff --git a/tractor/_root.py b/tractor/_root.py index 4469f3ed..3209555e 100644 --- a/tractor/_root.py +++ b/tractor/_root.py @@ -117,6 +117,8 @@ async def open_root_actor( os.environ['PYTHONBREAKPOINT'] = ( 'tractor.devx._debug.pause_from_sync' ) + _state._runtime_vars['use_greenback'] = True + else: # TODO: disable `breakpoint()` by default (without # `greenback`) since it will break any multi-actor @@ -392,14 +394,20 @@ async def open_root_actor( _state._last_actor_terminated = actor # restore built-in `breakpoint()` hook state - if debug_mode: + if ( + debug_mode + and + maybe_enable_greenback + ): if builtin_bp_handler is not None: sys.breakpointhook = builtin_bp_handler + if orig_bp_path is not None: os.environ['PYTHONBREAKPOINT'] = orig_bp_path + else: # clear env back to having no entry - os.environ.pop('PYTHONBREAKPOINT') + os.environ.pop('PYTHONBREAKPOINT', None) logger.runtime("Root actor terminated") diff --git a/tractor/_rpc.py b/tractor/_rpc.py index c9754ebc..7e259c1e 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -41,7 +41,6 @@ from trio import ( TaskStatus, ) -from .msg import NamespacePath from ._ipc import Channel from ._context import ( Context, @@ -58,6 +57,11 @@ from ._exceptions import ( from .devx import _debug from . import _state from .log import get_logger +from .msg import ( + current_codec, + MsgCodec, + NamespacePath, +) from tractor.msg.types import ( CancelAck, Error, @@ -94,6 +98,7 @@ async def _invoke_non_context( Context | BaseException ] = trio.TASK_STATUS_IGNORED, ): + __tracebackhide__: bool = True # TODO: can we unify this with the `context=True` impl below? 
if inspect.isasyncgen(coro): @@ -394,7 +399,11 @@ async def _invoke( __tracebackhide__: bool = hide_tb treat_as_gen: bool = False - if _state.debug_mode(): + if ( + _state.debug_mode() + and + _state._runtime_vars['use_greenback'] + ): # XXX for .pause_from_sync()` usage we need to make sure # `greenback` is boostrapped in the subactor! await _debug.maybe_init_greenback() @@ -508,10 +517,22 @@ async def _invoke( # wrapper that calls `Context.started()` and then does # the `await coro()`? - # a "context" endpoint type is the most general and - # "least sugary" type of RPC ep with support for + # ------ - ------ + # a "context" endpoint is the most general and + # "least sugary" type of RPC with support for # bi-dir streaming B) - # StartAck + # + # the concurrency relation is simlar to a task nursery + # wherein a "parent" task (the one that enters + # `trio.open_nursery()` in some actor "opens" (via + # `Portal.open_context()`) an IPC ctx to another peer + # (which is maybe a sub-) actor who then schedules (aka + # `trio.Nursery.start()`s) a new "child" task to execute + # the `@context` annotated func; that is this func we're + # running directly below! + # ------ - ------ + # + # StartAck: respond immediately with endpoint info await chan.send( StartAck( cid=cid, @@ -520,11 +541,11 @@ async def _invoke( ) # TODO: should we also use an `.open_context()` equiv - # for this callee side by factoring the impl from + # for this child side by factoring the impl from # `Portal.open_context()` into a common helper? # # NOTE: there are many different ctx state details - # in a callee side instance according to current impl: + # in a child side instance according to current impl: # - `.cancelled_caught` can never be `True`. # -> the below scope is never exposed to the # `@context` marked RPC function. @@ -550,7 +571,7 @@ async def _invoke( # NOTE: this happens IFF `ctx._scope.cancel()` is # called by any of, - # - *this* callee task manually calling `ctx.cancel()`. + # - *this* child task manually calling `ctx.cancel()`. # - the runtime calling `ctx._deliver_msg()` which # itself calls `ctx._maybe_cancel_and_set_remote_error()` # which cancels the scope presuming the input error @@ -627,10 +648,11 @@ async def _invoke( # f' |_{ctx}' ) - # task-contex was either cancelled by request using - # ``Portal.cancel_actor()`` or ``Context.cancel()`` - # on the far end, or it was cancelled by the local - # (callee) task, so relay this cancel signal to the + # task-contex was either cancelled by request + # using ``Portal.cancel_actor()`` or + # ``Context.cancel()`` on the far end, or it + # was cancelled by the local child (or callee) + # task, so relay this cancel signal to the # other side. ctxc = ContextCancelled( message=msg, @@ -651,7 +673,7 @@ async def _invoke( ) as scope_error: - # always set this (callee) side's exception as the + # always set this (child) side's exception as the # local error on the context ctx._local_error: BaseException = scope_error @@ -1020,9 +1042,8 @@ async def process_messages( trio.Event(), ) - # XXX remote (runtime scoped) error or uknown - # msg (type). - case Error() | _: + # runtime-scoped remote error (since no `.cid`) + case Error(): # NOTE: this is the non-rpc error case, # that is, an error **not** raised inside # a call to ``_invoke()`` (i.e. 
no cid was @@ -1030,10 +1051,6 @@ async def process_messages( # this error to all local channel # consumers (normally portals) by marking # the channel as errored - log.exception( - f'Unhandled IPC msg:\n\n' - f'{msg}\n' - ) # assert chan.uid chan._exc: Exception = unpack_error( msg, @@ -1041,6 +1058,17 @@ async def process_messages( ) raise chan._exc + # unknown/invalid msg type? + case _: + codec: MsgCodec = current_codec() + message: str = ( + f'Unhandled IPC msg for codec?\n\n' + f'|_{codec}\n\n' + f'{msg}\n' + ) + log.exception(message) + raise RuntimeError(message) + log.runtime( 'Waiting on next IPC msg from\n' f'peer: {chan.uid}\n' -- 2.34.1 From ca43f15aa02ed7fdfb781451d0becc524812a22a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 14 Apr 2024 19:50:09 -0400 Subject: [PATCH 051/305] More msg-spec tests tidying - Drop `test_msg_spec_xor_pld_spec()` since we no longer support `ipc_msg_spec` arg to `mk_codec()`. - Expect `MsgTypeError`s around `.open_context()` calls when `add_codec_hooks == False`. - toss in some `.pause()` points in the subactor ctx body whilst hacking out a `.pld` protocol for debug mode TTY locking. --- tests/test_caps_based_msging.py | 65 ++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 22 deletions(-) diff --git a/tests/test_caps_based_msging.py b/tests/test_caps_based_msging.py index f7cab2a5..9a73ba8d 100644 --- a/tests/test_caps_based_msging.py +++ b/tests/test_caps_based_msging.py @@ -14,19 +14,20 @@ from typing import ( from contextvars import ( Context, ) -# from inspect import Parameter from msgspec import ( structs, msgpack, - # defstruct, Struct, ValidationError, ) import pytest import tractor -from tractor import _state +from tractor import ( + _state, + MsgTypeError, +) from tractor.msg import ( _codec, _ctxvar_MsgCodec, @@ -47,21 +48,6 @@ from tractor.msg.types import ( import trio -def test_msg_spec_xor_pld_spec(): - ''' - If the `.msg.types.Msg`-set is overridden, we - can't also support a `Msg.pld` spec. - - ''' - # apply custom hooks and set a `Decoder` which only - # loads `NamespacePath` types. - with pytest.raises(RuntimeError): - mk_codec( - ipc_msg_spec=Any, - ipc_pld_spec=NamespacePath, - ) - - def mk_custom_codec( pld_spec: Union[Type]|Any, add_hooks: bool, @@ -134,7 +120,9 @@ def mk_custom_codec( f'{uid}\n' 'FAILED DECODE\n' f'type-> {obj_type}\n' - f'obj-arg-> `{obj}`: {type(obj)}\n' + f'obj-arg-> `{obj}`: {type(obj)}\n\n' + f'current codec:\n' + f'{current_codec()}\n' ) # TODO: figure out the ignore subsys for this! # -[ ] option whether to defense-relay backc the msg @@ -409,7 +397,9 @@ async def send_back_values( pld_spec=ipc_pld_spec, add_hooks=add_hooks, ) - with apply_codec(nsp_codec) as codec: + with ( + apply_codec(nsp_codec) as codec, + ): chk_codec_applied( expect_codec=nsp_codec, enter_value=codec, @@ -459,7 +449,7 @@ async def send_back_values( # XXX NOTE XXX THIS WON'T WORK WITHOUT SPECIAL # `str` handling! or special debug mode IPC # msgs! - # await tractor.pause() + await tractor.pause() raise RuntimeError( f'NOT-EXPECTED able to roundtrip value given spec:\n' @@ -470,7 +460,8 @@ async def send_back_values( break # move on to streaming block.. 
except tractor.MsgTypeError: - # await tractor.pause() + await tractor.pause() + if expect_send: raise RuntimeError( f'EXPECTED to `.started()` value given spec:\n' @@ -652,12 +643,42 @@ def test_codec_hooks_mod( pld_spec_type_strs: list[str] = enc_type_union(ipc_pld_spec) + # XXX should raise an mte (`MsgTypeError`) + # when `add_codec_hooks == False` bc the input + # `expect_ipc_send` kwarg has a nsp which can't be + # serialized! + # + # TODO:can we ensure this happens from the + # `Return`-side (aka the sub) as well? + if not add_codec_hooks: + try: + async with p.open_context( + send_back_values, + expect_debug=debug_mode, + pld_spec_type_strs=pld_spec_type_strs, + add_hooks=add_codec_hooks, + started_msg_bytes=nsp_codec.encode(expected_started), + + # XXX NOTE bc we send a `NamespacePath` in this kwarg + expect_ipc_send=expect_ipc_send, + + ) as (ctx, first): + pytest.fail('ctx should fail to open without custom enc_hook!?') + + # this test passes bc we can go no further! + except MsgTypeError: + # teardown nursery + await p.cancel_actor() + return + # TODO: send the original nsp here and # test with `limit_msg_spec()` above? # await tractor.pause() print('PARENT opening IPC ctx!\n') async with ( + # XXX should raise an mte (`MsgTypeError`) + # when `add_codec_hooks == False`.. p.open_context( send_back_values, expect_debug=debug_mode, -- 2.34.1 From 59966e5650a003c2fb17258e09aa09c778339f0c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 15 Apr 2024 15:20:00 -0400 Subject: [PATCH 052/305] Tweak a couple more log message fmts --- tractor/_context.py | 2 +- tractor/_streaming.py | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 29fee0b1..052c198d 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -1905,7 +1905,7 @@ class Context: try: log.runtime( - f'Delivering msg from IPC ctx:\n' + f'Delivering msg from IPC ctx:\n\n' f'<= {from_uid}\n' f' |_ {nsf}()\n\n' diff --git a/tractor/_streaming.py b/tractor/_streaming.py index ac4d482e..16e32cea 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -377,14 +377,17 @@ class MsgStream(trio.abc.Channel): # await rx_chan.aclose() if not self._eoc: - log.cancel( - 'Stream closed by self before it received an EoC?\n' - 'Setting eoc manually..\n..' - ) - self._eoc: bool = trio.EndOfChannel( - f'Context stream closed by self({self._ctx.side})\n' + message: str = ( + f'Context stream closed by {self._ctx.side!r}\n' f'|_{self}\n' ) + log.cancel( + 'Stream self-closed before receiving EoC\n\n' + + + message + ) + self._eoc = trio.EndOfChannel(message) + # ?XXX WAIT, why do we not close the local mem chan `._rx_chan` XXX? # => NO, DEFINITELY NOT! <= # if we're a bi-dir ``MsgStream`` BECAUSE this same -- 2.34.1 From 14583307ee4adb59e7ad845f89a95723120b6437 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 16 Apr 2024 10:09:45 -0400 Subject: [PATCH 053/305] First draft, sub-msg-spec for debugger `Lock` sys Since it's totes possible to have a spec applied that won't permit `str`s, might as well formalize a small msg set for subactors to request the tree-wide TTY `Lock`. BTW, I'm prolly not going into every single change here in this first WIP since there's still a variety of broken stuff mostly to do with races on the codec apply being done in a `trio.lowleve.RunVar`; it should be re-done with a `ContextVar` such that each task does NOT mutate the global setting.. 
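
As a minimal standalone sketch of the semantics (not from this patch):
a `trio.lowlevel.RunVar` is scoped to the *whole* `trio` run so any
task's `.set()` is observed by every other task, whereas
a `contextvars.ContextVar` set inside a `trio` task stays local to
that task:

    from contextvars import ContextVar
    import trio
    from trio.lowlevel import RunVar

    _rv = RunVar('codec', default='vanilla')
    _cv: ContextVar[str] = ContextVar('codec', default='vanilla')

    async def locking_task():
        _rv.set('debug')  # run-global: leaks to sibling tasks!
        _cv.set('debug')  # task-local: siblings unaffected

    async def app_task():
        await trio.sleep(0.1)  # ensure `locking_task()` ran first
        assert _rv.get() == 'debug'    # clobbered -> the race
        assert _cv.get() == 'vanilla'  # isolated -> no race

    async def main():
        async with trio.open_nursery() as tn:
            tn.start_soon(locking_task)
            tn.start_soon(app_task)

    trio.run(main)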
New msg set and usage is simply:
- `LockStatus` which is the response msg delivered from
  `lock_tty_for_child()`.
- `LockRelease` a one-off request msg from the subactor to drop the
  `Lock` from a `MsgStream.send()`.
- use these msgs throughout the root and sub sides of the locking
  ctx funcs: `lock_tty_for_child()` & `wait_for_parent_stdin_hijack()`.

The codec is now applied in both the root and sub `Lock` request tasks:
- for root inside `lock_tty_for_child()` before the `.started()`.
- for subs, inside `wait_for_parent_stdin_hijack()` since we only want
  to affect the codec *for the locking task*.
- (hence the need for a ctx-var as mentioned above but currently this
  can cause races which will break against other app tasks competing
  for the codec setting).
- add an `apply_debug_codec()` helper for use in both cases.
- add more detailed logging to both the root and sub side of `Lock`
  requesting funcs including requiring that the sub-side task "uid" (a
  `tuple[str, int]` = `(trio.Task.name, id(trio.Task))`) be provided
  (more on this later).

A main issue discovered while proto-testing all this was the ability of
a sub to "double lock" (leading to self-deadlock) via an error in
`wait_for_parent_stdin_hijack()` which, for ex., can happen in debug
mode via crash handling of a `MsgTypeError` received from the root
during a codec-applied msg-spec race! Originally I was attempting to
solve this by making the SIGINT override handler more resilient but
this case is somewhat impossible to detect by an external root task
other than checking for duplicate ownership via the new
`subactor_task_uid`.
=> SO NOW, we always stick the current task uid in the
`Lock._blocked: set` and raise an rte on a double request by the same
remote task.

Included is a variety of small refinements:
- finally figured out how to mark a variety of `.__exit__()` frames
  with `pdbp.hideframe()` to actually hide them B)
- add cls methods around managing `Lock._locking_task_cs` from root
  only.
- re-org all the `Lock` attrs into those only used in root vs.
  subactors and proto-prep a new `DebugStatus` actor-singleton to be
  used in subs.
- add a `Lock.repr()` to contextually print the current conc primitives.
- rename our `Pdb`-subtype to `PdbREPL`.
- rigor up the SIGINT handler a bit, originally to try and hack-solve
  the double-lock issue mentioned above, but now just with better
  logging and logic for most (all?) possible hang cases that should be
  hang-recoverable after enough ctrl-c mashing by the user.. well
  hopefully:
  - using `Lock.repr()` for both root and sub cases.
  - lots more `log.warn()`s and handler reversions on stale lock or cs
    detection.
- factor the `._pause()` impl a little better by moving the actual repl
  entry to a new `_enter_repl_sync()` (originally for easier wrapping
  in the sub case with `apply_codec()`).
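
Condensed from the diff below, the per-locking-task codec application
roughly reads as,

    from tractor.msg import _codec
    from tractor.devx._debug import LockStatus, LockRelease

    with _codec.limit_msg_spec(
        payload_spec=LockStatus|LockRelease,
    ) as codec:
        # only *this* task's IPC payloads are now restricted to the
        # debug-lock msg-spec; (ideally) other app tasks keep their
        # own codec settings.
        assert codec is _codec.current_codec()
        # ... enter the `lock_tty_for_child()` ctx + stream here ...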
--- tractor/devx/__init__.py | 1 - tractor/devx/_debug.py | 1049 ++++++++++++++++++++++++++++---------- 2 files changed, 792 insertions(+), 258 deletions(-) diff --git a/tractor/devx/__init__.py b/tractor/devx/__init__.py index 75aec953..c1a93878 100644 --- a/tractor/devx/__init__.py +++ b/tractor/devx/__init__.py @@ -27,7 +27,6 @@ from ._debug import ( pause as pause, pause_from_sync as pause_from_sync, shield_sigint_handler as shield_sigint_handler, - MultiActorPdb as MultiActorPdb, open_crash_handler as open_crash_handler, maybe_open_crash_handler as maybe_open_crash_handler, post_mortem as post_mortem, diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 26155b22..51e74379 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -25,6 +25,7 @@ from contextlib import ( asynccontextmanager as acm, contextmanager as cm, nullcontext, + _GeneratorContextManager, ) from functools import ( partial, @@ -33,6 +34,7 @@ from functools import ( import os import signal import sys +import textwrap import threading import traceback from typing import ( @@ -40,6 +42,7 @@ from typing import ( Callable, AsyncIterator, AsyncGenerator, + TypeAlias, TYPE_CHECKING, ) from types import ( @@ -47,17 +50,23 @@ from types import ( ModuleType, ) +from msgspec import Struct import pdbp import sniffio import tractor import trio -from trio.lowlevel import current_task +from trio.lowlevel import ( + current_task, + Task, +) from trio import ( TaskStatus, - # Task, ) from tractor.log import get_logger +from tractor.msg import ( + _codec, +) from tractor._state import ( current_actor, is_root_process, @@ -76,6 +85,36 @@ if TYPE_CHECKING: log = get_logger(__name__) +# XXX HACKZONE XXX +# hide exit stack frames on nurseries and cancel-scopes! +# |_ so avoid seeing it when the `pdbp` REPL is first engaged from +# inside a `trio.open_nursery()` scope (with no line after it +# in before the block end??). +# +# TODO: FINALLY got this workin originally with +# `@pdbp.hideframe` around the `wrapper()` def embedded inside +# `_ki_protection_decoratior()`.. which is in the module: +# /home/goodboy/.virtualenvs/tractor311/lib/python3.11/site-packages/trio/_core/_ki.py +# +# -[ ] make an issue and patch for `trio` core? maybe linked +# to the long outstanding `pdb` one below? +# |_ it's funny that there's frame hiding throughout `._run.py` +# but not where it matters on the below exit funcs.. +# +# -[ ] provide a patchset for the lonstanding +# |_ https://github.com/python-trio/trio/issues/1155 +# +# -[ ] make a linked issue to ^ and propose allowing all the +# `._core._run` code to have their `__tracebackhide__` value +# configurable by a `RunVar` to allow getting scheduler frames +# if desired through configuration? +# +# -[ ] maybe dig into the core `pdb` issue why the extra frame is shown +# at all? +# +pdbp.hideframe(trio._core._run.NurseryManager.__aexit__) +pdbp.hideframe(trio._core._run.CancelScope.__exit__) +pdbp.hideframe(_GeneratorContextManager.__exit__) __all__ = [ 'breakpoint', @@ -83,6 +122,28 @@ __all__ = [ ] +class LockStatus( + Struct, + tag=True, + tag_field='msg_type', +): + subactor_uid: tuple[str, str] + cid: str + locked: bool + + +class LockRelease( + Struct, + tag=True, + tag_field='msg_type', +): + subactor_uid: tuple[str, str] + cid: str + + +__msg_spec__: TypeAlias = LockStatus|LockRelease + + class Lock: ''' Actor global debug lock state. @@ -90,41 +151,111 @@ class Lock: Mostly to avoid a lot of ``global`` declarations for now XD. 
''' - repl: MultiActorPdb | None = None + # XXX local ref to the `Pbp` instance, ONLY set in the + # actor-process that currently has activated a REPL + # i.e. it will be `None` (unset) in any other actor-process + # that does not have this lock acquired in the root proc. + repl: PdbREPL|None = None + # placeholder for function to set a ``trio.Event`` on debugger exit # pdb_release_hook: Callable | None = None - _trio_handler: Callable[ - [int, FrameType | None], Any - ] | int | None = None + _trio_handler: ( + Callable[[int, FrameType|None], Any] + |int + | None + ) = None - # actor-wide variable pointing to current task name using debugger - local_task_in_debug: str | None = None + remote_task_in_debug: str|None = None - # NOTE: set by the current task waiting on the root tty lock from - # the CALLER side of the `lock_tty_for_child()` context entry-call - # and must be cancelled if this actor is cancelled via IPC - # request-message otherwise deadlocks with the parent actor may - # ensure + @staticmethod + def get_locking_task_cs() -> trio.CancelScope|None: + if is_root_process(): + return Lock._locking_task_cs + + raise RuntimeError( + '`Lock.locking_task_cs` is invalid in subactors!' + ) + + @staticmethod + def set_locking_task_cs( + cs: trio.CancelScope, + ) -> None: + if not is_root_process(): + raise RuntimeError( + '`Lock.locking_task_cs` is invalid in subactors!' + ) + + Lock._locking_task_cs = cs + + # SUBACTOR ONLY + # ------ - ------- + local_task_in_debug: Task|None = None _debugger_request_cs: trio.CancelScope|None = None + local_pdb_complete: trio.Event|None = None - # NOTE: set only in the root actor for the **local** root spawned task - # which has acquired the lock (i.e. this is on the callee side of - # the `lock_tty_for_child()` context entry). - _root_local_task_cs_in_debug: trio.CancelScope|None = None + # ROOT ONLY + # ------ - ------- + # the root-actor-ONLY singletons for, + # + # - the uid of the actor who's task is using a REPL + # - a literal task-lock, + # - a shielded-cancel-scope around the acquiring task*, + # - a broadcast event to signal no-actor using a REPL in tree, + # - a filter list to block subs-by-uid from locking. + # + # * in case it needs to be manually cancelled in root due to + # a stale lock condition (eg. IPC failure with the locking + # child + global_actor_in_debug: tuple[str, str]|None = None + no_remote_has_tty: trio.Event|None = None + _locking_task_cs: trio.CancelScope|None = None - # actor tree-wide actor uid that supposedly has the tty lock - global_actor_in_debug: tuple[str, str] = None - - local_pdb_complete: trio.Event | None = None - no_remote_has_tty: trio.Event | None = None - - # lock in root actor preventing multi-access to local tty _debug_lock: trio.StrictFIFOLock = trio.StrictFIFOLock() + _blocked: set[tuple[str, str]] = set() # `Actor.uid` block list + # TODO: should go on `PbpREPL`? 
_orig_sigint_handler: Callable | None = None - _blocked: set[tuple[str, str]] = set() + @classmethod + def repr(cls) -> str: + + # both root and subs + fields: str = ( + f'repl: {cls.repl}\n' + f'local_repl_task: {cls.local_task_in_debug}\n' + ) + + if is_root_process(): + lock_stats: trio.LockStatistics = cls._debug_lock.statistics() + fields += ( + f'global_actor_in_debug: {cls.global_actor_in_debug}\n' + f'no_remote_has_tty: {cls.no_remote_has_tty}\n' + f'remote_task_in_debug: {cls.remote_task_in_debug}\n' + f'_locking_task_cs: {cls.get_locking_task_cs()}\n' + f'_blocked: {cls._blocked}\n\n' + + f'_debug_lock: {cls._debug_lock}\n' + f'lock_stats: {lock_stats}\n' + ) + else: + fields += ( + f'local_task_in_debug: {cls.local_task_in_debug}\n' + f'local_pdb_complete: {cls.local_pdb_complete}\n' + f'_debugger_request_cs: {cls._debugger_request_cs}\n' + ) + + body: str = textwrap.indent( + fields, + prefix=' |_', + ) + return ( + f'<{cls.__name__}(\n' + f'{body}' + ')>' + ) + + # TODO: move to PdbREPL! @classmethod def shield_sigint(cls): ''' @@ -218,19 +349,35 @@ class Lock: else: cls._debug_lock.release() - except RuntimeError: + except RuntimeError as rte: # uhhh makes no sense but been seeing the non-owner # release error even though this is definitely the task # that locked? owner = cls._debug_lock.statistics().owner + # if ( + # owner + # and + # cls.remote_task_in_debug is None + # ): + # raise RuntimeError( + # 'Stale `Lock` detected, no remote task active!?\n' + # f'|_{owner}\n' + # # f'{Lock}' + # ) from rte + if owner: - raise + raise rte + + # OW suppress, can't member why tho .. XD + # something somethin corrupts a cancel-scope + # somewhere.. try: # sometimes the ``trio`` might already be terminated in # which case this call will raise. if cls.local_pdb_complete is not None: cls.local_pdb_complete.set() + finally: # restore original sigint handler cls.unshield_sigint() @@ -241,10 +388,33 @@ class Lock: cls.local_task_in_debug = None +# TODO: actually use this instead throughout for subs! +class DebugStatus: + ''' + Singleton-state for debugging machinery in a subactor. + + Composes conc primitives for syncing with a root actor to + acquire the tree-global (TTY) `Lock` such that only ever one + actor's task can have the REPL active at a given time. + + ''' + repl: PdbREPL|None = None + lock_status: LockStatus|None = None + + repl_task: Task|None = None + # local_task_in_debug: Task|None = None + + req_cs: trio.CancelScope|None = None + # _debugger_request_cs: trio.CancelScope|None = None + + repl_release: trio.Event|None = None + # local_pdb_complete: trio.Event|None = None + class TractorConfig(pdbp.DefaultConfig): ''' - Custom ``pdbp`` goodness :surfer: + Custom `pdbp` config which tries to use the best tradeoff + between pretty and minimal. ''' use_pygments: bool = True @@ -255,21 +425,41 @@ class TractorConfig(pdbp.DefaultConfig): # fixes line spacing issue when resizing terminal B) truncate_long_lines: bool = False + # ------ - ------ + # our own custom config vars mostly + # for syncing with the actor tree's singleton + # TTY `Lock`. -class MultiActorPdb(pdbp.Pdb): + +class PdbREPL(pdbp.Pdb): ''' - Add teardown hooks to the regular ``pdbp.Pdb``. + Add teardown hooks and local state describing any + ongoing TTY `Lock` request dialog. ''' # override the pdbp config with our coolio one + # NOTE: this is only loaded when no `~/.pdbrc` exists + # so we should prolly pass it into the .__init__() instead? + # i dunno, see the `DefaultFactory` and `pdb.Pdb` impls. 
DefaultConfig = TractorConfig + status = DebugStatus + # def preloop(self): # print('IN PRELOOP') # super().preloop() - # TODO: figure out how to disallow recursive .set_trace() entry - # since that'll cause deadlock for us. + # TODO: cleaner re-wrapping of all this? + # -[ ] figure out how to disallow recursive .set_trace() entry + # since that'll cause deadlock for us. + # -[ ] maybe a `@cm` to call `super().()`? + # -[ ] look at hooking into the `pp` hook specially with our + # own set of pretty-printers? + # * `.pretty_struct.Struct.pformat()` + # * `.pformat(MsgType.pld)` + # * `.pformat(Error.tb_str)`? + # * .. maybe more? + # def set_continue(self): try: super().set_continue() @@ -282,6 +472,17 @@ class MultiActorPdb(pdbp.Pdb): finally: Lock.release() + # TODO: special handling where we just want the next LOC and + # not to resume to the next pause/crash point? + # def set_next( + # self, + # frame: FrameType + # ) -> None: + # try: + # super().set_next(frame) + # finally: + # Lock.release() + # XXX NOTE: we only override this because apparently the stdlib pdb # bois likes to touch the SIGINT handler as much as i like to touch # my d$%&. @@ -314,7 +515,8 @@ class MultiActorPdb(pdbp.Pdb): @acm async def _acquire_debug_lock_from_root_task( - uid: tuple[str, str] + subactor_uid: tuple[str, str], + remote_task_uid: str, ) -> AsyncIterator[trio.StrictFIFOLock]: ''' @@ -326,16 +528,31 @@ async def _acquire_debug_lock_from_root_task( to the ``pdb`` repl. ''' - task_name: str = current_task().name + # task_name: str = current_task().name we_acquired: bool = False log.runtime( - f"Attempting to acquire TTY lock, remote task: {task_name}:{uid}" + f'Attempting to acquire TTY lock for,\n' + f'subactor_uid: {subactor_uid}\n' + f'remote task: {remote_task_uid}\n' ) try: - log.runtime( - f"entering lock checkpoint, remote task: {task_name}:{uid}" + pre_msg: str = ( + f'Entering lock checkpoint for sub-actor\n' + f'subactor_uid: {subactor_uid}\n' + f'remote task: {remote_task_uid}\n' ) + stats = Lock._debug_lock.statistics() + if owner := stats.owner: + # and Lock.no_remote_has_tty is not None + pre_msg += ( + f'\n' + f'`Lock` already held by local task\n' + f'{owner}\n\n' + f'On behalf of remote task: {Lock.remote_task_in_debug!r}\n' + ) + log.runtime(pre_msg) + # NOTE: if the surrounding cancel scope from the # `lock_tty_for_child()` caller is cancelled, this line should # unblock and NOT leave us in some kind of @@ -349,9 +566,14 @@ async def _acquire_debug_lock_from_root_task( # can try to avoid clobbering any connection from a child # that's currently relying on it. Lock.no_remote_has_tty = trio.Event() + Lock.remote_task_in_debug = remote_task_uid - Lock.global_actor_in_debug = uid - log.runtime(f"TTY lock acquired, remote task: {task_name}:{uid}") + Lock.global_actor_in_debug = subactor_uid + log.runtime( + f'TTY lock acquired for,\n' + f'subactor_uid: {subactor_uid}\n' + f'remote task: {remote_task_uid}\n' + ) # NOTE: critical section: this yield is unshielded! @@ -368,7 +590,8 @@ async def _acquire_debug_lock_from_root_task( finally: if ( we_acquired - and Lock._debug_lock.locked() + and + Lock._debug_lock.locked() ): Lock._debug_lock.release() @@ -380,16 +603,37 @@ async def _acquire_debug_lock_from_root_task( stats = Lock._debug_lock.statistics() if ( not stats.owner + # and Lock.no_remote_has_tty is not None ): - log.runtime(f"No more tasks waiting on tty lock! 
says {uid}") + # log.runtime( + log.info( + f'No more child ctx tasks hold the TTY lock!\n' + f'last subactor: {subactor_uid}\n' + f'remote task: {remote_task_uid}\n' + ) if Lock.no_remote_has_tty is not None: + # set and release Lock.no_remote_has_tty.set() Lock.no_remote_has_tty = None + Lock.remote_task_in_debug = None + else: + log.warning( + 'Not signalling `Lock.no_remote_has_tty` since it has value:\n' + f'{Lock.no_remote_has_tty}\n' + ) + else: + log.info( + f'A child ctx tasks still holds the TTY lock ??\n' + f'last subactor: {subactor_uid}\n' + f'remote task: {remote_task_uid}\n' + f'current local owner task: {stats.owner}\n' + ) Lock.global_actor_in_debug = None - log.runtime( - f"TTY lock released, remote task: {task_name}:{uid}" + 'TTY lock released by child\n' + f'last subactor: {subactor_uid}\n' + f'remote task: {remote_task_uid}\n' ) @@ -397,9 +641,14 @@ async def _acquire_debug_lock_from_root_task( async def lock_tty_for_child( ctx: tractor.Context, - subactor_uid: tuple[str, str] -) -> str: + # TODO: when we finally get a `Start.params: ParamSpec` + # working it'd sure be nice to have `msgspec` auto-decode this + # to an actual tuple XD + subactor_uid: tuple[str, str], + subactor_task_uid: tuple[str, int], + +) -> LockStatus|LockRelease: ''' Lock the TTY in the root process of an actor tree in a new inter-actor-context-task such that the ``pdbp`` debugger console @@ -411,53 +660,141 @@ async def lock_tty_for_child( highly reliable at releasing the mutex complete! ''' - task_name: str = current_task().name + + req_task_uid: tuple = tuple(subactor_task_uid) + if req_task_uid in Lock._blocked: + raise RuntimeError( + f'The same remote task already has an active request for TTY lock ??\n\n' + f'task uid: {req_task_uid}\n' + f'subactor uid: {subactor_uid}\n' + ) + + Lock._blocked.add(req_task_uid) + + root_task_name: str = current_task().name if tuple(subactor_uid) in Lock._blocked: log.warning( - f'Actor {subactor_uid} is blocked from acquiring debug lock\n' - f"remote task: {task_name}:{subactor_uid}" + f'Subactor is blocked from acquiring debug lock..\n' + f'subactor_uid: {subactor_uid}\n' + f'remote task: {subactor_task_uid}\n' ) ctx._enter_debugger_on_cancel: bool = False await ctx.cancel(f'Debug lock blocked for {subactor_uid}') - return 'pdb_lock_blocked' + return LockStatus( + subactor_uid=subactor_uid, + cid=ctx.cid, + locked=False, + ) # TODO: when we get to true remote debugging # this will deliver stdin data? log.debug( - "Attempting to acquire TTY lock\n" - f"remote task: {task_name}:{subactor_uid}" + 'Subactor attempting to acquire TTY lock\n' + f'root task: {root_task_name}\n' + f'subactor_uid: {subactor_uid}\n' + f'remote task: {subactor_task_uid}\n' ) - - log.debug(f"Actor {subactor_uid} is WAITING on stdin hijack lock") Lock.shield_sigint() - try: with ( + # NOTE: though a cs is created for every subactor lock + # REQUEST in this ctx-child task, only the root-task + # holding the `Lock` (on behalf of the ctx parent task + # in a subactor) will set + # `Lock._locking_task_cs` such that if the + # lock holdingn task ever needs to be cancelled (since + # it's shielded by default) that global ref can be + # used to do so! trio.CancelScope(shield=True) as debug_lock_cs, + + _codec.limit_msg_spec( + payload_spec=__msg_spec__, + ) as codec, ): - Lock._root_local_task_cs_in_debug = debug_lock_cs - async with _acquire_debug_lock_from_root_task(subactor_uid): + # sanity? + # TODO: don't need the ref right? 
+ assert codec is _codec.current_codec() + + async with _acquire_debug_lock_from_root_task( + subactor_uid, + subactor_task_uid, + ): + # XXX SUPER IMPORTANT BELOW IS ON THIS LINE XXX + # without that the root cs might be, + # - set and then removed in the finally block by + # a task that never acquired the lock, leaving + # - the task that DID acquire the lock STUCK since + # it's original cs was GC-ed bc the first task + # already set the global ref to `None` + Lock.set_locking_task_cs(debug_lock_cs) # indicate to child that we've locked stdio - await ctx.started('Locked') - log.debug( - f"Actor {subactor_uid} acquired stdin hijack lock" + await ctx.started( + LockStatus( + subactor_uid=subactor_uid, + cid=ctx.cid, + locked=True, + ) ) + log.debug( f'Actor {subactor_uid} acquired TTY lock') + # wait for unlock pdb by child async with ctx.open_stream() as stream: - assert await stream.receive() == 'pdb_unlock' + release_msg: LockRelease = await stream.receive() - return "pdb_unlock_complete" + # TODO: security around only releasing if + # these match? + log.pdb( + f'TTY lock released requested\n\n' + f'{release_msg}\n' + ) + assert release_msg.cid == ctx.cid + assert release_msg.subactor_uid == tuple(subactor_uid) + + log.debug(f'Actor {subactor_uid} released TTY lock') + + return LockStatus( + subactor_uid=subactor_uid, + cid=ctx.cid, + locked=False, + ) finally: - Lock._root_local_task_cs_in_debug = None + debug_lock_cs.cancel() + Lock.set_locking_task_cs(None) Lock.unshield_sigint() +@cm +def apply_debug_codec() -> _codec.MsgCodec: + ''' + Apply the subactor TTY `Lock`-ing protocol's msgspec temporarily + (only in the current task). + + ''' + with ( + _codec.limit_msg_spec( + payload_spec=__msg_spec__, + ) as debug_codec, + ): + assert debug_codec is _codec.current_codec() + log.pdb( + 'Applied `.devx._debug` msg-spec via codec\n' + f'{debug_codec}\n' + ) + yield debug_codec + + log.pdb( + 'REMOVED `.devx._debug` msg-spec via codec\n' + f'{debug_codec}\n' + ) + + async def wait_for_parent_stdin_hijack( actor_uid: tuple[str, str], + task_uid: tuple[str, int], task_status: TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED ): ''' @@ -476,25 +813,30 @@ async def wait_for_parent_stdin_hijack( ''' from .._discovery import get_root - with trio.CancelScope(shield=True) as cs: + with ( + trio.CancelScope(shield=True) as cs, + apply_debug_codec(), + ): Lock._debugger_request_cs = cs - try: + # TODO: merge into sync async with ? async with get_root() as portal: - # this syncs to child's ``Context.started()`` call. async with portal.open_context( lock_tty_for_child, subactor_uid=actor_uid, + subactor_task_uid=task_uid, - ) as (ctx, val): - - log.debug('locked context') - assert val == 'Locked' + ) as (ctx, resp): + log.pdb( + 'Subactor locked TTY per msg\n' + f'{resp}\n' + ) + assert resp.subactor_uid == actor_uid + assert resp.cid async with ctx.open_stream() as stream: - try: - # unblock local caller + try: # to unblock local caller assert Lock.local_pdb_complete task_status.started(cs) @@ -503,14 +845,22 @@ async def wait_for_parent_stdin_hijack( await Lock.local_pdb_complete.wait() finally: - # TODO: shielding currently can cause hangs... 
- # with trio.CancelScope(shield=True): - await stream.send('pdb_unlock') + await stream.send( + LockRelease( + subactor_uid=actor_uid, + cid=resp.cid, + ) + ) # sync with callee termination - assert await ctx.result() == "pdb_unlock_complete" + status: LockStatus = await ctx.result() + assert not status.locked - log.debug('exitting child side locking task context') + log.pdb( + 'TTY lock was released for subactor with msg\n\n' + f'{status}\n\n' + 'Exitting {ctx.side!r} side locking of locking ctx' + ) except ContextCancelled: log.warning('Root actor cancelled debug lock') @@ -518,12 +868,17 @@ async def wait_for_parent_stdin_hijack( finally: Lock.local_task_in_debug = None - log.debug('Exiting debugger from child') + log.debug('Exiting debugger TTY lock request func from child') -def mk_mpdb() -> MultiActorPdb: + log.cancel('Reverting SIGINT handler!') + Lock.unshield_sigint() + + + +def mk_mpdb() -> PdbREPL: ''' - Deliver a new `MultiActorPdb`: a multi-process safe `pdbp` + Deliver a new `PdbREPL`: a multi-process safe `pdbp` REPL using the magic of SC! Our `pdb.Pdb` subtype accomplishes multi-process safe debugging @@ -538,7 +893,7 @@ def mk_mpdb() -> MultiActorPdb: by either explicit requests in the runtime or ''' - pdb = MultiActorPdb() + pdb = PdbREPL() # Always shield out SIGINTs for subactors when REPL is active. # @@ -560,7 +915,6 @@ def mk_mpdb() -> MultiActorPdb: def shield_sigint_handler( signum: int, frame: 'frame', # type: ignore # noqa - # pdb_obj: MultiActorPdb | None = None, *args, ) -> None: @@ -577,6 +931,7 @@ def shield_sigint_handler( uid_in_debug: tuple[str, str]|None = Lock.global_actor_in_debug actor: Actor = current_actor() + case_handled: bool = False def do_cancel(): # If we haven't tried to cancel the runtime then do that instead @@ -586,107 +941,202 @@ def shield_sigint_handler( actor.cancel_soon() # If the runtime is already cancelled it likely means the user - # hit ctrl-c again because teardown didn't full take place in + # hit ctrl-c again because teardown didn't fully take place in # which case we do the "hard" raising of a local KBI. else: raise KeyboardInterrupt + # try to see if the supposed (sub)actor in debug still + # has an active connection to *this* actor, and if not + # it's likely they aren't using the TTY lock / debugger + # and we should propagate SIGINT normally. any_connected: bool = False - if uid_in_debug is not None: - # try to see if the supposed (sub)actor in debug still - # has an active connection to *this* actor, and if not - # it's likely they aren't using the TTY lock / debugger - # and we should propagate SIGINT normally. - chans: list[tractor.Channel] = actor._peers.get(tuple(uid_in_debug)) + chans: list[tractor.Channel] = actor._peers.get( + tuple(uid_in_debug) + ) if chans: any_connected = any(chan.connected() for chan in chans) if not any_connected: log.warning( 'A global actor reported to be in debug ' - 'but no connection exists for this child:\n' - f'{uid_in_debug}\n' + 'but no connection exists for this child!?\n' + f'subactor_uid: {uid_in_debug}\n\n' 'Allowing SIGINT propagation..' ) return do_cancel() # only set in the actor actually running the REPL - pdb_obj: MultiActorPdb|None = Lock.repl + repl: PdbREPL|None = Lock.repl + # TODO: maybe we should flatten out all these cases using + # a match/case? + # # root actor branch that reports whether or not a child # has locked debugger. 
- if ( - is_root_process() - and uid_in_debug is not None + if is_root_process(): + lock_cs: trio.CancelScope = Lock.get_locking_task_cs() - # XXX: only if there is an existing connection to the - # (sub-)actor in debug do we ignore SIGINT in this - # parent! Otherwise we may hang waiting for an actor - # which has already terminated to unlock. - and any_connected - ): - # we are root and some actor is in debug mode - # if uid_in_debug is not None: + log.warning( + f'root {actor.uid} handling SIGINT\n' + f'any_connected: {any_connected}\n\n' - if pdb_obj: - name = uid_in_debug[0] - if name != 'root': - log.pdb( - f"Ignoring SIGINT, child in debug mode: `{uid_in_debug}`" - ) + f'{Lock.repr()}\n' + ) + + maybe_stale_lock_cs: bool = ( + lock_cs is not None + # and not lock_cs.cancel_called + and uid_in_debug is None + ) + if maybe_stale_lock_cs: + log.warning( + 'Stale `Lock._locking_task_cs: CancelScope` DETECTED?\n' + f'|_{lock_cs}\n\n' + ) + lock_cs.cancel() + + if uid_in_debug: # "someone" is (ostensibly) using debug `Lock` + name_in_debug: str = uid_in_debug[0] + if ( + not repl # but it's NOT us, the root actor. + ): + # sanity: since no repl ref is set, we def shouldn't + # be the lock owner! + assert name_in_debug != 'root' + + # XXX: only if there is an existing connection to the + # (sub-)actor in debug do we ignore SIGINT in this + # parent! Otherwise we may hang waiting for an actor + # which has already terminated to unlock. + if any_connected: # there are subactors we can contact + # NOTE: don't emit this with `.pdb()` level in + # root without a higher level. + log.debug( + f'Ignoring SIGINT while debug REPL in use by child\n' + f'subactor: {uid_in_debug}\n' + ) + # returns here minus tail logic + case_handled = True + + else: + message: str = ( + f'Ignoring SIGINT while debug REPL SUPPOSEDLY in use by child\n' + f'subactor: {uid_in_debug}\n\n' + f'BUT, no child actors are contactable!?!?\n\n' + + # f'Reverting to def `trio` SIGINT handler..\n' + ) + + if maybe_stale_lock_cs: + lock_cs.cancel() + message += ( + 'Maybe `Lock._locking_task_cs: CancelScope` is stale?\n' + f'|_{lock_cs}\n\n' + ) + + log.warning(message) + Lock.unshield_sigint() + case_handled = True else: + assert name_in_debug == 'root' # we are the registered locker + assert repl # we have a pdb REPL engaged log.pdb( - "Ignoring SIGINT while in debug mode" + f'Ignoring SIGINT while debug REPL in use\n' + f'root actor: {uid_in_debug}\n' ) - elif ( - is_root_process() - ): - if pdb_obj: - log.pdb( - "Ignoring SIGINT since debug mode is enabled" + # returns here minus tail logic + case_handled = True + + # root actor still has this SIGINT handler active without + # an actor using the `Lock` (a bug state) ?? + # => so immediately cancel any stale lock cs and revert + # the handler! + else: + # XXX revert back to ``trio`` handler since this handler shouldn't + # be enabled withtout an actor using a debug REPL! + log.warning( + 'Ignoring SIGINT in root actor but no actor using a `pdb` REPL?\n' + 'Reverting SIGINT handler to `trio` default!\n' ) - if ( - Lock._root_local_task_cs_in_debug - and not Lock._root_local_task_cs_in_debug.cancel_called - ): - Lock._root_local_task_cs_in_debug.cancel() + if maybe_stale_lock_cs: + lock_cs.cancel() - # revert back to ``trio`` handler asap! 
Lock.unshield_sigint() + case_handled = True # child actor that has locked the debugger elif not is_root_process(): + log.warning( + f'Subactor {actor.uid} handling SIGINT\n\n' + f'{Lock.repr()}\n' + ) - chan: Channel = actor._parent_chan - if not chan or not chan.connected(): + rent_chan: Channel = actor._parent_chan + if ( + rent_chan is None + or + not rent_chan.connected() + ): log.warning( - 'A global actor reported to be in debug ' - 'but no connection exists for its parent:\n' + 'A global sub-actor reported to be in debug ' + 'but it has no connection to its parent ??\n' f'{uid_in_debug}\n' 'Allowing SIGINT propagation..' ) - return do_cancel() + Lock.unshield_sigint() + # do_cancel() + case_handled = True - task: str | None = Lock.local_task_in_debug + task: str|None = Lock.local_task_in_debug if ( task - and pdb_obj + and + repl ): + # if repl: log.pdb( - f"Ignoring SIGINT while task in debug mode: `{task}`" + f'Ignoring SIGINT while local task using debug REPL\n' + f'|_{task}\n' + f' |_{repl}\n' ) + case_handled = True + else: + msg: str = ( + 'SIGINT shield handler still active BUT, \n\n' + ) + if task is None: + msg += ( + f'- No local task claims to be in debug?\n' + f' |_{task}\n\n' + ) + + if repl is None: + msg += ( + f'- No local REPL is currently active?\n' + f' |_{repl}\n\n' + ) + + log.warning( + msg + + + 'Reverting handler to `trio` default!\n' + ) + Lock.unshield_sigint() + case_handled = True + + # XXX ensure that the reverted-to-handler actually is + # able to rx what should have been **this** KBI ;) + do_cancel() + # raise KeyboardInterrupt # TODO: how to handle the case of an intermediary-child actor # that **is not** marked in debug mode? See oustanding issue: # https://github.com/goodboy/tractor/issues/320 # elif debug_mode(): - else: # XXX: shouldn't ever get here? - raise RuntimeError("WTFWTFWTF") - # raise KeyboardInterrupt("WTFWTFWTF") - # NOTE: currently (at least on ``fancycompleter`` 0.9.2) # it looks to be that the last command that was run (eg. ll) # will be repeated by default. @@ -695,31 +1145,37 @@ def shield_sigint_handler( # we want to alert the user that more input is expect since # nothing has been done dur to ignoring sigint. if ( - pdb_obj # only when this actor has a REPL engaged + repl # only when this actor has a REPL engaged ): # XXX: yah, mega hack, but how else do we catch this madness XD - if pdb_obj.shname == 'xonsh': - pdb_obj.stdout.write(pdb_obj.prompt) + if repl.shname == 'xonsh': + repl.stdout.write(repl.prompt) - pdb_obj.stdout.flush() + repl.stdout.flush() # TODO: make this work like sticky mode where if there is output # detected as written to the tty we redraw this part underneath # and erase the past draw of this same bit above? - # pdb_obj.sticky = True - # pdb_obj._print_if_sticky() + # repl.sticky = True + # repl._print_if_sticky() # also see these links for an approach from ``ptk``: # https://github.com/goodboy/tractor/issues/130#issuecomment-663752040 # https://github.com/prompt-toolkit/python-prompt-toolkit/blob/c2c6af8a0308f9e5d7c0e28cb8a02963fe0ce07a/prompt_toolkit/patch_stdout.py + if not case_handled: + log.critical( + f'{actor.uid} UNHANDLED SIGINT !?!?\n' + # TODO: pprint for `Lock`? 
+ ) + _pause_msg: str = 'Attaching to pdb REPL in actor' def _set_trace( actor: tractor.Actor|None = None, - pdb: MultiActorPdb|None = None, + pdb: PdbREPL|None = None, shield: bool = False, extra_frames_up_when_async: int = 1, @@ -767,14 +1223,16 @@ def _set_trace( log.pdb( f'{msg}\n' '|\n' - f'|_ {actor.uid}\n' + # TODO: make an `Actor.__repr()__` + # f'|_ {current_task()} @ {actor.name}\n' + f'|_ {current_task()}\n' ) # no f!#$&* idea, but when we're in async land # we need 2x frames up? for i in range(extra_frames_up_when_async): frame: FrameType = frame.f_back log.debug( - f'Going up frame {i} -> {frame}\n' + f'Going up frame_{i}:\n|_{frame}\n' ) # engage ze REPL @@ -787,7 +1245,7 @@ async def _pause( debug_func: Callable = _set_trace, # NOTE: must be passed in the `.pause_from_sync()` case! - pdb: MultiActorPdb|None = None, + pdb: PdbREPL|None = None, # TODO: allow caller to pause despite task cancellation, # exactly the same as wrapping with: @@ -799,6 +1257,8 @@ async def _pause( # shield: bool = False, hide_tb: bool = True, + extra_frames_up_when_async: int = 4, + task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED ) -> None: @@ -813,7 +1273,9 @@ async def _pause( __tracebackhide__: bool = hide_tb actor: Actor = current_actor() try: - task_name: str = trio.lowlevel.current_task().name + # TODO: use the `Task` instance instead for `is` checks + # below! + task: Task = trio.lowlevel.current_task() except RuntimeError as rte: if actor.is_infected_aio(): raise RuntimeError( @@ -821,48 +1283,142 @@ async def _pause( 'for infected `asyncio` mode!' ) from rte + # task_name: str = task.name + if ( not Lock.local_pdb_complete - or Lock.local_pdb_complete.is_set() + or + Lock.local_pdb_complete.is_set() ): Lock.local_pdb_complete = trio.Event() if debug_func is not None: - debug_func = partial( - debug_func, - ) + debug_func = partial(debug_func) if pdb is None: - pdb: MultiActorPdb = mk_mpdb() + pdb: PdbREPL = mk_mpdb() + + def _enter_repl_sync( + debug_func: Callable, + ) -> None: + __tracebackhide__: bool = hide_tb + try: + # TODO: do we want to support using this **just** for the + # locking / common code (prolly to help address #320)? + # + if debug_func is None: + task_status.started(Lock) + else: + # block here one (at the appropriate frame *up*) where + # ``breakpoint()`` was awaited and begin handling stdio. + log.debug('Entering sync world of the `pdb` REPL..') + try: + # log.critical( + # f'stack len: {len(pdb.stack)}\n' + # ) + debug_func( + actor, + pdb, + extra_frames_up_when_async=extra_frames_up_when_async, + shield=shield, + ) + except BaseException: + log.exception( + 'Failed to invoke internal `debug_func = ' + f'{debug_func.func.__name__}`\n' + ) + raise + + except bdb.BdbQuit: + Lock.release() + raise + + except BaseException: + log.exception( + 'Failed to engage debugger via `_pause()` ??\n' + ) + raise + + if is_root_process(): + + # we also wait in the root-parent for any child that + # may have the tty locked prior + # TODO: wait, what about multiple root tasks acquiring it though? + if Lock.global_actor_in_debug == actor.uid: + # re-entrant root process already has it: noop. + log.warning( + f'{task.name}@{actor.uid} already has TTY lock\n' + f'ignoring..' + ) + await trio.lowlevel.checkpoint() + return + + # XXX: since we need to enter pdb synchronously below, + # we have to release the lock manually from pdb completion + # callbacks. Can't think of a nicer way then this atm. 
+ if Lock._debug_lock.locked(): + log.warning( + 'attempting to shield-acquire active TTY lock' + f' owned by {Lock.global_actor_in_debug}' + ) + + # must shield here to avoid hitting a ``Cancelled`` and + # a child getting stuck bc we clobbered the tty + with trio.CancelScope(shield=True): + await Lock._debug_lock.acquire() + else: + # may be cancelled + await Lock._debug_lock.acquire() + + Lock.global_actor_in_debug = actor.uid + Lock.local_task_in_debug = task + Lock.repl = pdb + + # enter REPL from root, no TTY locking IPC ctx necessary + _enter_repl_sync(debug_func) + return # next branch is mutex and for subactors # TODO: need a more robust check for the "root" actor - if ( + elif ( not is_root_process() and actor._parent_chan # a connected child ): - if Lock.local_task_in_debug: # Recurrence entry case: this task already has the lock and # is likely recurrently entering a breakpoint - if Lock.local_task_in_debug == task_name: - # noop on recurrent entry case but we want to trigger - # a checkpoint to allow other actors error-propagate and - # potetially avoid infinite re-entries in some subactor. + # + # NOTE: noop on recurrent entry case but we want to trigger + # a checkpoint to allow other actors error-propagate and + # potetially avoid infinite re-entries in some + # subactor that would otherwise not bubble until the + # next checkpoint was hit. + if ( + (repl_task := Lock.local_task_in_debug) + and + repl_task is task + ): + log.warning( + f'{task.name}@{actor.uid} already has TTY lock\n' + f'ignoring..' + ) await trio.lowlevel.checkpoint() return - # if **this** actor is already in debug mode block here - # waiting for the control to be released - this allows - # support for recursive entries to `tractor.breakpoint()` - log.warning(f"{actor.uid} already has a debug lock, waiting...") - + # if **this** actor is already in debug REPL we want + # to maintain actor-local-task mutex access, so block + # here waiting for the control to be released - this + # -> allows for recursive entries to `tractor.pause()` + log.warning( + f'{task.name}@{actor.uid} already has TTY lock\n' + f'waiting for release..' + ) await Lock.local_pdb_complete.wait() await trio.sleep(0.1) # mark local actor as "in debug mode" to avoid recurrent # entries/requests to the root process - Lock.local_task_in_debug = task_name + Lock.local_task_in_debug = task # this **must** be awaited by the caller and is done using the # root nursery so that the debugger can continue to run without @@ -875,91 +1431,54 @@ async def _pause( # actor._service_n.cancel_scope.shield = shield # ``` # but not entirely sure if that's a sane way to implement it? - try: - with trio.CancelScope(shield=True): - await actor._service_n.start( + + # NOTE: MUST it here bc multiple tasks are spawned by any + # one sub-actor AND there will be a race between when the + # root locking task delivers the `Started(pld=LockStatus)` + # and when the REPL is actually entered here. SO ensure + # the codec is set before either are run! 
+ # + with ( + # _codec.limit_msg_spec( + # payload_spec=__msg_spec__, + # ) as debug_codec, + trio.CancelScope(shield=shield), + ): + # async with trio.open_nursery() as tn: + # tn.cancel_scope.shield = True + try: + # cs: trio.CancelScope = await tn.start( + cs: trio.CancelScope = await actor._service_n.start( wait_for_parent_stdin_hijack, actor.uid, + (task.name, id(task)), ) + # our locker task should be the one in ctx + # with the root actor + assert Lock._debugger_request_cs is cs + + # XXX used by the SIGINT handler to check if + # THIS actor is in REPL interaction Lock.repl = pdb - except RuntimeError: - Lock.release() + except RuntimeError: + Lock.release() - if actor._cancel_called: - # service nursery won't be usable and we - # don't want to lock up the root either way since - # we're in (the midst of) cancellation. - return + if actor._cancel_called: + # service nursery won't be usable and we + # don't want to lock up the root either way since + # we're in (the midst of) cancellation. + return - raise - - elif is_root_process(): - - # we also wait in the root-parent for any child that - # may have the tty locked prior - # TODO: wait, what about multiple root tasks acquiring it though? - if Lock.global_actor_in_debug == actor.uid: - # re-entrant root process already has it: noop. - return - - # XXX: since we need to enter pdb synchronously below, - # we have to release the lock manually from pdb completion - # callbacks. Can't think of a nicer way then this atm. - if Lock._debug_lock.locked(): - log.warning( - 'Root actor attempting to shield-acquire active tty lock' - f' owned by {Lock.global_actor_in_debug}') - - # must shield here to avoid hitting a ``Cancelled`` and - # a child getting stuck bc we clobbered the tty - with trio.CancelScope(shield=True): - await Lock._debug_lock.acquire() - else: - # may be cancelled - await Lock._debug_lock.acquire() - - Lock.global_actor_in_debug = actor.uid - Lock.local_task_in_debug = task_name - Lock.repl = pdb - - try: - # TODO: do we want to support using this **just** for the - # locking / common code (prolly to help address #320)? - # - if debug_func is None: - task_status.started(Lock) - - else: - # block here one (at the appropriate frame *up*) where - # ``breakpoint()`` was awaited and begin handling stdio. - log.debug('Entering sync world of the `pdb` REPL..') - try: - # log.critical( - # f'stack len: {len(pdb.stack)}\n' - # ) - debug_func( - actor, - pdb, - extra_frames_up_when_async=2, - shield=shield, - ) - except BaseException: - log.exception( - 'Failed to invoke internal `debug_func = ' - f'{debug_func.func.__name__}`\n' - ) raise - except bdb.BdbQuit: - Lock.release() - raise + # enter REPL + + try: + _enter_repl_sync(debug_func) + finally: + Lock.unshield_sigint() - except BaseException: - log.exception( - 'Failed to engage debugger via `_pause()` ??\n' - ) - raise # XXX: apparently we can't do this without showing this frame # in the backtrace on first entry to the REPL? 
Seems like an odd @@ -1017,15 +1536,21 @@ async def pause( # __tracebackhide__: bool = True # super().__exit__(*args, **kwargs) - trio.CancelScope.__enter__.__tracebackhide__ = True - trio.CancelScope.__exit__.__tracebackhide__ = True + # trio.CancelScope.__enter__.__tracebackhide__ = True + # trio.CancelScope.__exit__.__tracebackhide__ = True # import types # with trio.CancelScope(shield=shield) as cs: # cs.__exit__ = types.MethodType(_exit, cs) # cs.__exit__.__tracebackhide__ = True - with trio.CancelScope(shield=shield) as cs: + # TODO: LOL, solved this with the `pdb.hideframe` stuff + # at top-of-mod.. so I guess we can just only use this + # block right? + with trio.CancelScope( + shield=shield, + ) as cs: + print(f'debug cs is {cs}\n') # setattr(cs.__exit__.__func__, '__tracebackhide__', True) # setattr(cs.__enter__.__func__, '__tracebackhide__', True) @@ -1135,7 +1660,7 @@ def pause_from_sync( # raises on not-found by default greenback: ModuleType = maybe_import_greenback() - mdb: MultiActorPdb = mk_mpdb() + mdb: PdbREPL = mk_mpdb() # run async task which will lock out the root proc's TTY. if not Lock.is_main_trio_thread(): @@ -1157,7 +1682,7 @@ def pause_from_sync( ) ) # TODO: maybe the `trio.current_task()` id/name if avail? - Lock.local_task_in_debug: str = str(threading.current_thread().name) + Lock.local_task_in_debug: str = str(threading.current_thread()) else: # we are presumably the `trio.run()` + main thread greenback.await_( @@ -1167,7 +1692,7 @@ def pause_from_sync( hide_tb=hide_tb, ) ) - Lock.local_task_in_debug: str = current_task().name + Lock.local_task_in_debug: str = current_task() # TODO: ensure we aggressively make the user aware about # entering the global ``breakpoint()`` built-in from sync @@ -1198,7 +1723,11 @@ async def breakpoint(**kwargs): '`tractor.breakpoint()` is deprecated!\n' 'Please use `tractor.pause()` instead!\n' ) - await pause(**kwargs) + __tracebackhide__: bool = True + await pause( + # extra_frames_up_when_async=6, + **kwargs + ) _crash_msg: str = ( @@ -1208,11 +1737,11 @@ _crash_msg: str = ( def _post_mortem( actor: tractor.Actor, - pdb: MultiActorPdb, + pdb: PdbREPL, shield: bool = False, # only for compat with `._set_trace()`.. - extra_frames_up_when_async=0, + extra_frames_up_when_async=1, ) -> None: ''' @@ -1225,7 +1754,11 @@ def _post_mortem( log.pdb( f'{_crash_msg}\n' '|\n' - f'|_ {actor.uid}\n' + f'|_ {current_task()}\n' + + # f'|_ @{actor.uid}\n' + # TODO: make an `Actor.__repr()__` + # f'|_ {current_task()} @ {actor.name}\n' ) # TODO: only replacing this to add the @@ -1278,9 +1811,12 @@ async def _maybe_enter_pm(err): @acm async def acquire_debug_lock( subactor_uid: tuple[str, str], -) -> AsyncGenerator[None, tuple]: +) -> AsyncGenerator[ + trio.CancelScope|None, + tuple, +]: ''' - Grab root's debug lock on entry, release on exit. + Request to acquire the TTY `Lock` in the root actor, release on exit. This helper is for actor's who don't actually need to acquired the debugger but want to wait until the lock is free in the @@ -1297,7 +1833,7 @@ async def acquire_debug_lock( wait_for_parent_stdin_hijack, subactor_uid, ) - yield None + yield cs cs.cancel() @@ -1328,7 +1864,6 @@ async def maybe_wait_for_debugger( # Instead try to wait for pdb to be released before # tearing down. 
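# a sketch of the poll-for-release approach implemented below,
# assuming a `trio.Event` that gets set on lock release; the knob
# names mirror (but aren't guaranteed to match) the real function's
# params.
import trio

async def wait_for_release(
    released: trio.Event,
    poll_steps: int = 2,
    poll_delay: float = 0.1,
) -> bool:
    for _ in range(poll_steps):
        if released.is_set():
            return True
        # shield so teardown-cancellation doesn't skip the wait
        with trio.CancelScope(shield=True):
            await trio.sleep(poll_delay)
    return False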
in_debug: tuple[str, str]|None = Lock.global_actor_in_debug - debug_complete: trio.Event|None = Lock.no_remote_has_tty if in_debug == current_actor().uid: log.debug( @@ -1340,7 +1875,7 @@ async def maybe_wait_for_debugger( elif in_debug: msg += ( - f'Debug `Lock` in use by subactor: {in_debug}\n' + f'Debug `Lock` in use by subactor\n|\n|_{in_debug}\n' ) # TODO: could this make things more deterministic? # wait to see if a sub-actor task will be @@ -1358,17 +1893,17 @@ async def maybe_wait_for_debugger( for istep in range(poll_steps): if ( - debug_complete - and not debug_complete.is_set() + Lock.no_remote_has_tty is not None + and not Lock.no_remote_has_tty.is_set() and in_debug is not None ): log.pdb( msg + - 'Root is waiting on tty lock to release..\n' + '\nRoot is waiting on tty lock to release..\n' ) with trio.CancelScope(shield=True): - await debug_complete.wait() + await Lock.no_remote_has_tty.wait() log.pdb( f'Child subactor released debug lock\n' f'|_{in_debug}\n' @@ -1378,8 +1913,8 @@ async def maybe_wait_for_debugger( if ( in_debug is None and ( - debug_complete is None - or debug_complete.is_set() + Lock.no_remote_has_tty is None + or Lock.no_remote_has_tty.is_set() ) ): log.pdb( -- 2.34.1 From ef3a7fbaa8f7b0ce2ba990483405d3157b03b493 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 17 Apr 2024 23:19:31 -0400 Subject: [PATCH 054/305] The src error to `_raise_from_no_key_in_msg()` is always an attr-error now! --- tractor/_exceptions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 65637fb5..4ace626f 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -935,7 +935,7 @@ def is_multi_cancelled(exc: BaseException) -> bool: def _raise_from_no_key_in_msg( ctx: Context, msg: MsgType, - src_err: KeyError, + src_err: AttributeError, log: StackLevelAdapter, # caller specific `log` obj expect_msg: str = Yield, @@ -994,7 +994,7 @@ def _raise_from_no_key_in_msg( ctx.chan, hide_tb=hide_tb, - ) from None + ) from src_err # `MsgStream` termination msg. # TODO: does it make more sense to pack -- 2.34.1 From cbb9bbcbca87b8ee67dee9fceec55c083d54a2e2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 18 Apr 2024 12:47:28 -0400 Subject: [PATCH 055/305] Use `DebugStatus` around subactor lock requests Breaks out all the (sub)actor local conc primitives from `Lock` (which is now only used in and by the root actor) such that there's an explicit distinction between a task that's "consuming" the `Lock` (remotely) vs. the root-side service tasks which do the actual acquire on behalf of the requesters. `DebugStatus` changeover deats: ------ - ------ - move all the actor-local vars over `DebugStatus` including: - move `_trio_handler` and `_orig_sigint_handler` - `local_task_in_debug` now `repl_task` - `_debugger_request_cs` now `req_cs` - `local_pdb_complete` now `repl_release` - drop all ^ fields from `Lock.repr()` obvi.. - move over the `.[un]shield_sigint()` and `.is_main_trio_thread()` methods. - add some new attrs/meths: - `DebugStatus.repl` for the currently running `Pdb` in-actor singleton. - `.repr()` for pprint of state (like `Lock`). - Note: that even when a root-actor task is in REPL, the `DebugStatus` is still used for certain actor-local state mgmt, such as SIGINT handler shielding. - obvi change all lock-requester code bits to now use a `DebugStatus` in their local actor-state instead of `Lock`, i.e. change usage from `Lock` in `._runtime` and `._root`. 
- use new `Lock.get_locking_task_cs()` API in when checking for sub-in-debug from `._runtime.Actor._stream_handler()`. Unrelated to topic-at-hand tweaks: ------ - ------ - drop the commented bits about hiding `@[a]cm` stack frames from `_debug.pause()` and simplify to only one block with the `shield` passthrough since we already solved the issue with cancel-scopes using `@pdbp.hideframe` B) - this includes all the extra logging about the extra frame for the user (good thing i put in that wasted effort back then eh..) - put the `try/except BaseException` with `log.exception()` around the whole of `._pause()` to ensure we don't miss in-func errors which can cause hangs.. - allow passing in `portal: Portal` to `Actor.start_remote_task()` such that `Portal` task spawning methods are always denoted correctly in terms of `Context.side`. - lotsa logging tweaks, decreasing a bit of noise from `.runtime()`s. --- tractor/_root.py | 2 +- tractor/_runtime.py | 101 ++++---- tractor/devx/_debug.py | 573 +++++++++++++++++++---------------------- 3 files changed, 322 insertions(+), 354 deletions(-) diff --git a/tractor/_root.py b/tractor/_root.py index 3209555e..377f494e 100644 --- a/tractor/_root.py +++ b/tractor/_root.py @@ -137,7 +137,7 @@ async def open_root_actor( # attempt to retreive ``trio``'s sigint handler and stash it # on our debugger lock state. - _debug.Lock._trio_handler = signal.getsignal(signal.SIGINT) + _debug.DebugStatus._trio_handler = signal.getsignal(signal.SIGINT) # mark top most level process as root actor _state._runtime_vars['_is_root'] = True diff --git a/tractor/_runtime.py b/tractor/_runtime.py index f61ec80d..4d675716 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -263,10 +263,13 @@ class Actor: self._listeners: list[trio.abc.Listener] = [] self._parent_chan: Channel|None = None self._forkserver_info: tuple|None = None + + # track each child/sub-actor in it's locally + # supervising nursery self._actoruid2nursery: dict[ - tuple[str, str], + tuple[str, str], # sub-`Actor.uid` ActorNursery|None, - ] = {} # type: ignore # noqa + ] = {} # when provided, init the registry addresses property from # input via the validator. @@ -661,12 +664,18 @@ class Actor: # TODO: NEEEDS TO BE TESTED! # actually, no idea if this ever even enters.. XD + # + # XXX => YES IT DOES, when i was testing ctl-c + # from broken debug TTY locking due to + # msg-spec races on application using RunVar... pdb_user_uid: tuple = pdb_lock.global_actor_in_debug if ( pdb_user_uid and local_nursery ): - entry: tuple|None = local_nursery._children.get(pdb_user_uid) + entry: tuple|None = local_nursery._children.get( + tuple(pdb_user_uid) + ) if entry: proc: trio.Process _, proc, _ = entry @@ -676,10 +685,10 @@ class Actor: and poll() is None ): log.cancel( - 'Root actor reports no-more-peers, BUT ' + 'Root actor reports no-more-peers, BUT\n' 'a DISCONNECTED child still has the debug ' - 'lock!\n' - f'root uid: {self.uid}\n' + 'lock!\n\n' + # f'root uid: {self.uid}\n' f'last disconnected child uid: {uid}\n' f'locking child uid: {pdb_user_uid}\n' ) @@ -705,9 +714,8 @@ class Actor: # if a now stale local task has the TTY lock still # we cancel it to allow servicing other requests for # the lock. 
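# quick sketch of the handler stash/restore dance done in the
# `_root.py` hunk above (std-lib `signal` APIs; the custom handler
# is a stand-in):
import signal

def custom_handler(signum, frame) -> None:
    print('shielding SIGINT during REPL use..')

orig_handler = signal.getsignal(signal.SIGINT)   # stash trio's handler
signal.signal(signal.SIGINT, custom_handler)     # install the shield
signal.signal(signal.SIGINT, orig_handler)       # restore on release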
- db_cs: trio.CancelScope|None = pdb_lock._root_local_task_cs_in_debug if ( - db_cs + (db_cs := pdb_lock.get_locking_task_cs()) and not db_cs.cancel_called and uid == pdb_user_uid ): @@ -744,7 +752,7 @@ class Actor: except KeyError: log.warning( 'Ignoring invalid IPC ctx msg!\n\n' - f'<= sender: {uid}\n' + f'<= sender: {uid}\n\n' # XXX don't need right since it's always in msg? # f'=> cid: {cid}\n\n' @@ -798,7 +806,7 @@ class Actor: cid, # side, )] - log.runtime( + log.debug( f'Retreived cached IPC ctx for\n' f'peer: {chan.uid}\n' f'cid:{cid}\n' @@ -837,10 +845,14 @@ class Actor: nsf: NamespacePath, kwargs: dict, + # determines `Context.side: str` + portal: Portal|None = None, + # IPC channel config msg_buffer_size: int|None = None, allow_overruns: bool = False, load_nsf: bool = False, + ack_timeout: float = 3, ) -> Context: ''' @@ -865,10 +877,12 @@ class Actor: msg_buffer_size=msg_buffer_size, allow_overruns=allow_overruns, ) + ctx._portal = portal if ( 'self' in nsf - or not load_nsf + or + not load_nsf ): ns, _, func = nsf.partition(':') else: @@ -876,42 +890,29 @@ class Actor: # -[ ] but, how to do `self:`?? ns, func = nsf.to_tuple() + msg = msgtypes.Start( + ns=ns, + func=func, + kwargs=kwargs, + uid=self.uid, + cid=cid, + ) log.runtime( - 'Sending cmd to\n' - f'peer: {chan.uid} => \n' - '\n' - f'=> {ns}.{func}({kwargs})\n' + 'Sending RPC start msg\n\n' + f'=> peer: {chan.uid}\n' + f' |_ {ns}.{func}({kwargs})\n' ) - await chan.send( - msgtypes.Start( - ns=ns, - func=func, - kwargs=kwargs, - uid=self.uid, - cid=cid, - ) - ) - # {'cmd': ( - # ns, - # func, - # kwargs, - # self.uid, - # cid, - # )} - # ) + await chan.send(msg) - # Wait on first response msg and validate; this should be - # immediate. - # first_msg: dict = await ctx._recv_chan.receive() - # functype: str = first_msg.get('functype') - - first_msg: msgtypes.StartAck = await ctx._recv_chan.receive() + # NOTE wait on first `StartAck` response msg and validate; + # this should be immediate and does not (yet) wait for the + # remote child task to sync via `Context.started()`. + with trio.fail_after(ack_timeout): + first_msg: msgtypes.StartAck = await ctx._recv_chan.receive() try: functype: str = first_msg.functype except AttributeError: raise unpack_error(first_msg, chan) - # if 'error' in first_msg: - # raise unpack_error(first_msg, chan) if functype not in ( 'asyncfunc', @@ -919,7 +920,7 @@ class Actor: 'context', ): raise ValueError( - f'{first_msg} is an invalid response packet?' + f'Invalid `StartAck.functype: str = {first_msg!r}` ??' ) ctx._remote_func_type = functype @@ -1164,7 +1165,7 @@ class Actor: # kill any debugger request task to avoid deadlock # with the root actor in this tree - dbcs = _debug.Lock._debugger_request_cs + dbcs = _debug.DebugStatus.req_cs if dbcs is not None: msg += ( '>> Cancelling active debugger request..\n' @@ -1239,9 +1240,9 @@ class Actor: except KeyError: # NOTE: during msging race conditions this will often # emit, some examples: - # - callee returns a result before cancel-msg/ctxc-raised - # - callee self raises ctxc before caller send request, - # - callee errors prior to cancel req. + # - child returns a result before cancel-msg/ctxc-raised + # - child self raises ctxc before parent send request, + # - child errors prior to cancel req. 
log.cancel( 'Cancel request invalid, RPC task already completed?\n\n' f'<= canceller: {requesting_uid}\n\n' @@ -1304,15 +1305,15 @@ class Actor: flow_info: str = ( f'<= canceller: {requesting_uid}\n' f'=> ipc-parent: {parent_chan}\n' - f' |_{ctx}\n' + f'|_{ctx}\n' ) log.runtime( - 'Waiting on RPC task to cancel\n' + 'Waiting on RPC task to cancel\n\n' f'{flow_info}' ) await is_complete.wait() log.runtime( - f'Sucessfully cancelled RPC task\n' + f'Sucessfully cancelled RPC task\n\n' f'{flow_info}' ) return True @@ -1538,8 +1539,8 @@ async def async_main( ''' # attempt to retreive ``trio``'s sigint handler and stash it - # on our debugger lock state. - _debug.Lock._trio_handler = signal.getsignal(signal.SIGINT) + # on our debugger state. + _debug.DebugStatus._trio_handler = signal.getsignal(signal.SIGINT) is_registered: bool = False try: diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 51e74379..e4ab7d83 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -160,12 +160,6 @@ class Lock: # placeholder for function to set a ``trio.Event`` on debugger exit # pdb_release_hook: Callable | None = None - _trio_handler: ( - Callable[[int, FrameType|None], Any] - |int - | None - ) = None - remote_task_in_debug: str|None = None @staticmethod @@ -188,12 +182,6 @@ class Lock: Lock._locking_task_cs = cs - # SUBACTOR ONLY - # ------ - ------- - local_task_in_debug: Task|None = None - _debugger_request_cs: trio.CancelScope|None = None - local_pdb_complete: trio.Event|None = None - # ROOT ONLY # ------ - ------- # the root-actor-ONLY singletons for, @@ -214,16 +202,12 @@ class Lock: _debug_lock: trio.StrictFIFOLock = trio.StrictFIFOLock() _blocked: set[tuple[str, str]] = set() # `Actor.uid` block list - # TODO: should go on `PbpREPL`? - _orig_sigint_handler: Callable | None = None - @classmethod def repr(cls) -> str: # both root and subs fields: str = ( f'repl: {cls.repl}\n' - f'local_repl_task: {cls.local_task_in_debug}\n' ) if is_root_process(): @@ -238,12 +222,6 @@ class Lock: f'_debug_lock: {cls._debug_lock}\n' f'lock_stats: {lock_stats}\n' ) - else: - fields += ( - f'local_task_in_debug: {cls.local_task_in_debug}\n' - f'local_pdb_complete: {cls.local_pdb_complete}\n' - f'_debugger_request_cs: {cls._debugger_request_cs}\n' - ) body: str = textwrap.indent( fields, @@ -255,7 +233,101 @@ class Lock: ')>' ) - # TODO: move to PdbREPL! + @classmethod + def release(cls): + try: + if not DebugStatus.is_main_trio_thread(): + trio.from_thread.run_sync( + cls._debug_lock.release + ) + else: + cls._debug_lock.release() + + except RuntimeError as rte: + # uhhh makes no sense but been seeing the non-owner + # release error even though this is definitely the task + # that locked? + owner = cls._debug_lock.statistics().owner + # if ( + # owner + # and + # cls.remote_task_in_debug is None + # ): + # raise RuntimeError( + # 'Stale `Lock` detected, no remote task active!?\n' + # f'|_{owner}\n' + # # f'{Lock}' + # ) from rte + + if owner: + raise rte + + # OW suppress, can't member why tho .. XD + # something somethin corrupts a cancel-scope + # somewhere.. + + try: + # sometimes the ``trio`` might already be terminated in + # which case this call will raise. + if DebugStatus.repl_release is not None: + DebugStatus.repl_release.set() + + finally: + cls.repl = None + cls.global_actor_in_debug = None + + # restore original sigint handler + DebugStatus.unshield_sigint() + # actor-local state, irrelevant for non-root. 
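# the same `trio.from_thread.run_sync()` bounce that `Lock.release()`
# uses above, demoed with a `trio.Event` (real `trio` APIs; the
# worker thread must itself be spawned via `trio.to_thread`):
import trio

async def main() -> None:
    done = trio.Event()
    # `.set()` must happen on the trio thread; the worker thread
    # requests it back through the thread portal:
    await trio.to_thread.run_sync(
        lambda: trio.from_thread.run_sync(done.set)
    )
    assert done.is_set()

trio.run(main)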
+ DebugStatus.repl_task = None + + +# TODO: actually use this instead throughout for subs! +class DebugStatus: + ''' + Singleton-state for debugging machinery in a subactor. + + Composes conc primitives for syncing with a root actor to + acquire the tree-global (TTY) `Lock` such that only ever one + actor's task can have the REPL active at a given time. + + Methods to shield the process' `SIGINT` handler are used + whenever a local task is an active REPL. + + ''' + repl: PdbREPL|None = None + repl_task: Task|None = None + req_cs: trio.CancelScope|None = None + repl_release: trio.Event|None = None + + lock_status: LockStatus|None = None + + _orig_sigint_handler: Callable | None = None + _trio_handler: ( + Callable[[int, FrameType|None], Any] + |int + | None + ) = None + + + @classmethod + def repr(cls) -> str: + fields: str = ( + f'repl: {cls.repl}\n' + f'repl_task: {cls.repl_task}\n' + f'repl_release: {cls.repl_release}\n' + f'req_cs: {cls.req_cs}\n' + ) + body: str = textwrap.indent( + fields, + prefix=' |_', + ) + return ( + f'<{cls.__name__}(\n' + f'{body}' + ')>' + ) + @classmethod def shield_sigint(cls): ''' @@ -339,77 +411,6 @@ class Lock: # is not threading.main_thread() # ) - @classmethod - def release(cls): - try: - if not cls.is_main_trio_thread(): - trio.from_thread.run_sync( - cls._debug_lock.release - ) - else: - cls._debug_lock.release() - - except RuntimeError as rte: - # uhhh makes no sense but been seeing the non-owner - # release error even though this is definitely the task - # that locked? - owner = cls._debug_lock.statistics().owner - # if ( - # owner - # and - # cls.remote_task_in_debug is None - # ): - # raise RuntimeError( - # 'Stale `Lock` detected, no remote task active!?\n' - # f'|_{owner}\n' - # # f'{Lock}' - # ) from rte - - if owner: - raise rte - - # OW suppress, can't member why tho .. XD - # something somethin corrupts a cancel-scope - # somewhere.. - - try: - # sometimes the ``trio`` might already be terminated in - # which case this call will raise. - if cls.local_pdb_complete is not None: - cls.local_pdb_complete.set() - - finally: - # restore original sigint handler - cls.unshield_sigint() - cls.repl = None - - # actor-local state, irrelevant for non-root. - cls.global_actor_in_debug = None - cls.local_task_in_debug = None - - -# TODO: actually use this instead throughout for subs! -class DebugStatus: - ''' - Singleton-state for debugging machinery in a subactor. - - Composes conc primitives for syncing with a root actor to - acquire the tree-global (TTY) `Lock` such that only ever one - actor's task can have the REPL active at a given time. - - ''' - repl: PdbREPL|None = None - lock_status: LockStatus|None = None - - repl_task: Task|None = None - # local_task_in_debug: Task|None = None - - req_cs: trio.CancelScope|None = None - # _debugger_request_cs: trio.CancelScope|None = None - - repl_release: trio.Event|None = None - # local_pdb_complete: trio.Event|None = None - class TractorConfig(pdbp.DefaultConfig): ''' @@ -445,6 +446,7 @@ class PdbREPL(pdbp.Pdb): status = DebugStatus + # def preloop(self): # print('IN PRELOOP') # super().preloop() @@ -660,16 +662,19 @@ async def lock_tty_for_child( highly reliable at releasing the mutex complete! 
''' - req_task_uid: tuple = tuple(subactor_task_uid) if req_task_uid in Lock._blocked: raise RuntimeError( + f'Double lock request!?\n' f'The same remote task already has an active request for TTY lock ??\n\n' f'task uid: {req_task_uid}\n' - f'subactor uid: {subactor_uid}\n' - ) + f'subactor uid: {subactor_uid}\n\n' - Lock._blocked.add(req_task_uid) + 'This might be mean that the requesting task ' + 'in `wait_for_parent_stdin_hijack()` may have crashed?\n' + 'Consider that an internal bug exists given the TTY ' + '`Lock`ing IPC dialog..\n' + ) root_task_name: str = current_task().name if tuple(subactor_uid) in Lock._blocked: @@ -695,8 +700,9 @@ async def lock_tty_for_child( f'subactor_uid: {subactor_uid}\n' f'remote task: {subactor_task_uid}\n' ) - Lock.shield_sigint() + DebugStatus.shield_sigint() try: + Lock._blocked.add(req_task_uid) with ( # NOTE: though a cs is created for every subactor lock # REQUEST in this ctx-child task, only the root-task @@ -708,6 +714,9 @@ async def lock_tty_for_child( # used to do so! trio.CancelScope(shield=True) as debug_lock_cs, + # TODO: make this ONLY limit the pld_spec such that we + # can on-error-decode-`.pld: Raw` fields in + # `Context._deliver_msg()`? _codec.limit_msg_spec( payload_spec=__msg_spec__, ) as codec, @@ -763,8 +772,9 @@ async def lock_tty_for_child( finally: debug_lock_cs.cancel() + Lock._blocked.remove(req_task_uid) Lock.set_locking_task_cs(None) - Lock.unshield_sigint() + DebugStatus.unshield_sigint() @cm @@ -817,7 +827,7 @@ async def wait_for_parent_stdin_hijack( trio.CancelScope(shield=True) as cs, apply_debug_codec(), ): - Lock._debugger_request_cs = cs + DebugStatus.req_cs = cs try: # TODO: merge into sync async with ? async with get_root() as portal: @@ -829,7 +839,7 @@ async def wait_for_parent_stdin_hijack( ) as (ctx, resp): log.pdb( - 'Subactor locked TTY per msg\n' + 'Subactor locked TTY with msg\n\n' f'{resp}\n' ) assert resp.subactor_uid == actor_uid @@ -837,12 +847,12 @@ async def wait_for_parent_stdin_hijack( async with ctx.open_stream() as stream: try: # to unblock local caller - assert Lock.local_pdb_complete + assert DebugStatus.repl_release task_status.started(cs) # wait for local task to exit and # release the REPL - await Lock.local_pdb_complete.wait() + await DebugStatus.repl_release.wait() finally: await stream.send( @@ -867,12 +877,12 @@ async def wait_for_parent_stdin_hijack( raise finally: - Lock.local_task_in_debug = None + DebugStatus.repl_task = None log.debug('Exiting debugger TTY lock request func from child') log.cancel('Reverting SIGINT handler!') - Lock.unshield_sigint() + DebugStatus.unshield_sigint() @@ -901,7 +911,7 @@ def mk_mpdb() -> PdbREPL: # in which case schedule the SIGINT shielding override # to in the main thread. # https://docs.python.org/3/library/signal.html#signals-and-threads - Lock.shield_sigint() + DebugStatus.shield_sigint() # XXX: These are the important flags mentioned in # https://github.com/python-trio/trio/issues/1155 @@ -1036,7 +1046,8 @@ def shield_sigint_handler( ) log.warning(message) - Lock.unshield_sigint() + # Lock.unshield_sigint() + DebugStatus.unshield_sigint() case_handled = True else: @@ -1064,7 +1075,7 @@ def shield_sigint_handler( if maybe_stale_lock_cs: lock_cs.cancel() - Lock.unshield_sigint() + DebugStatus.unshield_sigint() case_handled = True # child actor that has locked the debugger @@ -1086,11 +1097,11 @@ def shield_sigint_handler( f'{uid_in_debug}\n' 'Allowing SIGINT propagation..' 
) - Lock.unshield_sigint() + DebugStatus.unshield_sigint() # do_cancel() case_handled = True - task: str|None = Lock.local_task_in_debug + task: str|None = DebugStatus.repl_task if ( task and @@ -1124,7 +1135,7 @@ def shield_sigint_handler( + 'Reverting handler to `trio` default!\n' ) - Lock.unshield_sigint() + DebugStatus.unshield_sigint() case_handled = True # XXX ensure that the reverted-to-handler actually is @@ -1200,32 +1211,15 @@ def _set_trace( pdb and actor is not None ) - # or shield ): - msg: str = _pause_msg - if shield: - # log.warning( - msg = ( - '\n\n' - ' ------ - ------\n' - 'Debugger invoked with `shield=True` so an extra\n' - '`trio.CancelScope.__exit__()` frame is shown..\n' - '\n' - 'Try going up one frame to see your pause point!\n' - '\n' - ' SORRY we need to fix this!\n' - ' ------ - ------\n\n' - ) + msg - - # pdbp.set_trace() # TODO: maybe print the actor supervion tree up to the # root here? Bo + log.pdb( - f'{msg}\n' + f'{_pause_msg}\n' '|\n' # TODO: make an `Actor.__repr()__` - # f'|_ {current_task()} @ {actor.name}\n' - f'|_ {current_task()}\n' + f'|_ {current_task()} @ {actor.uid}\n' ) # no f!#$&* idea, but when we're in async land # we need 2x frames up? @@ -1286,11 +1280,11 @@ async def _pause( # task_name: str = task.name if ( - not Lock.local_pdb_complete + not DebugStatus.repl_release or - Lock.local_pdb_complete.is_set() + DebugStatus.repl_release.is_set() ): - Lock.local_pdb_complete = trio.Event() + DebugStatus.repl_release = trio.Event() if debug_func is not None: debug_func = partial(debug_func) @@ -1333,71 +1327,14 @@ async def _pause( Lock.release() raise - except BaseException: - log.exception( - 'Failed to engage debugger via `_pause()` ??\n' - ) - raise + try: + if is_root_process(): - if is_root_process(): - - # we also wait in the root-parent for any child that - # may have the tty locked prior - # TODO: wait, what about multiple root tasks acquiring it though? - if Lock.global_actor_in_debug == actor.uid: - # re-entrant root process already has it: noop. - log.warning( - f'{task.name}@{actor.uid} already has TTY lock\n' - f'ignoring..' - ) - await trio.lowlevel.checkpoint() - return - - # XXX: since we need to enter pdb synchronously below, - # we have to release the lock manually from pdb completion - # callbacks. Can't think of a nicer way then this atm. - if Lock._debug_lock.locked(): - log.warning( - 'attempting to shield-acquire active TTY lock' - f' owned by {Lock.global_actor_in_debug}' - ) - - # must shield here to avoid hitting a ``Cancelled`` and - # a child getting stuck bc we clobbered the tty - with trio.CancelScope(shield=True): - await Lock._debug_lock.acquire() - else: - # may be cancelled - await Lock._debug_lock.acquire() - - Lock.global_actor_in_debug = actor.uid - Lock.local_task_in_debug = task - Lock.repl = pdb - - # enter REPL from root, no TTY locking IPC ctx necessary - _enter_repl_sync(debug_func) - return # next branch is mutex and for subactors - - # TODO: need a more robust check for the "root" actor - elif ( - not is_root_process() - and actor._parent_chan # a connected child - ): - if Lock.local_task_in_debug: - - # Recurrence entry case: this task already has the lock and - # is likely recurrently entering a breakpoint - # - # NOTE: noop on recurrent entry case but we want to trigger - # a checkpoint to allow other actors error-propagate and - # potetially avoid infinite re-entries in some - # subactor that would otherwise not bubble until the - # next checkpoint was hit. 
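# sketch of the recurrent-entry noop described above: when this task
# already owns the REPL just checkpoint (so remote errors/cancels can
# still propagate) instead of re-acquiring and dead-locking.
import trio

async def maybe_noop_reentry(
    repl_task: object,
    task: object,
) -> bool:
    if repl_task is task:
        await trio.lowlevel.checkpoint()
        return True  # caller should early-return
    return False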
- if ( - (repl_task := Lock.local_task_in_debug) - and - repl_task is task - ): + # we also wait in the root-parent for any child that + # may have the tty locked prior + # TODO: wait, what about multiple root tasks acquiring it though? + if Lock.global_actor_in_debug == actor.uid: + # re-entrant root process already has it: noop. log.warning( f'{task.name}@{actor.uid} already has TTY lock\n' f'ignoring..' @@ -1405,79 +1342,137 @@ async def _pause( await trio.lowlevel.checkpoint() return - # if **this** actor is already in debug REPL we want - # to maintain actor-local-task mutex access, so block - # here waiting for the control to be released - this - # -> allows for recursive entries to `tractor.pause()` - log.warning( - f'{task.name}@{actor.uid} already has TTY lock\n' - f'waiting for release..' - ) - await Lock.local_pdb_complete.wait() - await trio.sleep(0.1) - - # mark local actor as "in debug mode" to avoid recurrent - # entries/requests to the root process - Lock.local_task_in_debug = task - - # this **must** be awaited by the caller and is done using the - # root nursery so that the debugger can continue to run without - # being restricted by the scope of a new task nursery. - - # TODO: if we want to debug a trio.Cancelled triggered exception - # we have to figure out how to avoid having the service nursery - # cancel on this task start? I *think* this works below: - # ```python - # actor._service_n.cancel_scope.shield = shield - # ``` - # but not entirely sure if that's a sane way to implement it? - - # NOTE: MUST it here bc multiple tasks are spawned by any - # one sub-actor AND there will be a race between when the - # root locking task delivers the `Started(pld=LockStatus)` - # and when the REPL is actually entered here. SO ensure - # the codec is set before either are run! - # - with ( - # _codec.limit_msg_spec( - # payload_spec=__msg_spec__, - # ) as debug_codec, - trio.CancelScope(shield=shield), - ): - # async with trio.open_nursery() as tn: - # tn.cancel_scope.shield = True - try: - # cs: trio.CancelScope = await tn.start( - cs: trio.CancelScope = await actor._service_n.start( - wait_for_parent_stdin_hijack, - actor.uid, - (task.name, id(task)), + # XXX: since we need to enter pdb synchronously below, + # we have to release the lock manually from pdb completion + # callbacks. Can't think of a nicer way then this atm. + if Lock._debug_lock.locked(): + log.warning( + 'attempting to shield-acquire active TTY lock' + f' owned by {Lock.global_actor_in_debug}' ) - # our locker task should be the one in ctx - # with the root actor - assert Lock._debugger_request_cs is cs - # XXX used by the SIGINT handler to check if - # THIS actor is in REPL interaction - Lock.repl = pdb + # must shield here to avoid hitting a ``Cancelled`` and + # a child getting stuck bc we clobbered the tty + with trio.CancelScope(shield=True): + await Lock._debug_lock.acquire() + else: + # may be cancelled + await Lock._debug_lock.acquire() - except RuntimeError: - Lock.release() + Lock.global_actor_in_debug = actor.uid + DebugStatus.repl_task = task + DebugStatus.repl = Lock.repl = pdb - if actor._cancel_called: - # service nursery won't be usable and we - # don't want to lock up the root either way since - # we're in (the midst of) cancellation. 
+ # enter REPL from root, no TTY locking IPC ctx necessary + _enter_repl_sync(debug_func) + return # next branch is mutex and for subactors + + # TODO: need a more robust check for the "root" actor + elif ( + not is_root_process() + and actor._parent_chan # a connected child + ): + if DebugStatus.repl_task: + + # Recurrence entry case: this task already has the lock and + # is likely recurrently entering a breakpoint + # + # NOTE: noop on recurrent entry case but we want to trigger + # a checkpoint to allow other actors error-propagate and + # potetially avoid infinite re-entries in some + # subactor that would otherwise not bubble until the + # next checkpoint was hit. + if ( + (repl_task := DebugStatus.repl_task) + and + repl_task is task + ): + log.warning( + f'{task.name}@{actor.uid} already has TTY lock\n' + f'ignoring..' + ) + await trio.lowlevel.checkpoint() return - raise + # if **this** actor is already in debug REPL we want + # to maintain actor-local-task mutex access, so block + # here waiting for the control to be released - this + # -> allows for recursive entries to `tractor.pause()` + log.warning( + f'{task.name}@{actor.uid} already has TTY lock\n' + f'waiting for release..' + ) + await DebugStatus.repl_release.wait() + await trio.sleep(0.1) - # enter REPL + # mark local actor as "in debug mode" to avoid recurrent + # entries/requests to the root process + DebugStatus.repl_task = task - try: - _enter_repl_sync(debug_func) - finally: - Lock.unshield_sigint() + # this **must** be awaited by the caller and is done using the + # root nursery so that the debugger can continue to run without + # being restricted by the scope of a new task nursery. + + # TODO: if we want to debug a trio.Cancelled triggered exception + # we have to figure out how to avoid having the service nursery + # cancel on this task start? I *think* this works below: + # ```python + # actor._service_n.cancel_scope.shield = shield + # ``` + # but not entirely sure if that's a sane way to implement it? + + # NOTE: MUST it here bc multiple tasks are spawned by any + # one sub-actor AND there will be a race between when the + # root locking task delivers the `Started(pld=LockStatus)` + # and when the REPL is actually entered here. SO ensure + # the codec is set before either are run! + # + with ( + # _codec.limit_msg_spec( + # payload_spec=__msg_spec__, + # ) as debug_codec, + trio.CancelScope(shield=shield), + ): + # async with trio.open_nursery() as tn: + # tn.cancel_scope.shield = True + try: + # cs: trio.CancelScope = await tn.start( + cs: trio.CancelScope = await actor._service_n.start( + wait_for_parent_stdin_hijack, + actor.uid, + (task.name, id(task)), + ) + # our locker task should be the one in ctx + # with the root actor + assert DebugStatus.req_cs is cs + + # XXX used by the SIGINT handler to check if + # THIS actor is in REPL interaction + Lock.repl = pdb + + except RuntimeError: + Lock.release() + + if actor._cancel_called: + # service nursery won't be usable and we + # don't want to lock up the root either way since + # we're in (the midst of) cancellation. 
+ return + + raise + + # enter REPL + + try: + _enter_repl_sync(debug_func) + finally: + DebugStatus.unshield_sigint() + + except BaseException: + log.exception( + 'Failed to engage debugger via `_pause()` ??\n' + ) + raise # XXX: apparently we can't do this without showing this frame @@ -1527,45 +1522,16 @@ async def pause( ''' __tracebackhide__: bool = True - if shield: - # NOTE XXX: even hard coding this inside the `class CancelScope:` - # doesn't seem to work for me!? - # ^ XXX ^ + with trio.CancelScope( + shield=shield, + ) as cs: - # def _exit(self, *args, **kwargs): - # __tracebackhide__: bool = True - # super().__exit__(*args, **kwargs) - - # trio.CancelScope.__enter__.__tracebackhide__ = True - # trio.CancelScope.__exit__.__tracebackhide__ = True - - # import types - # with trio.CancelScope(shield=shield) as cs: - # cs.__exit__ = types.MethodType(_exit, cs) - # cs.__exit__.__tracebackhide__ = True - - # TODO: LOL, solved this with the `pdb.hideframe` stuff - # at top-of-mod.. so I guess we can just only use this - # block right? - with trio.CancelScope( - shield=shield, - ) as cs: - print(f'debug cs is {cs}\n') - # setattr(cs.__exit__.__func__, '__tracebackhide__', True) - # setattr(cs.__enter__.__func__, '__tracebackhide__', True) - - # NOTE: so the caller can always cancel even if shielded - task_status.started(cs) - return await _pause( - debug_func=debug_func, - shield=True, - task_status=task_status, - **_pause_kwargs - ) - else: + # NOTE: so the caller can always manually cancel even + # if shielded! + task_status.started(cs) return await _pause( debug_func=debug_func, - shield=False, + shield=shield, task_status=task_status, **_pause_kwargs ) @@ -1682,7 +1648,7 @@ def pause_from_sync( ) ) # TODO: maybe the `trio.current_task()` id/name if avail? - Lock.local_task_in_debug: str = str(threading.current_thread()) + DebugStatus.repl_task: str = str(threading.current_thread()) else: # we are presumably the `trio.run()` + main thread greenback.await_( @@ -1692,7 +1658,7 @@ def pause_from_sync( hide_tb=hide_tb, ) ) - Lock.local_task_in_debug: str = current_task() + DebugStatus.repl_task: str = current_task() # TODO: ensure we aggressively make the user aware about # entering the global ``breakpoint()`` built-in from sync @@ -1754,7 +1720,8 @@ def _post_mortem( log.pdb( f'{_crash_msg}\n' '|\n' - f'|_ {current_task()}\n' + # f'|_ {current_task()}\n' + f'|_ {current_task()} @ {actor.uid}\n' # f'|_ @{actor.uid}\n' # TODO: make an `Actor.__repr()__` -- 2.34.1 From 7d71fce558dbd1d42ef483321377c9529487be18 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 18 Apr 2024 15:10:23 -0400 Subject: [PATCH 056/305] `NamespacePath._mk_fqnp()` handle `__mod__` for methods Need to use `__self__.__mod__` in the method case i guess.. --- tractor/msg/ptr.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/tractor/msg/ptr.py b/tractor/msg/ptr.py index 4d089c3e..abe5406e 100644 --- a/tractor/msg/ptr.py +++ b/tractor/msg/ptr.py @@ -76,9 +76,11 @@ class NamespacePath(str): return self._ref @staticmethod - def _mk_fqnp(ref: type | object) -> tuple[str, str]: + def _mk_fqnp( + ref: type|object, + ) -> tuple[str, str]: ''' - Generate a minial ``str`` pair which describes a python + Generate a minial `str` pair which describes a python object's namespace path and object/type name. 
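        For example a bound method's pair can be built from its
        instance (a sketch; `Spot` is a stand-in class):

        >>> from inspect import ismethod
        >>> class Spot:
        ...     def bark(self): ...
        >>> ref = Spot().bark
        >>> assert ismethod(ref)
        >>> type(ref.__self__).__name__, ref.__func__.__name__
        ('Spot', 'bark')
        >>> # the module half comes from `ref.__self__.__module__`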
In more precise terms something like: @@ -87,10 +89,9 @@ class NamespacePath(str): of THIS type XD ''' - if ( - isfunction(ref) - ): + if isfunction(ref): name: str = getattr(ref, '__name__') + mod_name: str = ref.__module__ elif ismethod(ref): # build out the path manually i guess..? @@ -99,15 +100,19 @@ class NamespacePath(str): type(ref.__self__).__name__, ref.__func__.__name__, ]) + mod_name: str = ref.__self__.__module__ else: # object or other? # isinstance(ref, object) # and not isfunction(ref) name: str = type(ref).__name__ + mod_name: str = ref.__module__ + # TODO: return static value direactly? + # # fully qualified namespace path, tuple. fqnp: tuple[str, str] = ( - ref.__module__, + mod_name, name, ) return fqnp @@ -115,7 +120,7 @@ class NamespacePath(str): @classmethod def from_ref( cls, - ref: type | object, + ref: type|object, ) -> NamespacePath: -- 2.34.1 From be0ded2a22b2679d3e4ffe2af7d359d94173cf52 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 18 Apr 2024 15:17:50 -0400 Subject: [PATCH 057/305] Annotate nursery and portal methods for `CallerInfo` scanning --- tractor/_portal.py | 27 +++++++++++++++++++++------ tractor/_supervise.py | 9 ++++++++- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/tractor/_portal.py b/tractor/_portal.py index e4db93a6..052dd8ef 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -161,17 +161,18 @@ class Portal: self._expect_result = await self.actor.start_remote_task( self.channel, nsf=NamespacePath(f'{ns}:{func}'), - kwargs=kwargs + kwargs=kwargs, + portal=self, ) async def _return_once( self, ctx: Context, - ) -> dict[str, Any]: + ) -> Return: assert ctx._remote_func_type == 'asyncfunc' # single response - msg: dict = await ctx._recv_chan.receive() + msg: Return = await ctx._recv_chan.receive() return msg async def result(self) -> Any: @@ -247,6 +248,8 @@ class Portal: purpose. ''' + __runtimeframe__: int = 1 # noqa + chan: Channel = self.channel if not chan.connected(): log.runtime( @@ -324,16 +327,18 @@ class Portal: internals! ''' + __runtimeframe__: int = 1 # noqa nsf = NamespacePath( f'{namespace_path}:{function_name}' ) - ctx = await self.actor.start_remote_task( + ctx: Context = await self.actor.start_remote_task( chan=self.channel, nsf=nsf, kwargs=kwargs, + portal=self, ) - ctx._portal = self - msg = await self._return_once(ctx) + ctx._portal: Portal = self + msg: Return = await self._return_once(ctx) return _unwrap_msg( msg, self.channel, @@ -384,6 +389,7 @@ class Portal: self.channel, nsf=nsf, kwargs=kwargs, + portal=self, ) ctx._portal = self return _unwrap_msg( @@ -398,6 +404,14 @@ class Portal: **kwargs, ) -> AsyncGenerator[MsgStream, None]: + ''' + Legacy one-way streaming API. + + TODO: re-impl on top `Portal.open_context()` + an async gen + around `Context.open_stream()`. + + ''' + __runtimeframe__: int = 1 # noqa if not inspect.isasyncgenfunction(async_gen_func): if not ( @@ -411,6 +425,7 @@ class Portal: self.channel, nsf=NamespacePath.from_ref(async_gen_func), kwargs=kwargs, + portal=self, ) ctx._portal = self diff --git a/tractor/_supervise.py b/tractor/_supervise.py index be81e4e6..dc65cc65 100644 --- a/tractor/_supervise.py +++ b/tractor/_supervise.py @@ -131,7 +131,12 @@ class ActorNursery: "main task" besides the runtime. 
''' - loglevel = loglevel or self._actor.loglevel or get_loglevel() + __runtimeframe__: int = 1 # noqa + loglevel: str = ( + loglevel + or self._actor.loglevel + or get_loglevel() + ) # configure and pass runtime state _rtv = _state._runtime_vars.copy() @@ -209,6 +214,7 @@ class ActorNursery: the actor is terminated. ''' + __runtimeframe__: int = 1 # noqa mod_path: str = fn.__module__ if name is None: @@ -257,6 +263,7 @@ class ActorNursery: directly without any far end graceful ``trio`` cancellation. ''' + __runtimeframe__: int = 1 # noqa self.cancelled = True # TODO: impl a repr for spawn more compact -- 2.34.1 From 41499c6d9edd69fa13f85ce6772193be2b45d840 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 18 Apr 2024 15:18:29 -0400 Subject: [PATCH 058/305] TOSQUASH 77a15eb use `DebugStatus` in `._rpc` --- tractor/_rpc.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index 7e259c1e..5970a101 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -810,7 +810,7 @@ async def process_messages( # should use it? # https://github.com/python-trio/trio/issues/467 log.runtime( - 'Entering IPC msg loop:\n' + 'Entering RPC msg loop:\n' f'peer: {chan.uid}\n' f'|_{chan}\n' ) @@ -872,7 +872,7 @@ async def process_messages( # XXX NOTE XXX don't start entire actor # runtime cancellation if this actor is # currently in debug mode! - pdb_complete: trio.Event|None = _debug.Lock.local_pdb_complete + pdb_complete: trio.Event|None = _debug.DebugStatus.repl_release if pdb_complete: await pdb_complete.wait() @@ -1069,7 +1069,7 @@ async def process_messages( log.exception(message) raise RuntimeError(message) - log.runtime( + log.transport( 'Waiting on next IPC msg from\n' f'peer: {chan.uid}\n' f'|_{chan}\n' -- 2.34.1 From 69b509d09e6a46a77b93292cc0180bde0fee30bd Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 18 Apr 2024 15:40:26 -0400 Subject: [PATCH 059/305] Add some `bytes` annots --- tractor/_ipc.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tractor/_ipc.py b/tractor/_ipc.py index a5b44a4e..f76d4ef5 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -314,8 +314,7 @@ class MsgpackTCPStream(MsgTransport): while True: try: - header = await self.recv_stream.receive_exactly(4) - + header: bytes = await self.recv_stream.receive_exactly(4) except ( ValueError, ConnectionResetError, @@ -337,8 +336,7 @@ class MsgpackTCPStream(MsgTransport): size, = struct.unpack(" Date: Thu, 18 Apr 2024 15:41:06 -0400 Subject: [PATCH 060/305] Tweak `current_actor()` failure msg --- tractor/_state.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tractor/_state.py b/tractor/_state.py index b76e8ac9..30346a6a 100644 --- a/tractor/_state.py +++ b/tractor/_state.py @@ -66,7 +66,7 @@ def current_actor( err_on_no_runtime and _current_actor is None ): - msg: str = 'No local actor has been initialized yet' + msg: str = 'No local actor has been initialized yet?\n' from ._exceptions import NoRuntime if last := last_actor(): @@ -79,8 +79,8 @@ def current_actor( # this process. 
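# usage sketch of the failure this message guards against (real
# `tractor` APIs): `current_actor()` only resolves once a runtime
# has been opened in-process.
import tractor
import trio

async def main() -> None:
    async with tractor.open_nursery():
        actor = tractor.current_actor()  # now initialized
        assert actor.uid

trio.run(main)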
else: msg += ( - 'No last actor found?\n' - 'Did you forget to open one of:\n\n' + # 'No last actor found?\n' + '\nDid you forget to call one of,\n' '- `tractor.open_root_actor()`\n' '- `tractor.open_nursery()`\n' ) -- 2.34.1 From 5bf27aca2c0537a4f94a6e5b1981aeb2cb6ab76c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 18 Apr 2024 15:53:34 -0400 Subject: [PATCH 061/305] Proto in new `Context` refinements As per some newly added features and APIs: - pass `portal: Portal` to `Actor.start_remote_task()` from `open_context_from_portal()` marking `Portal.open_context()` as always being the "parent" task side. - add caller tracing via `.devx._code.CallerInfo/.find_caller_info()` called in `mk_context()` and (for now) a `__runtimeframe__: int = 2` inside `open_context_from_portal()` such that any enter-er of `Portal.open_context()` will be reported. - pass in a new `._caller_info` attr which is used in 2 new meths: - `.repr_caller: str` for showing the name of the app-code-func. - `.repr_api: str` for showing the API ep, which for now we just hardcode to `Portal.open_context()` since ow its gonna show the mod func name `open_context_from_portal()`. - use those new props ^ in the `._deliver_msg()` flow body log msg content for much clearer msg-flow tracing Bo - add `Context._cancel_on_msgerr: bool` to toggle whether a delivered `MsgTypeError` should trigger a `._scope.cancel()` call. - also (temporarily) add separate `.cancel()` emissions for both cases as i work through hacking out the maybe `MsgType.pld: Raw` support. --- tractor/_context.py | 147 +++++++++++++++++++++++++++++++++----------- 1 file changed, 111 insertions(+), 36 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 052c198d..9e5fe6c1 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -26,6 +26,7 @@ disjoint, parallel executing tasks in separate actors. from __future__ import annotations from collections import deque from contextlib import asynccontextmanager as acm +from contextvars import ContextVar from dataclasses import ( dataclass, field, @@ -56,6 +57,7 @@ from ._exceptions import ( ) from .log import get_logger from .msg import ( + _codec, Error, MsgType, MsgCodec, @@ -80,6 +82,9 @@ if TYPE_CHECKING: from ._portal import Portal from ._runtime import Actor from ._ipc import MsgTransport + from .devx._code import ( + CallerInfo, + ) log = get_logger(__name__) @@ -499,6 +504,18 @@ class Context: _started_called: bool = False _stream_opened: bool = False _stream: MsgStream|None = None + _pld_codec_var: ContextVar[MsgCodec] = ContextVar( + 'pld_codec', + default=_codec._def_msgspec_codec, # i.e. `Any`-payloads + ) + + @property + def pld_codec(self) -> MsgCodec|None: + return self._pld_codec_var.get() + + # caller of `Portal.open_context()` for + # logging purposes mostly + _caller_info: CallerInfo|None = None # overrun handling machinery # NOTE: none of this provides "backpressure" to the remote @@ -525,6 +542,7 @@ class Context: # TODO: figure out how we can enforce this without losing our minds.. _strict_started: bool = False + _cancel_on_msgerr: bool = True def __str__(self) -> str: ds: str = '=' @@ -857,6 +875,7 @@ class Context: # TODO: never do this right? # if self._remote_error: # return + peer_side: str = self.peer_side(self.side) # XXX: denote and set the remote side's error so that # after we cancel whatever task is the opener of this @@ -864,14 +883,15 @@ class Context: # appropriately. 
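# sketch of the `__runtimeframe__` annotation idea named in this
# patch's commit msg: a hypothetical scanner walks up past any frame
# that marks itself as runtime-internal to find the app-code caller
# (the real impl lives in `.devx._code.find_caller_info()`).
import inspect

def find_caller_name() -> str|None:
    frame = inspect.currentframe().f_back
    while frame:
        if '__runtimeframe__' not in frame.f_locals:
            return frame.f_code.co_name
        frame = frame.f_back
    return None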
log.runtime( 'Setting remote error for ctx\n\n' - f'<= remote ctx uid: {self.chan.uid}\n' - f'=>{error}' + f'<= {peer_side!r}: {self.chan.uid}\n' + f'=> {self.side!r}\n\n' + f'{error}' ) self._remote_error: BaseException = error # self-cancel (ack) or, # peer propagated remote cancellation. - msgtyperr: bool = False + msgerr: bool = False if isinstance(error, ContextCancelled): whom: str = ( @@ -884,7 +904,7 @@ class Context: ) elif isinstance(error, MsgTypeError): - msgtyperr = True + msgerr = True peer_side: str = self.peer_side(self.side) log.error( f'IPC dialog error due to msg-type caused by {peer_side!r} side\n\n' @@ -935,13 +955,24 @@ class Context: and not self._is_self_cancelled() and not cs.cancel_called and not cs.cancelled_caught - and not msgtyperr + and ( + msgerr + and + # NOTE: allow user to config not cancelling the + # local scope on `MsgTypeError`s + self._cancel_on_msgerr + ) ): # TODO: it'd sure be handy to inject our own # `trio.Cancelled` subtype here ;) # https://github.com/goodboy/tractor/issues/368 + log.cancel('Cancelling local `.open_context()` scope!') self._scope.cancel() + else: + log.cancel('NOT cancelling local `.open_context()` scope!') + + # TODO: maybe we should also call `._res_scope.cancel()` if it # exists to support cancelling any drain loop hangs? # NOTE: this usage actually works here B) @@ -969,9 +1000,7 @@ class Context: dmaddr = dst_maddr @property - def repr_rpc( - self, - ) -> str: + def repr_rpc(self) -> str: # TODO: how to show the transport interchange fmt? # codec: str = self.chan.transport.codec_key outcome_str: str = self.repr_outcome( @@ -983,6 +1012,27 @@ class Context: f'{self._nsf}() -> {outcome_str}:' ) + @property + def repr_caller(self) -> str: + ci: CallerInfo|None = self._caller_info + if ci: + return ( + f'{ci.caller_nsp}()' + # f'|_api: {ci.api_nsp}' + ) + + return '' + + @property + def repr_api(self) -> str: + # ci: CallerInfo|None = self._caller_info + # if ci: + # return ( + # f'{ci.api_nsp}()\n' + # ) + + return 'Portal.open_context()' + async def cancel( self, timeout: float = 0.616, @@ -1187,8 +1237,9 @@ class Context: ) # NOTE: in one way streaming this only happens on the - # caller side inside `Actor.start_remote_task()` so if you try - # to send a stop from the caller to the callee in the + # parent-ctx-task side (on the side that calls + # `Actor.start_remote_task()`) so if you try to send + # a stop from the caller to the callee in the # single-direction-stream case you'll get a lookup error # currently. ctx: Context = actor.get_context( @@ -1853,6 +1904,19 @@ class Context: send_chan: trio.MemorySendChannel = self._send_chan nsf: NamespacePath = self._nsf + side: str = self.side + if side == 'child': + assert not self._portal + peer_side: str = self.peer_side(side) + + flow_body: str = ( + f'<= peer {peer_side!r}: {from_uid}\n' + f' |_<{nsf}()>\n\n' + + f'=> {side!r}: {self._task}\n' + f' |_<{self.repr_api} @ {self.repr_caller}>\n\n' + ) + re: Exception|None if re := unpack_error( msg, @@ -1863,18 +1927,10 @@ class Context: else: log_meth = log.runtime - side: str = self.side - - peer_side: str = self.peer_side(side) - log_meth( f'Delivering IPC ctx error from {peer_side!r} to {side!r} task\n\n' - f'<= peer {peer_side!r}: {from_uid}\n' - f' |_ {nsf}()\n\n' - - f'=> {side!r} cid: {cid}\n' - f' |_{self._task}\n\n' + f'{flow_body}' f'{pformat(re)}\n' ) @@ -1887,30 +1943,27 @@ class Context: # or `RemoteActorError`). self._maybe_cancel_and_set_remote_error(re) - # XXX only case where returning early is fine! 
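# sketch of the overrun-queueing described above, with a plain
# `deque` standing in for `Context._overflow_q` (the `maxlen` here
# is made up):
from collections import deque

overflow_q: deque[dict] = deque(maxlen=1000)

def deliver(msg: dict, in_overrun: bool) -> bool:
    if in_overrun:
        overflow_q.append(msg)  # buffered for a later drain loop
        return False  # NOT delivered to the mem-chan; early return ok
    return True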
+ # TODO: expose as mod func instead! structfmt = pretty_struct.Struct.pformat if self._in_overrun: log.warning( - f'Queueing OVERRUN msg on caller task:\n' - f'<= peer: {from_uid}\n' - f' |_ {nsf}()\n\n' + f'Queueing OVERRUN msg on caller task:\n\n' - f'=> cid: {cid}\n' - f' |_{self._task}\n\n' + f'{flow_body}' f'{structfmt(msg)}\n' ) self._overflow_q.append(msg) + + # XXX NOTE XXX + # overrun is the ONLY case where returning early is fine! return False try: log.runtime( f'Delivering msg from IPC ctx:\n\n' - f'<= {from_uid}\n' - f' |_ {nsf}()\n\n' - f'=> {self._task}\n' - f' |_cid={self.cid}\n\n' + f'{flow_body}' f'{structfmt(msg)}\n' ) @@ -1942,6 +1995,7 @@ class Context: f'cid: {self.cid}\n' 'Failed to deliver msg:\n' f'send_chan: {send_chan}\n\n' + f'{pformat(msg)}\n' ) return False @@ -2095,6 +2149,12 @@ async def open_context_from_portal( ''' __tracebackhide__: bool = hide_tb + # denote this frame as a "runtime frame" for stack + # introspection where we report the caller code in logging + # and error message content. + # NOTE: 2 bc of the wrapping `@acm` + __runtimeframe__: int = 2 # noqa + # conduct target func method structural checks if not inspect.iscoroutinefunction(func) and ( getattr(func, '_tractor_contex_function', False) @@ -2122,6 +2182,8 @@ async def open_context_from_portal( nsf=nsf, kwargs=kwargs, + portal=portal, + # NOTE: it's imporant to expose this since you might # get the case where the parent who opened the context does # not open a stream until after some slow startup/init @@ -2132,13 +2194,17 @@ async def open_context_from_portal( # place.. allow_overruns=allow_overruns, ) - # ASAP, so that `Context.side: str` can be determined for - # logging / tracing / debug! - ctx._portal: Portal = portal - assert ctx._remote_func_type == 'context' - msg: Started = await ctx._recv_chan.receive() + assert ctx._caller_info + # XXX NOTE since `._scope` is NOT set BEFORE we retreive the + # `Started`-msg any cancellation triggered + # in `._maybe_cancel_and_set_remote_error()` will + # NOT actually cancel the below line! + # -> it's expected that if there is an error in this phase of + # the dialog, the `Error` msg should be raised from the `msg` + # handling block below. + msg: Started = await ctx._recv_chan.receive() try: # the "first" value here is delivered by the callee's # ``Context.started()`` call. @@ -2148,6 +2214,7 @@ async def open_context_from_portal( # except KeyError as src_error: except AttributeError as src_error: + log.exception('Raising from unexpected msg!\n') _raise_from_no_key_in_msg( ctx=ctx, msg=msg, @@ -2573,7 +2640,6 @@ async def open_context_from_portal( None, ) - def mk_context( chan: Channel, cid: str, @@ -2595,6 +2661,10 @@ def mk_context( recv_chan: trio.MemoryReceiveChannel send_chan, recv_chan = trio.open_memory_channel(msg_buffer_size) + # TODO: only scan caller-info if log level so high! + from .devx._code import find_caller_info + caller_info: CallerInfo|None = find_caller_info() + ctx = Context( chan=chan, cid=cid, @@ -2603,6 +2673,7 @@ def mk_context( _recv_chan=recv_chan, _nsf=nsf, _task=trio.lowlevel.current_task(), + _caller_info=caller_info, **kwargs, ) # TODO: we can drop the old placeholder yah? @@ -2613,7 +2684,11 @@ def mk_context( def context(func: Callable) -> Callable: ''' - Mark an async function as a streaming routine with ``@context``. + Mark an (async) function as an SC-supervised, inter-`Actor`, + child-`trio.Task`, IPC endpoint otherwise known more + colloquially as a (RPC) "context". 
+ + Functions annotated the fundamental IPC endpoint type offered by `tractor`. ''' # TODO: apply whatever solution ``mypy`` ends up picking for this: -- 2.34.1 From 97fc2a6628778955c75e7cff562d35f8fac58bf9 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 18 Apr 2024 16:24:59 -0400 Subject: [PATCH 062/305] Go back to `ContextVar` for codec mgmt Turns out we do want per-task inheritance particularly if there's to be per `Context` dynamic mutation of the spec; we don't want mutation in some task to affect any parent/global setting. Turns out since we use a common "feeder task" in the rpc loop, we need to offer a per `Context` payload decoder sys anyway in order to enable per-task controls for inter-actor multi-task-ctx scenarios. --- tractor/msg/_codec.py | 126 +++++++++++++++++++++++++----------------- 1 file changed, 75 insertions(+), 51 deletions(-) diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index 82fd2011..766a297a 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -33,25 +33,29 @@ from __future__ import annotations from contextlib import ( contextmanager as cm, ) -# from contextvars import ( -# ContextVar, -# Token, -# ) +from contextvars import ( + ContextVar, + Token, +) import textwrap from typing import ( Any, Callable, Type, + TYPE_CHECKING, Union, ) from types import ModuleType import msgspec -from msgspec import msgpack -from trio.lowlevel import ( - RunVar, - RunVarToken, +from msgspec import ( + msgpack, + # Raw, ) +# from trio.lowlevel import ( +# RunVar, +# RunVarToken, +# ) # TODO: see notes below from @mikenerone.. # from tricycle import TreeVar @@ -62,6 +66,9 @@ from tractor.msg.types import ( ) from tractor.log import get_logger +if TYPE_CHECKING: + from tractor._context import Context + log = get_logger(__name__) # TODO: overall IPC msg-spec features (i.e. in this mod)! @@ -157,24 +164,6 @@ class MsgCodec(Struct): lib: ModuleType = msgspec - # TODO: a sub-decoder system as well? - # payload_msg_specs: Union[Type[Struct]] = Any - # see related comments in `.msg.types` - # _payload_decs: ( - # dict[ - # str, - # msgpack.Decoder, - # ] - # |None - # ) = None - # OR - # ) = { - # # pre-seed decoders for std-py-type-set for use when - # # `MsgType.pld == None|Any`. - # None: msgpack.Decoder(Any), - # Any: msgpack.Decoder(Any), - # } - # TODO: use `functools.cached_property` for these ? # https://docs.python.org/3/library/functools.html#functools.cached_property @property @@ -210,7 +199,25 @@ class MsgCodec(Struct): # https://jcristharif.com/msgspec/usage.html#typed-decoding return self._dec.decode(msg) - # TODO: do we still want to try and support the sub-decoder with + # TODO: a sub-decoder system as well? + # payload_msg_specs: Union[Type[Struct]] = Any + # see related comments in `.msg.types` + # _payload_decs: ( + # dict[ + # str, + # msgpack.Decoder, + # ] + # |None + # ) = None + # OR + # ) = { + # # pre-seed decoders for std-py-type-set for use when + # # `MsgType.pld == None|Any`. + # None: msgpack.Decoder(Any), + # Any: msgpack.Decoder(Any), + # } + # + # -[ ] do we still want to try and support the sub-decoder with # `.Raw` technique in the case that the `Generic` approach gives # future grief? # @@ -429,6 +436,9 @@ _def_msgspec_codec: MsgCodec = mk_codec(ipc_pld_spec=Any) # _def_tractor_codec: MsgCodec = mk_codec( ipc_pld_spec=Any, + + # TODO: use this for debug mode locking prot? 
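# a minimal, self-contained distillation of the `ContextVar` swap
# made in this patch (std-lib only; `'nospec'` is a dummy codec
# value standing in for a real `MsgCodec`):
from contextlib import contextmanager
from contextvars import ContextVar, Token

_codec: ContextVar[str] = ContextVar('codec', default='nospec')

@contextmanager
def apply(codec: str):
    token: Token = _codec.set(codec)
    try:
        yield _codec.get()
    finally:
        _codec.reset(token)

with apply('msgpack-strict') as c:
    assert _codec.get() == c == 'msgpack-strict'
assert _codec.get() == 'nospec'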
+ # ipc_pld_spec=Raw, ) # TODO: IDEALLY provides for per-`trio.Task` specificity of the # IPC msging codec used by the transport layer when doing @@ -462,11 +472,9 @@ _def_tractor_codec: MsgCodec = mk_codec( # TODO: STOP USING THIS, since it's basically a global and won't # allow sub-IPC-ctxs to limit the msg-spec however desired.. -_ctxvar_MsgCodec: MsgCodec = RunVar( +# _ctxvar_MsgCodec: MsgCodec = RunVar( +_ctxvar_MsgCodec: ContextVar[MsgCodec] = ContextVar( 'msgspec_codec', - - # TODO: move this to our new `Msg`-spec! - # default=_def_msgspec_codec, default=_def_tractor_codec, ) @@ -475,23 +483,36 @@ _ctxvar_MsgCodec: MsgCodec = RunVar( def apply_codec( codec: MsgCodec, + ctx: Context|None = None, + ) -> MsgCodec: ''' - Dynamically apply a `MsgCodec` to the current task's - runtime context such that all IPC msgs are processed - with it for that task. + Dynamically apply a `MsgCodec` to the current task's runtime + context such that all (of a certain class of payload + containing i.e. `MsgType.pld: PayloadT`) IPC msgs are + processed with it for that task. + + Uses a `contextvars.ContextVar` to ensure the scope of any + codec setting matches the current `Context` or + `._rpc.process_messages()` feeder task's prior setting without + mutating any surrounding scope. + + When a `ctx` is supplied, only mod its `Context.pld_codec`. - Uses a `tricycle.TreeVar` to ensure the scope of the codec matches the `@cm` block and DOES NOT change to the original (default) value in new tasks (as it does for `ContextVar`). - See the docs: - - https://tricycle.readthedocs.io/en/latest/reference.html#tree-variables - - https://github.com/oremanj/tricycle/blob/master/tricycle/_tests/test_tree_var.py - ''' __tracebackhide__: bool = True - orig: MsgCodec = _ctxvar_MsgCodec.get() + + if ctx is not None: + var: ContextVar = ctx._var_pld_codec + else: + # use IPC channel-connection "global" codec + var: ContextVar = _ctxvar_MsgCodec + + orig: MsgCodec = var.get() + assert orig is not codec if codec.pld_spec is None: breakpoint() @@ -500,22 +521,25 @@ def apply_codec( 'Applying new msg-spec codec\n\n' f'{codec}\n' ) - token: RunVarToken = _ctxvar_MsgCodec.set(codec) + token: Token = var.set(codec) - # TODO: for TreeVar approach, see docs for @cm `.being()` API: - # https://tricycle.readthedocs.io/en/latest/reference.html#tree-variables - # try: - # with _ctxvar_MsgCodec.being(codec): - # new = _ctxvar_MsgCodec.get() - # assert new is codec - # yield codec + # ?TODO? 
for TreeVar approach which copies from the + # cancel-scope of the prior value, NOT the prior task + # See the docs: + # - https://tricycle.readthedocs.io/en/latest/reference.html#tree-variables + # - https://github.com/oremanj/tricycle/blob/master/tricycle/_tests/test_tree_var.py + # ^- see docs for @cm `.being()` API + # with _ctxvar_MsgCodec.being(codec): + # new = _ctxvar_MsgCodec.get() + # assert new is codec + # yield codec try: - yield _ctxvar_MsgCodec.get() + yield var.get() finally: - _ctxvar_MsgCodec.reset(token) + var.reset(token) - assert _ctxvar_MsgCodec.get() is orig + assert var.get() is orig log.info( 'Reverted to last msg-spec codec\n\n' f'{orig}\n' -- 2.34.1 From d982daa886df7708a2027189d661a347a4bf1b43 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 21 Apr 2024 17:02:39 -0400 Subject: [PATCH 063/305] Mark `.pld` msgs as also taking `msgspec.Raw` --- tractor/msg/types.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index f7654f62..59ec2a4e 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -38,6 +38,7 @@ from typing import ( from msgspec import ( defstruct, # field, + Raw, Struct, # UNSET, # UnsetType, @@ -105,7 +106,7 @@ class Msg( # TODO: could also be set to `msgspec.Raw` if the sub-decoders # approach is preferred over the generic parameterization # approach as take by `mk_msg_spec()` below. - pld: PayloadT + pld: PayloadT|Raw class Aid( @@ -332,7 +333,7 @@ class Started( decorated IPC endpoint. ''' - pld: PayloadT + pld: PayloadT|Raw # TODO: instead of using our existing `Start` @@ -349,7 +350,7 @@ class Yield( Per IPC transmission of a value from `await MsgStream.send()`. ''' - pld: PayloadT + pld: PayloadT|Raw class Stop( @@ -377,7 +378,7 @@ class Return( func-as-`trio.Task`. ''' - pld: PayloadT + pld: PayloadT|Raw class CancelAck( @@ -710,7 +711,9 @@ def mk_msg_spec( ) return ( ipc_spec, - msgtypes_table[spec_build_method] + ipc_msg_types, + msgtypes_table[spec_build_method] + + + ipc_msg_types, ) -- 2.34.1 From 2ed43373c5015cb1a4d29ac630a9bc381e5646bd Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 21 Apr 2024 17:08:27 -0400 Subject: [PATCH 064/305] Drop more `dict`-msg cruft from `._exceptions` --- tractor/_exceptions.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 4ace626f..90163241 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -170,7 +170,7 @@ def pformat_boxed_tb( f' ------ - ------\n\n' # f'{tb_str}\n' f'{tb_body}' - f' ------ - ------\n' + f' ------ - ------\n' f'_|\n' ) tb_box_indent: str = ( @@ -972,8 +972,6 @@ def _raise_from_no_key_in_msg( # an internal error should never get here try: cid: str = msg.cid - # cid: str = msg['cid'] - # except KeyError as src_err: except AttributeError as src_err: raise MessagingError( f'IPC `Context` rx-ed msg without a ctx-id (cid)!?\n' @@ -985,7 +983,6 @@ def _raise_from_no_key_in_msg( # TODO: test that shows stream raising an expected error!!! # raise the error message in a boxed exception type! - # if msg.get('error'): if isinstance(msg, Error): # match msg: # case Error(): @@ -1001,7 +998,6 @@ def _raise_from_no_key_in_msg( # the stream._eoc outside this in the calleer always? 
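An aside on the `dict`-msg cruft being dropped in this hunk: once the
transport decodes to tagged `msgspec.Struct` msg types, the old
`msg.get('error')`-style key sniffing gives way to plain type checks.
A minimal sketch of that idea, using stand-in structs which only
mirror (and are not) `tractor.msg.types`:

    import msgspec

    class Stop(msgspec.Struct, tag=True):
        cid: str

    class Error(msgspec.Struct, tag=True):
        cid: str
        message: str

    # decoding to a tagged union yields a typed struct, not a `dict`,
    # so `isinstance()` (or `match`) dispatch replaces key lookups.
    dec = msgspec.msgpack.Decoder(type=Stop|Error)
    msg = dec.decode(msgspec.msgpack.encode(Stop(cid='42')))
    assert isinstance(msg, Stop)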
# case Stop(): elif ( - # msg.get('stop') isinstance(msg, Stop) or ( stream -- 2.34.1 From baee80865405c84e63551ce7694aa82b710ec1bc Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 22 Apr 2024 18:01:09 -0400 Subject: [PATCH 065/305] Move `MsgTypeError` maker func to `._exceptions` Since it's going to be used from the IPC primitive APIs (`Context`/`MsgStream`) for similarly handling payload type spec validation errors and bc it's really not well situation in the IPC module XD Summary of (impl) tweaks: - obvi move `_mk_msg_type_err()` and import and use it in `._ipc`; ends up avoiding a lot of ad-hoc imports we had from `._exceptions` anyway! - mask out "new codec" runtime log emission from `MsgpackTCPStream`. - allow passing a (coming in next commit) `codec: MsgDec` (message decoder) which supports the same required `.pld_spec_str: str` attr. - for send side logging use existing `MsgCodec..pformat_msg_spec()`. - rename `_raise_from_no_key_in_msg()` to the now more appropriate `_raise_from_unexpected_msg()`, but leaving alias for now. --- tractor/_exceptions.py | 136 ++++++++++++++++++++++++++++++++++++++++- tractor/_ipc.py | 130 ++------------------------------------- 2 files changed, 138 insertions(+), 128 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 90163241..b2ba6e84 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -24,6 +24,7 @@ import importlib from pprint import pformat from typing import ( Any, + Callable, Type, TYPE_CHECKING, ) @@ -32,8 +33,11 @@ import traceback import trio from msgspec import ( - structs, defstruct, + msgpack, + Raw, + structs, + ValidationError, ) from tractor._state import current_actor @@ -44,6 +48,8 @@ from tractor.msg import ( Stop, Yield, types as msgtypes, + MsgCodec, + MsgDec, ) from tractor.msg.pretty_struct import ( iter_fields, @@ -932,7 +938,7 @@ def is_multi_cancelled(exc: BaseException) -> bool: return False -def _raise_from_no_key_in_msg( +def _raise_from_unexpected_msg( ctx: Context, msg: MsgType, src_err: AttributeError, @@ -1032,7 +1038,6 @@ def _raise_from_no_key_in_msg( # that arrived which is probably the source of this stream # closure ctx.maybe_raise() - raise eoc from src_err if ( @@ -1052,3 +1057,128 @@ def _raise_from_no_key_in_msg( " BUT received a non-error msg:\n" f'{pformat(msg)}' ) from src_err + + +_raise_from_no_key_in_msg = _raise_from_unexpected_msg + + +def _mk_msg_type_err( + msg: Any|bytes|Raw, + codec: MsgCodec|MsgDec, + + message: str|None = None, + verb_header: str = '', + + src_validation_error: ValidationError|None = None, + src_type_error: TypeError|None = None, + +) -> MsgTypeError: + ''' + Compose a `MsgTypeError` from an input runtime context. + + ''' + # `Channel.send()` case + if src_validation_error is None: + + if isinstance(codec, MsgDec): + raise RuntimeError( + '`codec` must be a `MsgCodec` for send-side errors?' + ) + + # no src error from `msgspec.msgpack.Decoder.decode()` so + # prolly a manual type-check on our part. 
+ if message is None: + fmt_spec: str = codec.pformat_msg_spec() + fmt_stack: str = ( + '\n'.join(traceback.format_stack(limit=3)) + ) + tb_fmt: str = pformat_boxed_tb( + tb_str=fmt_stack, + # fields_str=header, + field_prefix=' ', + indent='', + ) + message: str = ( + f'invalid msg -> {msg}: {type(msg)}\n\n' + f'{tb_fmt}\n' + f'Valid IPC msgs are:\n\n' + # f' ------ - ------\n' + f'{fmt_spec}\n', + ) + elif src_type_error: + src_message: str = str(src_type_error) + patt: str = 'type ' + type_idx: int = src_message.find('type ') + invalid_type: str = src_message[type_idx + len(patt):].split()[0] + + enc_hook: Callable|None = codec.enc.enc_hook + if enc_hook is None: + message += ( + '\n\n' + + f"The current IPC-msg codec can't encode type `{invalid_type}` !\n" + f'Maybe a `msgpack.Encoder.enc_hook()` extension is needed?\n\n' + + f'Check the `msgspec` docs for ad-hoc type extending:\n' + '|_ https://jcristharif.com/msgspec/extending.html\n' + '|_ https://jcristharif.com/msgspec/extending.html#defining-a-custom-extension-messagepack-only\n' + ) + + + msgtyperr = MsgTypeError( + message=message, + ipc_msg=msg, + ) + # ya, might be `None` + msgtyperr.__cause__ = src_type_error + return msgtyperr + + # `Channel.recv()` case + else: + # decode the msg-bytes using the std msgpack + # interchange-prot (i.e. without any + # `msgspec.Struct` handling) so that we can + # determine what `.msg.types.Msg` is the culprit + # by reporting the received value. + msg_dict: dict = msgpack.decode(msg) + msg_type_name: str = msg_dict['msg_type'] + msg_type = getattr(msgtypes, msg_type_name) + message: str = ( + f'invalid `{msg_type_name}` IPC msg\n\n' + ) + if verb_header: + message = f'{verb_header} ' + message + + # XXX see if we can determine the exact invalid field + # such that we can comprehensively report the + # specific field's type problem + msgspec_msg: str = src_validation_error.args[0].rstrip('`') + msg, _, maybe_field = msgspec_msg.rpartition('$.') + obj = object() + if (field_val := msg_dict.get(maybe_field, obj)) is not obj: + field_name_expr: str = ( + f' |_{maybe_field}: {codec.pld_spec_str} = ' + ) + fmt_val_lines: list[str] = pformat(field_val).splitlines() + fmt_val: str = ( + f'{fmt_val_lines[0]}\n' + + + textwrap.indent( + '\n'.join(fmt_val_lines[1:]), + prefix=' '*len(field_name_expr), + ) + ) + message += ( + f'{msg.rstrip("`")}\n\n' + f'<{msg_type.__qualname__}(\n' + # f'{".".join([msg_type.__module__, msg_type.__qualname__])}\n' + f'{field_name_expr}{fmt_val}\n' + f')>' + ) + + msgtyperr = MsgTypeError.from_decode( + message=message, + msgdict=msg_dict, + ) + msgtyperr.__cause__ = src_validation_error + return msgtyperr diff --git a/tractor/_ipc.py b/tractor/_ipc.py index f76d4ef5..70774bed 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -49,6 +49,7 @@ from tractor._exceptions import ( MsgTypeError, pack_from_raise, TransportClosed, + _mk_msg_type_err, ) from tractor.msg import ( _ctxvar_MsgCodec, @@ -118,127 +119,6 @@ class MsgTransport(Protocol[MsgType]): ... -def _mk_msg_type_err( - msg: Any|bytes, - codec: MsgCodec, - - message: str|None = None, - verb_header: str = '', - - src_validation_error: msgspec.ValidationError|None = None, - src_type_error: TypeError|None = None, - -) -> MsgTypeError: - - import textwrap - - # `Channel.send()` case - if src_validation_error is None: # send-side - - # no src error from `msgspec.msgpack.Decoder.decode()` so - # prolly a manual type-check on our part. 
- if message is None: - import traceback - from tractor._exceptions import pformat_boxed_tb - - fmt_spec: str = '\n'.join( - map(str, codec.msg_spec.__args__) - ) - fmt_stack: str = ( - '\n'.join(traceback.format_stack(limit=3)) - ) - tb_fmt: str = pformat_boxed_tb( - tb_str=fmt_stack, - # fields_str=header, - field_prefix=' ', - indent='', - ) - message: str = ( - f'invalid msg -> {msg}: {type(msg)}\n\n' - f'{tb_fmt}\n' - f'Valid IPC msgs are:\n\n' - # f' ------ - ------\n' - f'{fmt_spec}\n', - ) - elif src_type_error: - src_message: str = str(src_type_error) - patt: str = 'type ' - type_idx: int = src_message.find('type ') - invalid_type: str = src_message[type_idx + len(patt):].split()[0] - - enc_hook: Callable|None = codec.enc.enc_hook - if enc_hook is None: - message += ( - '\n\n' - - f"The current IPC-msg codec can't encode type `{invalid_type}` !\n" - f'Maybe a `msgpack.Encoder.enc_hook()` extension is needed?\n\n' - - f'Check the `msgspec` docs for ad-hoc type extending:\n' - '|_ https://jcristharif.com/msgspec/extending.html\n' - '|_ https://jcristharif.com/msgspec/extending.html#defining-a-custom-extension-messagepack-only\n' - ) - - - msgtyperr = MsgTypeError( - message=message, - ipc_msg=msg, - ) - # ya, might be `None` - msgtyperr.__cause__ = src_type_error - return msgtyperr - - # `Channel.recv()` case - else: - # decode the msg-bytes using the std msgpack - # interchange-prot (i.e. without any - # `msgspec.Struct` handling) so that we can - # determine what `.msg.types.Msg` is the culprit - # by reporting the received value. - msg_dict: dict = msgspec.msgpack.decode(msg) - msg_type_name: str = msg_dict['msg_type'] - msg_type = getattr(msgtypes, msg_type_name) - message: str = ( - f'invalid `{msg_type_name}` IPC msg\n\n' - ) - if verb_header: - message = f'{verb_header} ' + message - - # XXX see if we can determine the exact invalid field - # such that we can comprehensively report the - # specific field's type problem - msgspec_msg: str = src_validation_error.args[0].rstrip('`') - msg, _, maybe_field = msgspec_msg.rpartition('$.') - obj = object() - if (field_val := msg_dict.get(maybe_field, obj)) is not obj: - field_name_expr: str = ( - f' |_{maybe_field}: {codec.pld_spec_str} = ' - ) - fmt_val_lines: list[str] = pformat(field_val).splitlines() - fmt_val: str = ( - f'{fmt_val_lines[0]}\n' - + - textwrap.indent( - '\n'.join(fmt_val_lines[1:]), - prefix=' '*len(field_name_expr), - ) - ) - message += ( - f'{msg.rstrip("`")}\n\n' - f'<{msg_type.__qualname__}(\n' - # f'{".".join([msg_type.__module__, msg_type.__qualname__])}\n' - f'{field_name_expr}{fmt_val}\n' - f')>' - ) - - msgtyperr = MsgTypeError.from_decode( - message=message, - msgdict=msg_dict, - ) - msgtyperr.__cause__ = src_validation_error - return msgtyperr - - # TODO: not sure why we have to inherit here, but it seems to be an # issue with ``get_msg_transport()`` returning a ``Type[Protocol]``; # probably should make a `mypy` issue? @@ -299,10 +179,10 @@ class MsgpackTCPStream(MsgTransport): _codec._ctxvar_MsgCodec.get() ) # TODO: mask out before release? 
- log.runtime( - f'New {self} created with codec\n' - f'codec: {self._codec}\n' - ) + # log.runtime( + # f'New {self} created with codec\n' + # f'codec: {self._codec}\n' + # ) async def _iter_packets(self) -> AsyncGenerator[dict, None]: ''' -- 2.34.1 From 5d4681df4bda70aedb91cc06626654723cb84fe1 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 22 Apr 2024 18:24:42 -0400 Subject: [PATCH 066/305] Add a `MsgDec` for receive-only decoding In prep for a "payload receiver" abstraction that will wrap `MsgType.pld`-IO delivery from `Context` and `MsgStream`, adds a small `msgspec.msgpack.Decoder` shim which delegates an API similar to `MsgCodec` and is offered via a `.msg._codec.mk_dec()` factory. Detalles: - move over the TODOs/comments from `.msg.types.Start` to to `MsgDec.spec` since it's probably the ideal spot to start thinking about it from a consumer code PoV. - move codec reversion assert and log emit into `finally:` block. - flip default `.types._tractor_codec = mk_codec_ipc_pld(ipc_pld_spec=Raw)` in prep for always doing payload-delayed decodes. - make `MsgCodec._dec` private with public property getter. - change `CancelAck` to NOT derive from `Return` so it's mutex in `match/case:` handling. --- tractor/msg/__init__.py | 2 + tractor/msg/_codec.py | 156 ++++++++++++++++++++++++++++++++++------ tractor/msg/types.py | 33 +-------- 3 files changed, 141 insertions(+), 50 deletions(-) diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py index 8f13f5f8..d968f6cf 100644 --- a/tractor/msg/__init__.py +++ b/tractor/msg/__init__.py @@ -34,6 +34,7 @@ from ._codec import ( apply_codec as apply_codec, mk_codec as mk_codec, MsgCodec as MsgCodec, + MsgDec as MsgDec, current_codec as current_codec, ) @@ -50,6 +51,7 @@ from .types import ( Yield as Yield, Stop as Stop, Return as Return, + CancelAck as CancelAck, Error as Error, diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index 766a297a..104f7d99 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -50,7 +50,7 @@ from types import ModuleType import msgspec from msgspec import ( msgpack, - # Raw, + Raw, ) # from trio.lowlevel import ( # RunVar, @@ -71,6 +71,108 @@ if TYPE_CHECKING: log = get_logger(__name__) + +# TODO: unify with `MsgCodec` by making `._dec` part this? +class MsgDec(Struct): + ''' + An IPC msg decoder. + + Normally used to decode only a payload: `MsgType.pld: + PayloadT` field before delivery to IPC consumer code. + + ''' + _dec: msgpack.Decoder + + @property + def dec(self) -> msgpack.Decoder: + return self._dec + + # struct type unions + # https://jcristharif.com/msgspec/structs.html#tagged-unions + # + # ^-TODO-^: make a wrapper type for this such that alt + # backends can be represented easily without a `Union` needed, + # AND so that we have better support for wire transport. + # + # -[ ] maybe `FieldSpec` is a good name since msg-spec + # better applies to a `MsgType[FieldSpec]`? + # + # -[ ] both as part of the `.open_context()` call AND as part of the + # immediate ack-reponse (see similar below) + # we should do spec matching and fail if anything is awry? + # + # -[ ] eventually spec should be generated/parsed from the + # type-annots as # desired in GH issue: + # https://github.com/goodboy/tractor/issues/365 + # + # -[ ] semantics of the mismatch case + # - when caller-callee specs we should raise + # a `MsgTypeError` or `MsgSpecError` or similar? + # + # -[ ] wrapper types for both spec types such that we can easily + # IPC transport them? 
+ # - `TypeSpec: Union[Type]` + # * also a `.__contains__()` for doing `None in + # TypeSpec[None|int]` since rn you need to do it on + # `.__args__` for unions.. + # - `MsgSpec: Union[Type[Msg]] + # + # -[ ] auto-genning this from new (in 3.12) type parameter lists Bo + # |_ https://docs.python.org/3/reference/compound_stmts.html#type-params + # |_ historical pep 695: https://peps.python.org/pep-0695/ + # |_ full lang spec: https://typing.readthedocs.io/en/latest/spec/ + # |_ on annotation scopes: + # https://docs.python.org/3/reference/executionmodel.html#annotation-scopes + # |_ 3.13 will have subscriptable funcs Bo + # https://peps.python.org/pep-0718/ + @property + def spec(self) -> Union[Type[Struct]]: + # NOTE: defined and applied inside `mk_codec()` + return self._dec.type + + # no difference, as compared to a `MsgCodec` which defines the + # `MsgType.pld: PayloadT` part of its spec separately + pld_spec = spec + + # TODO: would get moved into `FieldSpec.__str__()` right? + @property + def spec_str(self) -> str: + + # TODO: could also use match: instead? + spec: Union[Type]|Type = self.spec + + # `typing.Union` case + if getattr(spec, '__args__', False): + return str(spec) + + # just a single type + else: + return spec.__name__ + + pld_spec_str = spec_str + + def decode( + self, + raw: Raw|bytes, + ) -> Any: + return self._dec.decode(raw) + + @property + def hook(self) -> Callable|None: + return self._dec.dec_hook + + +def mk_dec( + spec: Union[Type[Struct]]|Any = Any, + dec_hook: Callable|None = None, + +) -> MsgDec: + + return msgpack.Decoder( + type=spec, # like `Msg[Any]` + dec_hook=dec_hook, + ) + # TODO: overall IPC msg-spec features (i.e. in this mod)! # # -[ ] API changes towards being interchange lib agnostic! @@ -94,8 +196,7 @@ class MsgCodec(Struct): ''' _enc: msgpack.Encoder _dec: msgpack.Decoder - - pld_spec: Union[Type[Struct]]|None + _pld_spec: Type[Struct]|Raw|Any def __repr__(self) -> str: speclines: str = textwrap.indent( @@ -118,14 +219,21 @@ class MsgCodec(Struct): ')>' ) + @property + def pld_spec(self) -> Type[Struct]|Raw|Any: + return self._pld_spec + @property def pld_spec_str(self) -> str: - spec: Union[Type]|Type = self.pld_spec # TODO: could also use match: instead? + spec: Union[Type]|Type = self.pld_spec + + # `typing.Union` case if getattr(spec, '__args__', False): - # `typing.Union` case return str(spec) + + # just a single type else: return spec.__name__ @@ -133,6 +241,7 @@ class MsgCodec(Struct): # https://jcristharif.com/msgspec/structs.html#tagged-unions @property def msg_spec(self) -> Union[Type[Struct]]: + # NOTE: defined and applied inside `mk_codec()` return self._dec.type def msg_spec_items( @@ -157,8 +266,9 @@ class MsgCodec(Struct): def pformat_msg_spec( self, msg: MsgType|None = None, + join_char: str = '\n', ) -> str: - return '\n'.join( + return join_char.join( self.msg_spec_items(msg=msg).values() ) @@ -405,18 +515,25 @@ def mk_codec( assert len(ipc_msg_spec.__args__) == len(msg_types) assert ipc_msg_spec + # TODO: use this shim instead? + # bc.. unification, err somethin? 
+ # dec: MsgDec = mk_dec( + # spec=ipc_msg_spec, + # dec_hook=dec_hook, + # ) + + dec = msgpack.Decoder( + type=ipc_msg_spec, + dec_hook=dec_hook, + ) enc = msgpack.Encoder( enc_hook=enc_hook, ) - dec = msgpack.Decoder( - type=ipc_msg_spec, # like `Msg[Any]` - dec_hook=dec_hook, - ) codec = MsgCodec( _enc=enc, _dec=dec, - pld_spec=ipc_pld_spec, + _pld_spec=ipc_pld_spec, ) # sanity on expected backend support @@ -435,10 +552,9 @@ _def_msgspec_codec: MsgCodec = mk_codec(ipc_pld_spec=Any) # https://jcristharif.com/msgspec/supported-types.html # _def_tractor_codec: MsgCodec = mk_codec( - ipc_pld_spec=Any, - # TODO: use this for debug mode locking prot? - # ipc_pld_spec=Raw, + # ipc_pld_spec=Any, + ipc_pld_spec=Raw, ) # TODO: IDEALLY provides for per-`trio.Task` specificity of the # IPC msging codec used by the transport layer when doing @@ -538,12 +654,12 @@ def apply_codec( yield var.get() finally: var.reset(token) + log.info( + 'Reverted to last msg-spec codec\n\n' + f'{orig}\n' + ) + assert var.get() is orig - assert var.get() is orig - log.info( - 'Reverted to last msg-spec codec\n\n' - f'{orig}\n' - ) def current_codec() -> MsgCodec: ''' @@ -574,7 +690,7 @@ def limit_msg_spec( ''' __tracebackhide__: bool = True - curr_codec = current_codec() + curr_codec: MsgCodec = current_codec() msgspec_codec: MsgCodec = mk_codec( ipc_pld_spec=payload_spec, **codec_kwargs, diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 59ec2a4e..cb124324 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -266,35 +266,7 @@ class Start( # TODO: enforcing a msg-spec in terms `Msg.pld` # parameterizable msgs to be used in the appls IPC dialog. - # - # -[ ] both as part of the `.open_context()` call AND as part of the - # immediate ack-reponse (see similar below) - # we should do spec matching and fail if anything is awry? - # - # -[ ] eventually spec should be generated/parsed from the - # type-annots as # desired in GH issue: - # https://github.com/goodboy/tractor/issues/365 - # - # -[ ] semantics of the mismatch case - # - when caller-callee specs we should raise - # a `MsgTypeError` or `MsgSpecError` or similar? - # - # -[ ] wrapper types for both spec types such that we can easily - # IPC transport them? - # - `TypeSpec: Union[Type]` - # * also a `.__contains__()` for doing `None in - # TypeSpec[None|int]` since rn you need to do it on - # `.__args__` for unions.. 
# - `MsgSpec: Union[Type[Msg]]
-    #
-    # -[ ] auto-genning this from new (in 3.12) type parameter lists Bo
-    # |_ https://docs.python.org/3/reference/compound_stmts.html#type-params
-    # |_ historical pep 695: https://peps.python.org/pep-0695/
-    # |_ full lang spec: https://typing.readthedocs.io/en/latest/spec/
-    # |_ on annotation scopes:
-    #    https://docs.python.org/3/reference/executionmodel.html#annotation-scopes
-    # |_ 3.13 will have subscriptable funcs Bo
-    #    https://peps.python.org/pep-0718/
+    # => SEE `._codec.MsgDec` for more <=
     pld_spec: str = str(Any)
@@ -382,7 +354,8 @@ class Return(
 
 
 class CancelAck(
-    Return,
+    Msg,
+    Generic[PayloadT],
 ):
     '''
     Deliver the `bool` return-value from a cancellation `Actor`
-- 
2.34.1


From 9e5bdd26d758ab58ba85d1410dee19816af21214 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Tue, 23 Apr 2024 17:43:45 -0400
Subject: [PATCH 067/305] First draft "payload receiver" in a new `.msg._ops`

As per much tinkering, re-designs and preceding rubber-ducking via many
"commit msg novelas", **finally** this adds the (hopefully) final
missing layer for typed msg safety: `tractor.msg._ops.PldRx`
(or `PayloadReceiver`? haven't decided how verbose to go..)

Design justification summary:
------ - ------
- need a way to be as-close-as-possible to the `tractor`-application
  such that when `MsgType.pld: PayloadT` validation takes place, it is
  straightforward and obvious how user code can decide to handle any
  resulting `MsgTypeError`.
- there should be a common and optional-yet-modular way to modify
  **how** data delivered via IPC (possibly embedded as user defined,
  type-constrained `.pld: msgspec.Struct`s) can be handled and
  processed during fault conditions and/or IPC "msg attacks".
- support for nested type constraints within a `MsgType.pld` field
  should be simple to define, implement and understand at runtime.
- a layer between the app-level IPC primitive APIs
  (`Context`/`MsgStream`) and application-task code (consumer code of
  those APIs) should be easily customized and prove-to-be-as-such
  through demonstrably rigorous internal (sub-sys) use!
  -> eg. via seamless runtime RPC eps support like `Actor.cancel()`
  -> by correctly implementing our `.devx._debug.Lock` REPL TTY mgmt
     dialog prot, via a dead simple payload-as-ctl-msg-spec.

There are some fairly detailed doc strings included so I won't
duplicate that content, the majority of the work here is actually
somewhat of a factoring of many similar blocks that are doing more or
less the same `msg = await Context._rx_chan.receive()` with
boilerplate for `Error`/`Stop` handling via
`_raise_from_no_key_in_msg()`. The new `PldRx` basically provides
a shim layer for this common "receive msg, decode its payload, yield
it up to the consuming app task" by pairing the RPC feeder mem-chan
with a msg-payload decoder and expecting IPC API internals to use
**one** API instead of re-implementing the same pattern all over the
place XD

`PldRx` breakdown
------ - ------
- for now only expects a `._msgdec: MsgDec` which allows for
  override-able `MsgType.pld` validation and most obviously used in
  the impl of `.dec_msg()`, the decode message method.
- provides multiple mem-chan receive options including:
  |_ `.recv_pld()` which does the e2e operation of receiving a payload
     item.
  |_ a sync `.recv_pld_nowait()` version.
  |_ a `.recv_msg_w_pld()` which optionally allows retrieving both the
     shuttling `MsgType` as well as its `.pld` body for use cases
     where info on both is important (eg. draining a `MsgStream`).
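As a rough mental model, that pairing reduces to something like the
following sketch; the names (`Msg`, `PayloadReceiver`) are
illustrative stand-ins only, not the actual `PldRx` API which lands in
the diff below:

    import msgspec
    import trio

    class Msg(msgspec.Struct):
        cid: str
        pld: msgspec.Raw  # payload left undecoded by the transport

    class PayloadReceiver:
        def __init__(
            self,
            rx_chan: trio.MemoryReceiveChannel,
            pld_spec: type,
        ) -> None:
            self._rx = rx_chan
            self._dec = msgspec.msgpack.Decoder(type=pld_spec)

        async def recv_pld(self):
            msg: Msg = await self._rx.receive()
            # a spec mismatch raises `msgspec.ValidationError` here,
            # which the real impl boxes as a `MsgTypeError`.
            return self._dec.decode(msg.pld)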
Dirty internal changeover/implementation deatz: ------ - ------ - obvi move over all the IPC "primitives" that previously had the duplicate recv-n-yield logic: - `MsgStream.receive[_nowait]()` delegating instead to the equivalent `PldRx.recv_pld[_nowait]()`. - add `Context._pld_rx: PldRx`, created and passed in by `mk_context()`; use it for the `.started()` -> `first: Started` retrieval inside `open_context_from_portal()`. - all the relevant `Portal` invocation methods: `.result()`, `.run_from_ns()`, `.run()`; also allows for dropping `_unwrap_msg()` and `.Portal_return_once()` outright Bo - rename `Context.ctx._recv_chan` -> `._rx_chan`. - add detailed `Context._scope` info for logging whether or not it's cancelled inside `_maybe_cancel_and_set_remote_error()`. - move `._context._drain_to_final_msg()` -> `._ops.drain_to_final_msg()` since it's really not necessarily ctx specific per say, and it does kinda fit with "msg operations" more abstractly ;) --- tractor/_context.py | 438 +++++++------------------------- tractor/_portal.py | 127 +++++----- tractor/_runtime.py | 6 +- tractor/_streaming.py | 71 ++---- tractor/msg/_ops.py | 563 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 738 insertions(+), 467 deletions(-) create mode 100644 tractor/msg/_ops.py diff --git a/tractor/_context.py b/tractor/_context.py index 9e5fe6c1..f0fc966e 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -25,26 +25,31 @@ disjoint, parallel executing tasks in separate actors. ''' from __future__ import annotations from collections import deque -from contextlib import asynccontextmanager as acm -from contextvars import ContextVar +from contextlib import ( + asynccontextmanager as acm, +) from dataclasses import ( dataclass, field, ) from functools import partial import inspect -import msgspec from pprint import pformat from typing import ( Any, Callable, AsyncGenerator, + Type, TYPE_CHECKING, + Union, ) import warnings - +# ------ - ------ import trio - +from msgspec import ( + ValidationError, +) +# ------ - ------ from ._exceptions import ( ContextCancelled, InternalError, @@ -53,7 +58,6 @@ from ._exceptions import ( StreamOverrun, pack_from_raise, unpack_error, - _raise_from_no_key_in_msg, ) from .log import get_logger from .msg import ( @@ -70,8 +74,12 @@ from .msg import ( current_codec, pretty_struct, types as msgtypes, + _ops as msgops, +) +from ._ipc import ( + Channel, + _mk_msg_type_err, ) -from ._ipc import Channel from ._streaming import MsgStream from ._state import ( current_actor, @@ -86,294 +94,9 @@ if TYPE_CHECKING: CallerInfo, ) - log = get_logger(__name__) -async def _drain_to_final_msg( - ctx: Context, - - hide_tb: bool = True, - msg_limit: int = 6, - -) -> tuple[ - Return|None, - list[MsgType] -]: - ''' - Drain IPC msgs delivered to the underlying rx-mem-chan - `Context._recv_chan` from the runtime in search for a final - result or error msg. - - The motivation here is to ideally capture errors during ctxc - conditions where a canc-request/or local error is sent but the - local task also excepts and enters the - `Portal.open_context().__aexit__()` block wherein we prefer to - capture and raise any remote error or ctxc-ack as part of the - `ctx.result()` cleanup and teardown sequence. - - ''' - __tracebackhide__: bool = hide_tb - raise_overrun: bool = not ctx._allow_overruns - - # wait for a final context result by collecting (but - # basically ignoring) any bi-dir-stream msgs still in transit - # from the far end. 
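The `_drain_to_final_msg()` loop being removed here re-appears,
reworked, as `msg._ops.drain_to_final_msg()` at the end of this patch;
its core control flow reduces to roughly the following, shown with
hypothetical stand-in msg types (not `tractor.msg.types`):

    import msgspec
    import trio

    class Yield(msgspec.Struct, tag=True):
        pld: object

    class Return(msgspec.Struct, tag=True):
        pld: object

    async def drain_to_result(
        rx_chan: trio.MemoryReceiveChannel,
    ) -> tuple[object, list]:
        # collect (but mostly ignore) any in-transit stream msgs
        # until the final result (or an error/ctxc ack) arrives.
        drained: list = []
        while True:
            match await rx_chan.receive():
                case Return(pld=result):
                    return result, drained
                case msg:
                    drained.append(msg)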
- pre_result_drained: list[MsgType] = [] - return_msg: Return|None = None - while not ( - ctx.maybe_error - and not ctx._final_result_is_set() - ): - try: - # TODO: can remove? - # await trio.lowlevel.checkpoint() - - # NOTE: this REPL usage actually works here dawg! Bo - # from .devx._debug import pause - # await pause() - - # TODO: bad idea? - # -[ ] wrap final outcome channel wait in a scope so - # it can be cancelled out of band if needed? - # - # with trio.CancelScope() as res_cs: - # ctx._res_scope = res_cs - # msg: dict = await ctx._recv_chan.receive() - # if res_cs.cancelled_caught: - - # TODO: ensure there's no more hangs, debugging the - # runtime pretty preaase! - # from .devx._debug import pause - # await pause() - - # TODO: can remove this finally? - # we have no more need for the sync draining right - # since we're can kinda guarantee the async - # `.receive()` below will never block yah? - # - # if ( - # ctx._cancel_called and ( - # ctx.cancel_acked - # # or ctx.chan._cancel_called - # ) - # # or not ctx._final_result_is_set() - # # ctx.outcome is not - # # or ctx.chan._closed - # ): - # try: - # msg: dict = await ctx._recv_chan.receive_nowait()() - # except trio.WouldBlock: - # log.warning( - # 'When draining already `.cancel_called` ctx!\n' - # 'No final msg arrived..\n' - # ) - # break - # else: - # msg: dict = await ctx._recv_chan.receive() - - # TODO: don't need it right jefe? - # with trio.move_on_after(1) as cs: - # if cs.cancelled_caught: - # from .devx._debug import pause - # await pause() - - # pray to the `trio` gawds that we're corrent with this - # msg: dict = await ctx._recv_chan.receive() - msg: MsgType = await ctx._recv_chan.receive() - - # NOTE: we get here if the far end was - # `ContextCancelled` in 2 cases: - # 1. we requested the cancellation and thus - # SHOULD NOT raise that far end error, - # 2. WE DID NOT REQUEST that cancel and thus - # SHOULD RAISE HERE! - except trio.Cancelled: - - # CASE 2: mask the local cancelled-error(s) - # only when we are sure the remote error is - # the source cause of this local task's - # cancellation. - ctx.maybe_raise() - - # CASE 1: we DID request the cancel we simply - # continue to bubble up as normal. - raise - - match msg: - - # final result arrived! - case Return( - # cid=cid, - pld=res, - ): - ctx._result: Any = res - log.runtime( - 'Context delivered final draining msg:\n' - f'{pformat(msg)}' - ) - # XXX: only close the rx mem chan AFTER - # a final result is retreived. - # if ctx._recv_chan: - # await ctx._recv_chan.aclose() - # TODO: ^ we don't need it right? - return_msg = msg - break - - # far end task is still streaming to us so discard - # and report depending on local ctx state. - case Yield(): - pre_result_drained.append(msg) - if ( - (ctx._stream.closed - and (reason := 'stream was already closed') - ) - or (ctx.cancel_acked - and (reason := 'ctx cancelled other side') - ) - or (ctx._cancel_called - and (reason := 'ctx called `.cancel()`') - ) - or (len(pre_result_drained) > msg_limit - and (reason := f'"yield" limit={msg_limit}') - ) - ): - log.cancel( - 'Cancelling `MsgStream` drain since ' - f'{reason}\n\n' - f'<= {ctx.chan.uid}\n' - f' |_{ctx._nsf}()\n\n' - f'=> {ctx._task}\n' - f' |_{ctx._stream}\n\n' - - f'{pformat(msg)}\n' - ) - return ( - return_msg, - pre_result_drained, - ) - - # drain up to the `msg_limit` hoping to get - # a final result or error/ctxc. 
- else: - log.warning( - 'Ignoring "yield" msg during `ctx.result()` drain..\n' - f'<= {ctx.chan.uid}\n' - f' |_{ctx._nsf}()\n\n' - f'=> {ctx._task}\n' - f' |_{ctx._stream}\n\n' - - f'{pformat(msg)}\n' - ) - continue - - # stream terminated, but no result yet.. - # - # TODO: work out edge cases here where - # a stream is open but the task also calls - # this? - # -[ ] should be a runtime error if a stream is open right? - # Stop() - case Stop(): - pre_result_drained.append(msg) - log.cancel( - 'Remote stream terminated due to "stop" msg:\n\n' - f'{pformat(msg)}\n' - ) - continue - - # remote error msg, likely already handled inside - # `Context._deliver_msg()` - case Error(): - # TODO: can we replace this with `ctx.maybe_raise()`? - # -[ ] would this be handier for this case maybe? - # async with maybe_raise_on_exit() as raises: - # if raises: - # log.error('some msg about raising..') - # - re: Exception|None = ctx._remote_error - if re: - assert msg is ctx._cancel_msg - # NOTE: this solved a super duper edge case XD - # this was THE super duper edge case of: - # - local task opens a remote task, - # - requests remote cancellation of far end - # ctx/tasks, - # - needs to wait for the cancel ack msg - # (ctxc) or some result in the race case - # where the other side's task returns - # before the cancel request msg is ever - # rxed and processed, - # - here this surrounding drain loop (which - # iterates all ipc msgs until the ack or - # an early result arrives) was NOT exiting - # since we are the edge case: local task - # does not re-raise any ctxc it receives - # IFF **it** was the cancellation - # requester.. - # - # XXX will raise if necessary but ow break - # from loop presuming any supressed error - # (ctxc) should terminate the context! - ctx._maybe_raise_remote_err( - re, - # NOTE: obvi we don't care if we - # overran the far end if we're already - # waiting on a final result (msg). - # raise_overrun_from_self=False, - raise_overrun_from_self=raise_overrun, - ) - - break # OOOOOF, yeah obvi we need this.. - - # XXX we should never really get here - # right! since `._deliver_msg()` should - # always have detected an {'error': ..} - # msg and already called this right!?! - elif error := unpack_error( - msg=msg, - chan=ctx._portal.channel, - hide_tb=False, - ): - log.critical('SHOULD NEVER GET HERE!?') - assert msg is ctx._cancel_msg - assert error.msgdata == ctx._remote_error.msgdata - assert error.ipc_msg == ctx._remote_error.ipc_msg - from .devx._debug import pause - await pause() - ctx._maybe_cancel_and_set_remote_error(error) - ctx._maybe_raise_remote_err(error) - - else: - # bubble the original src key error - raise - - # XXX should pretty much never get here unless someone - # overrides the default `MsgType` spec. - case _: - pre_result_drained.append(msg) - # It's definitely an internal error if any other - # msg type without a`'cid'` field arrives here! - if not msg.cid: - raise InternalError( - 'Unexpected cid-missing msg?\n\n' - f'{msg}\n' - ) - - raise RuntimeError('Unknown msg type: {msg}') - - else: - log.cancel( - 'Skipping `MsgStream` drain since final outcome is set\n\n' - f'{ctx.outcome}\n' - ) - - return ( - return_msg, - pre_result_drained, - ) - - class Unresolved: ''' Placeholder value for `Context._result` until @@ -423,9 +146,12 @@ class Context: # the "feeder" channels for delivering message values to the # local task from the runtime's msg processing loop. 
- _recv_chan: trio.MemoryReceiveChannel + _rx_chan: trio.MemoryReceiveChannel _send_chan: trio.MemorySendChannel + # payload receiver + _pld_rx: msgops.PldRx + # full "namespace-path" to target RPC function _nsf: NamespacePath @@ -447,7 +173,7 @@ class Context: _task: trio.lowlevel.Task|None = None # TODO: cs around result waiting so we can cancel any - # permanently blocking `._recv_chan.receive()` call in + # permanently blocking `._rx_chan.receive()` call in # a drain loop? # _res_scope: trio.CancelScope|None = None @@ -504,14 +230,6 @@ class Context: _started_called: bool = False _stream_opened: bool = False _stream: MsgStream|None = None - _pld_codec_var: ContextVar[MsgCodec] = ContextVar( - 'pld_codec', - default=_codec._def_msgspec_codec, # i.e. `Any`-payloads - ) - - @property - def pld_codec(self) -> MsgCodec|None: - return self._pld_codec_var.get() # caller of `Portal.open_context()` for # logging purposes mostly @@ -916,9 +634,8 @@ class Context: else: log.error( f'Remote context error:\n\n' - + # f'{pformat(self)}\n' f'{error}\n' - f'{pformat(self)}\n' ) # always record the cancelling actor's uid since its @@ -955,24 +672,49 @@ class Context: and not self._is_self_cancelled() and not cs.cancel_called and not cs.cancelled_caught - and ( - msgerr - and - # NOTE: allow user to config not cancelling the - # local scope on `MsgTypeError`s - self._cancel_on_msgerr - ) ): - # TODO: it'd sure be handy to inject our own - # `trio.Cancelled` subtype here ;) - # https://github.com/goodboy/tractor/issues/368 - log.cancel('Cancelling local `.open_context()` scope!') - self._scope.cancel() + if not ( + msgerr + # NOTE: we allow user to config not cancelling the + # local scope on `MsgTypeError`s + and not self._cancel_on_msgerr + ): + # TODO: it'd sure be handy to inject our own + # `trio.Cancelled` subtype here ;) + # https://github.com/goodboy/tractor/issues/368 + message: str = 'Cancelling `Context._scope` !\n\n' + self._scope.cancel() + + else: + message: str = ( + 'NOT Cancelling `Context._scope` since,\n' + f'Context._cancel_on_msgerr = {self._cancel_on_msgerr}\n\n' + f'AND we got a msg-type-error!\n' + f'{error}\n' + ) else: - log.cancel('NOT cancelling local `.open_context()` scope!') + message: str = 'NOT cancelling `Context._scope` !\n\n' + scope_info: str = 'No `self._scope: CancelScope` was set/used ?' + if cs: + scope_info: str = ( + f'self._scope: {cs}\n' + f'|_ .cancel_called: {cs.cancel_called}\n' + f'|_ .cancelled_caught: {cs.cancelled_caught}\n' + f'|_ ._cancel_status: {cs._cancel_status}\n\n' + f'{self}\n' + f'|_ ._is_self_cancelled(): {self._is_self_cancelled()}\n' + f'|_ ._cancel_on_msgerr: {self._cancel_on_msgerr}\n\n' + + f'msgerr: {msgerr}\n' + ) + log.cancel( + message + + + f'{scope_info}' + ) # TODO: maybe we should also call `._res_scope.cancel()` if it # exists to support cancelling any drain loop hangs? # NOTE: this usage actually works here B) @@ -1259,7 +1001,7 @@ class Context: # a ``.open_stream()`` block prior or there was some other # unanticipated error or cancellation from ``trio``. - if ctx._recv_chan._closed: + if ctx._rx_chan._closed: raise trio.ClosedResourceError( 'The underlying channel for this stream was already closed!\n' ) @@ -1279,7 +1021,7 @@ class Context: # stream WAS NOT just closed normally/gracefully. async with MsgStream( ctx=self, - rx_chan=ctx._recv_chan, + rx_chan=ctx._rx_chan, ) as stream: # NOTE: we track all existing streams per portal for @@ -1430,13 +1172,12 @@ class Context: # boxed `StreamOverrun`. 
This is mostly useful for # supressing such faults during # cancellation/error/final-result handling inside - # `_drain_to_final_msg()` such that we do not + # `msg._ops.drain_to_final_msg()` such that we do not # raise such errors particularly in the case where # `._cancel_called == True`. not raise_overrun_from_self and isinstance(remote_error, RemoteActorError) - - and remote_error.boxed_type_str == 'StreamOverrun' + and remote_error.boxed_type is StreamOverrun # and tuple(remote_error.msgdata['sender']) == our_uid and tuple(remote_error.sender) == our_uid @@ -1506,12 +1247,12 @@ class Context: if self._final_result_is_set(): return self._result - assert self._recv_chan + assert self._rx_chan raise_overrun: bool = not self._allow_overruns if ( self.maybe_error is None and - not self._recv_chan._closed # type: ignore + not self._rx_chan._closed # type: ignore ): # wait for a final context result/error by "draining" # (by more or less ignoring) any bi-dir-stream "yield" @@ -1519,7 +1260,7 @@ class Context: ( return_msg, drained_msgs, - ) = await _drain_to_final_msg( + ) = await msgops.drain_to_final_msg( ctx=self, hide_tb=hide_tb, ) @@ -1805,8 +1546,7 @@ class Context: await self.chan.send(started_msg) # raise any msg type error NO MATTER WHAT! - except msgspec.ValidationError as verr: - from tractor._ipc import _mk_msg_type_err + except ValidationError as verr: raise _mk_msg_type_err( msg=msg_bytes, codec=codec, @@ -1893,7 +1633,7 @@ class Context: - NEVER `return` early before delivering the msg! bc if the error is a ctxc and there is a task waiting on `.result()` we need the msg to be - `send_chan.send_nowait()`-ed over the `._recv_chan` so + `send_chan.send_nowait()`-ed over the `._rx_chan` so that the error is relayed to that waiter task and thus raised in user code! @@ -2204,24 +1944,11 @@ async def open_context_from_portal( # -> it's expected that if there is an error in this phase of # the dialog, the `Error` msg should be raised from the `msg` # handling block below. - msg: Started = await ctx._recv_chan.receive() - try: - # the "first" value here is delivered by the callee's - # ``Context.started()`` call. - # first: Any = msg['started'] - first: Any = msg.pld - ctx._started_called: bool = True - - # except KeyError as src_error: - except AttributeError as src_error: - log.exception('Raising from unexpected msg!\n') - _raise_from_no_key_in_msg( - ctx=ctx, - msg=msg, - src_err=src_error, - log=log, - expect_msg=Started, - ) + first: Any = await ctx._pld_rx.recv_pld( + ctx=ctx, + expect_msg=Started, + ) + ctx._started_called: bool = True uid: tuple = portal.channel.uid cid: str = ctx.cid @@ -2543,7 +2270,7 @@ async def open_context_from_portal( # we tear down the runtime feeder chan last # to avoid premature stream clobbers. if ( - (rxchan := ctx._recv_chan) + (rxchan := ctx._rx_chan) # maybe TODO: yes i know the below check is # touching `trio` memchan internals..BUT, there are @@ -2586,7 +2313,7 @@ async def open_context_from_portal( # underlying feeder channel is # once-and-only-CLOSED! with trio.CancelScope(shield=True): - await ctx._recv_chan.aclose() + await ctx._rx_chan.aclose() # XXX: we always raise remote errors locally and # generally speaking mask runtime-machinery related @@ -2631,9 +2358,9 @@ async def open_context_from_portal( # FINALLY, remove the context from runtime tracking and # exit! 
log.runtime( - 'Removing IPC ctx opened with peer\n' - f'{uid}\n' - f'|_{ctx}\n' + 'De-allocating IPC ctx opened with {ctx.side!r} peer \n' + f'uid: {uid}\n' + f'cid: {ctx.cid}\n' ) portal.actor._contexts.pop( (uid, cid), @@ -2646,6 +2373,7 @@ def mk_context( nsf: NamespacePath, msg_buffer_size: int = 2**6, + pld_spec: Union[Type] = Any, **kwargs, @@ -2665,12 +2393,18 @@ def mk_context( from .devx._code import find_caller_info caller_info: CallerInfo|None = find_caller_info() + pld_rx = msgops.PldRx( + # _rx_mc=recv_chan, + _msgdec=_codec.mk_dec(spec=pld_spec) + ) + ctx = Context( chan=chan, cid=cid, _actor=current_actor(), _send_chan=send_chan, - _recv_chan=recv_chan, + _rx_chan=recv_chan, + _pld_rx=pld_rx, _nsf=nsf, _task=trio.lowlevel.current_task(), _caller_info=caller_info, diff --git a/tractor/_portal.py b/tractor/_portal.py index 052dd8ef..97268972 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -31,7 +31,7 @@ from typing import ( Any, Callable, AsyncGenerator, - # Type, + TYPE_CHECKING, ) from functools import partial from dataclasses import dataclass @@ -46,12 +46,12 @@ from ._state import ( from ._ipc import Channel from .log import get_logger from .msg import ( - Error, + # Error, NamespacePath, Return, ) from ._exceptions import ( - unpack_error, + # unpack_error, NoResult, ) from ._context import ( @@ -62,42 +62,44 @@ from ._streaming import ( MsgStream, ) +if TYPE_CHECKING: + from ._runtime import Actor log = get_logger(__name__) -# TODO: rename to `unwrap_result()` and use -# `._raise_from_no_key_in_msg()` (after tweak to -# accept a `chan: Channel` arg) in key block! -def _unwrap_msg( - msg: Return|Error, - channel: Channel, +# TODO: remove and/or rework? +# -[ ] rename to `unwrap_result()` and use +# `._raise_from_unexpected_msg()` (after tweak to accept a `chan: +# Channel` arg) in key block?? +# -[ ] pretty sure this is entirely covered by +# `_exceptions._raise_from_unexpected_msg()` so REMOVE! +# def _unwrap_msg( +# msg: Return|Error, +# ctx: Context, - hide_tb: bool = True, +# hide_tb: bool = True, -) -> Any: - ''' - Unwrap a final result from a `{return: }` IPC msg. +# ) -> Any: +# ''' +# Unwrap a final result from a `{return: }` IPC msg. - ''' - __tracebackhide__: bool = hide_tb +# ''' +# __tracebackhide__: bool = hide_tb +# try: +# return msg.pld +# except AttributeError as err: - try: - return msg.pld - # return msg['return'] - # except KeyError as ke: - except AttributeError as err: +# # internal error should never get here +# # assert msg.get('cid'), ( +# assert msg.cid, ( +# "Received internal error at portal?" +# ) - # internal error should never get here - # assert msg.get('cid'), ( - assert msg.cid, ( - "Received internal error at portal?" - ) - - raise unpack_error( - msg, - channel - ) from err +# raise unpack_error( +# msg, +# ctx.chan, +# ) from err class Portal: @@ -123,17 +125,21 @@ class Portal: # connected (peer) actors. cancel_timeout: float = 0.5 - def __init__(self, channel: Channel) -> None: + def __init__( + self, + channel: Channel, + ) -> None: + self.chan = channel # during the portal's lifetime - self._result_msg: dict|None = None + self._final_result: Any|None = None # When set to a ``Context`` (when _submit_for_result is called) # it is expected that ``result()`` will be awaited at some # point. 
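The `Portal.result()` rework in the surrounding hunk amounts to
a fetch-once pattern: the first call awaits the final value off the
expect-result ctx and caches it for any later calls. A hypothetical,
condensed form of just that logic:

    import trio

    class ResultFetcher:
        def __init__(
            self,
            rx_chan: trio.MemoryReceiveChannel,
        ) -> None:
            self._rx = rx_chan
            self._final_result: object = None

        async def result(self) -> object:
            # only await the channel once; later calls return the
            # cached value. (NB: an actual `None` result would
            # trigger a re-fetch, the same edge the `is None` check
            # in the real code carries.)
            if self._final_result is None:
                self._final_result = await self._rx.receive()
            return self._final_result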
- self._expect_result: Context | None = None + self._expect_result_ctx: Context|None = None self._streams: set[MsgStream] = set() - self.actor = current_actor() + self.actor: Actor = current_actor() @property def channel(self) -> Channel: @@ -147,6 +153,7 @@ class Portal: ) return self.chan + # TODO: factor this out into an `ActorNursery` wrapper async def _submit_for_result( self, ns: str, @@ -154,27 +161,18 @@ class Portal: **kwargs ) -> None: - assert self._expect_result is None, ( - "A pending main result has already been submitted" - ) + if self._expect_result_ctx is not None: + raise RuntimeError( + 'A pending main result has already been submitted' + ) - self._expect_result = await self.actor.start_remote_task( + self._expect_result_ctx = await self.actor.start_remote_task( self.channel, nsf=NamespacePath(f'{ns}:{func}'), kwargs=kwargs, portal=self, ) - async def _return_once( - self, - ctx: Context, - - ) -> Return: - - assert ctx._remote_func_type == 'asyncfunc' # single response - msg: Return = await ctx._recv_chan.receive() - return msg - async def result(self) -> Any: ''' Return the result(s) from the remote actor's "main" task. @@ -188,7 +186,7 @@ class Portal: raise exc # not expecting a "main" result - if self._expect_result is None: + if self._expect_result_ctx is None: log.warning( f"Portal for {self.channel.uid} not expecting a final" " result?\nresult() should only be called if subactor" @@ -196,17 +194,15 @@ class Portal: return NoResult # expecting a "main" result - assert self._expect_result + assert self._expect_result_ctx - if self._result_msg is None: - self._result_msg = await self._return_once( - self._expect_result + if self._final_result is None: + self._final_result: Any = await self._expect_result_ctx._pld_rx.recv_pld( + ctx=self._expect_result_ctx, + expect_msg=Return, ) - return _unwrap_msg( - self._result_msg, - self.channel, - ) + return self._final_result async def _cancel_streams(self): # terminate all locally running async generator @@ -337,11 +333,9 @@ class Portal: kwargs=kwargs, portal=self, ) - ctx._portal: Portal = self - msg: Return = await self._return_once(ctx) - return _unwrap_msg( - msg, - self.channel, + return await ctx._pld_rx.recv_pld( + ctx=ctx, + expect_msg=Return, ) async def run( @@ -391,10 +385,9 @@ class Portal: kwargs=kwargs, portal=self, ) - ctx._portal = self - return _unwrap_msg( - await self._return_once(ctx), - self.channel, + return await ctx._pld_rx.recv_pld( + ctx=ctx, + expect_msg=Return, ) @acm @@ -436,7 +429,7 @@ class Portal: # deliver receive only stream async with MsgStream( ctx=ctx, - rx_chan=ctx._recv_chan, + rx_chan=ctx._rx_chan, ) as rchan: self._streams.add(rchan) yield rchan diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 4d675716..7ab14ab9 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -819,8 +819,8 @@ class Actor: state.max_buffer_size = msg_buffer_size except KeyError: - log.runtime( - f'Creating NEW IPC ctx for\n' + log.debug( + f'Allocate new IPC ctx for\n' f'peer: {chan.uid}\n' f'cid: {cid}\n' ) @@ -908,7 +908,7 @@ class Actor: # this should be immediate and does not (yet) wait for the # remote child task to sync via `Context.started()`. 
with trio.fail_after(ack_timeout): - first_msg: msgtypes.StartAck = await ctx._recv_chan.receive() + first_msg: msgtypes.StartAck = await ctx._rx_chan.receive() try: functype: str = first_msg.functype except AttributeError: diff --git a/tractor/_streaming.py b/tractor/_streaming.py index 16e32cea..764b7c1e 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -35,7 +35,7 @@ import warnings import trio from ._exceptions import ( - _raise_from_no_key_in_msg, + # _raise_from_no_key_in_msg, ContextCancelled, ) from .log import get_logger @@ -44,8 +44,9 @@ from .trionics import ( BroadcastReceiver, ) from tractor.msg import ( - Return, - Stop, + # Return, + # Stop, + MsgType, Yield, ) @@ -94,24 +95,23 @@ class MsgStream(trio.abc.Channel): self._eoc: bool|trio.EndOfChannel = False self._closed: bool|trio.ClosedResourceError = False + # TODO: could we make this a direct method bind to `PldRx`? + # -> receive_nowait = PldRx.recv_pld + # |_ means latter would have to accept `MsgStream`-as-`self`? + # => should be fine as long as, + # -[ ] both define `._rx_chan` + # -[ ] .ctx is bound into `PldRx` using a `@cm`? + # # delegate directly to underlying mem channel def receive_nowait( self, - allow_msgs: list[str] = Yield, + expect_msg: MsgType = Yield, ): - msg: Yield|Stop = self._rx_chan.receive_nowait() - # TODO: replace msg equiv of this or does the `.pld` - # interface read already satisfy it? I think so, yes? - try: - return msg.pld - except AttributeError as attrerr: - _raise_from_no_key_in_msg( - ctx=self._ctx, - msg=msg, - src_err=attrerr, - log=log, - stream=self, - ) + ctx: Context = self._ctx + return ctx._pld_rx.recv_pld_nowait( + ctx=ctx, + expect_msg=expect_msg, + ) async def receive( self, @@ -146,24 +146,9 @@ class MsgStream(trio.abc.Channel): src_err: Exception|None = None # orig tb try: - try: - msg: Yield = await self._rx_chan.receive() - return msg.pld - # TODO: implement with match: instead? - except AttributeError as attrerr: - # src_err = kerr - src_err = attrerr - - # NOTE: may raise any of the below error types - # includg EoC when a 'stop' msg is found. - _raise_from_no_key_in_msg( - ctx=self._ctx, - msg=msg, - src_err=attrerr, - log=log, - stream=self, - ) + ctx: Context = self._ctx + return await ctx._pld_rx.recv_pld(ctx=ctx) # XXX: the stream terminates on either of: # - via `self._rx_chan.receive()` raising after manual closure @@ -228,7 +213,7 @@ class MsgStream(trio.abc.Channel): # probably want to instead raise the remote error # over the end-of-stream connection error since likely # the remote error was the source cause? - ctx: Context = self._ctx + # ctx: Context = self._ctx ctx.maybe_raise( raise_ctxc_from_self_call=True, ) @@ -292,7 +277,8 @@ class MsgStream(trio.abc.Channel): while not drained: try: maybe_final_msg = self.receive_nowait( - allow_msgs=[Yield, Return], + # allow_msgs=[Yield, Return], + expect_msg=Yield, ) if maybe_final_msg: log.debug( @@ -472,6 +458,9 @@ class MsgStream(trio.abc.Channel): self, # use memory channel size by default self._rx_chan._state.max_buffer_size, # type: ignore + + # TODO: can remove this kwarg right since + # by default behaviour is to do this anyway? 
receive_afunc=self.receive, ) @@ -517,19 +506,11 @@ class MsgStream(trio.abc.Channel): raise self._closed try: - # await self._ctx.chan.send( - # payload={ - # 'yield': data, - # 'cid': self._ctx.cid, - # }, - # # hide_tb=hide_tb, - # ) await self._ctx.chan.send( payload=Yield( cid=self._ctx.cid, pld=data, ), - # hide_tb=hide_tb, ) except ( trio.ClosedResourceError, @@ -562,7 +543,7 @@ def stream(func: Callable) -> Callable: ''' # TODO: apply whatever solution ``mypy`` ends up picking for this: # https://github.com/python/mypy/issues/2087#issuecomment-769266912 - func._tractor_stream_function = True # type: ignore + func._tractor_stream_function: bool = True # type: ignore sig = inspect.signature(func) params = sig.parameters diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py new file mode 100644 index 00000000..e78b79a4 --- /dev/null +++ b/tractor/msg/_ops.py @@ -0,0 +1,563 @@ +# tractor: structured concurrent "actors". +# Copyright 2018-eternity Tyler Goodlet. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +Near-application abstractions for `MsgType.pld: PayloadT|Raw` +delivery, filtering and type checking as well as generic +operational helpers for processing transaction flows. + +''' +from __future__ import annotations +from contextlib import ( + # asynccontextmanager as acm, + contextmanager as cm, +) +from pprint import pformat +from typing import ( + Any, + Type, + TYPE_CHECKING, + # Union, +) +# ------ - ------ +from msgspec import ( + msgpack, + Raw, + Struct, + ValidationError, +) +import trio +# ------ - ------ +from tractor.log import get_logger +from tractor._exceptions import ( + MessagingError, + InternalError, + _raise_from_unexpected_msg, + MsgTypeError, + _mk_msg_type_err, + pack_from_raise, +) +from ._codec import ( + mk_dec, + MsgDec, +) +from .types import ( + CancelAck, + Error, + MsgType, + PayloadT, + Return, + Started, + Stop, + Yield, + # pretty_struct, +) + + +if TYPE_CHECKING: + from tractor._context import Context + from tractor._streaming import MsgStream + + +log = get_logger(__name__) + + +class PldRx(Struct): + ''' + A "msg payload receiver". + + The pairing of a "feeder" `trio.abc.ReceiveChannel` and an + interchange-specific (eg. msgpack) payload field decoder. The + validation/type-filtering rules are runtime mutable and allow + type constraining the set of `MsgType.pld: Raw|PayloadT` + values at runtime, per IPC task-context. + + This abstraction, being just below "user application code", + allows for the equivalent of our `MsgCodec` (used for + typer-filtering IPC dialog protocol msgs against a msg-spec) + but with granular control around payload delivery (i.e. the + data-values user code actually sees and uses (the blobs that + are "shuttled" by the wrapping dialog prot) such that invalid + `.pld: Raw` can be decoded and handled by IPC-primitive user + code (i.e. 
that operates on `Context` and `MsgStream` APIs)
+    without knowledge of the lower level `Channel`/`MsgTransport`
+    primitives nor the `MsgCodec` in use. Further, lazily decoding
+    payload blobs allows for topical (and maybe intentionally
+    "partial") encryption of msg field subsets.
+
+    '''
+    # TODO: better to bind it here?
+    # _rx_mc: trio.MemoryReceiveChannel
+    _msgdec: MsgDec = mk_dec(spec=Any)
+
+    _ipc: Context|MsgStream|None = None
+
+    @cm
+    def apply_to_ipc(
+        self,
+        ipc_prim: Context|MsgStream,
+
+    ) -> PldRx:
+        '''
+        Apply this payload receiver to an IPC primitive type, one
+        of `Context` or `MsgStream`.
+
+        '''
+        self._ipc = ipc_prim
+        try:
+            yield self
+        finally:
+            self._ipc = None
+
+    @property
+    def dec(self) -> msgpack.Decoder:
+        return self._msgdec.dec
+
+    def recv_pld_nowait(
+        self,
+        # TODO: make this `MsgStream` compat as well, see above^
+        # ipc_prim: Context|MsgStream,
+        ctx: Context,
+
+        ipc_msg: MsgType|None = None,
+        expect_msg: Type[MsgType]|None = None,
+
+        **kwargs,
+
+    ) -> Any|Raw:
+
+        msg: MsgType = (
+            ipc_msg
+            or
+
+            # sync-rx msg from underlying IPC feeder (mem-)chan
+            ctx._rx_chan.receive_nowait()
+        )
+        return self.dec_msg(
+            msg,
+            ctx=ctx,
+            expect_msg=expect_msg,
+        )
+
+    async def recv_pld(
+        self,
+        ctx: Context,
+        ipc_msg: MsgType|None = None,
+        expect_msg: Type[MsgType]|None = None,
+
+        **kwargs
+
+    ) -> Any|Raw:
+        '''
+        Receive a `MsgType`, then decode and return its `.pld` field.
+
+        '''
+        msg: MsgType = (
+            ipc_msg
+            or
+
+            # async-rx msg from underlying IPC feeder (mem-)chan
+            await ctx._rx_chan.receive()
+        )
+        return self.dec_msg(
+            msg,
+            ctx=ctx,
+            expect_msg=expect_msg,
+        )
+
+    def dec_msg(
+        self,
+        msg: MsgType,
+        ctx: Context,
+        expect_msg: Type[MsgType]|None = None,
+
+    ) -> PayloadT|Raw:
+        '''
+        Decode a msg's payload field: `MsgType.pld: PayloadT|Raw` and
+        return the value or raise an appropriate error.
+
+        '''
+        match msg:
+            # payload-data shuttle msg; deliver the `.pld` value
+            # directly to IPC (primitive) client-consumer code.
+            case (
+                Started(pld=pld)  # sync phase
+                |Yield(pld=pld)  # streaming phase
+                |Return(pld=pld)  # termination phase
+            ):
+                try:
+                    pld: PayloadT = self._msgdec.decode(pld)
+                    log.runtime(
+                        'Decode msg payload\n\n'
+                        f'{msg}\n\n'
+                        f'{pld}\n'
+                    )
+                    return pld
+
+                # XXX pld-type failure
+                except ValidationError as src_err:
+                    msgterr: MsgTypeError = _mk_msg_type_err(
+                        msg=msg,
+                        codec=self._msgdec,
+                        src_validation_error=src_err,
+                    )
+                    msg: Error = pack_from_raise(
+                        local_err=msgterr,
+                        cid=msg.cid,
+                        src_uid=ctx.chan.uid,
+                    )
+
+                # XXX some other decoder specific failure?
+                # except TypeError as src_error:
+                #     from .devx import mk_pdb
+                #     mk_pdb().set_trace()
+                #     raise src_error
+
+            # a runtime-internal RPC endpoint response.
+            # always passthrough since (internal) runtime
+            # responses are generally never exposed to consumer
+            # code.
+            case CancelAck(
+                pld=bool(cancelled)
+            ):
+                return cancelled
+
+            case Error():
+                src_err = MessagingError(
+                    'IPC dialog termination by msg'
+                )
+
+            case _:
+                src_err = InternalError(
+                    'Unknown IPC msg ??\n\n'
+                    f'{msg}\n'
+                )
+
+        # fallthrough and raise from `src_err`
+        _raise_from_unexpected_msg(
+            ctx=ctx,
+            msg=msg,
+            src_err=src_err,
+            log=log,
+            expect_msg=expect_msg,
+            hide_tb=False,
+        )
+
+    async def recv_msg_w_pld(
+        self,
+        ipc: Context|MsgStream,
+
+    ) -> tuple[MsgType, PayloadT]:
+        '''
+        Retrieve the next avail IPC msg, decode its payload, and
+        return the pair of refs.
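+
+        (note: this is the helper used by the runtime's drain
+        machinery, eg. `drain_to_final_msg()` below, which wants
+        both the msg envelope for `match:`-ing and the decoded
+        payload value)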
+
+        '''
+        msg: MsgType = await ipc._rx_chan.receive()
+
+        # TODO: is there some way we can inject the decoded
+        # payload into an existing output buffer for the original
+        # msg instance?
+        pld: PayloadT = self.dec_msg(
+            msg,
+            ctx=ipc,
+        )
+        return msg, pld
+
+
+async def drain_to_final_msg(
+    ctx: Context,
+
+    hide_tb: bool = True,
+    msg_limit: int = 6,
+
+) -> tuple[
+    Return|None,
+    list[MsgType]
+]:
+    '''
+    Drain IPC msgs delivered to the underlying IPC primitive's
+    rx-mem-chan (eg. `Context._rx_chan`) from the runtime in
+    search of a final result or error.
+
+    The motivation here is to ideally capture errors during ctxc
+    conditions where a canc-request/or local error is sent but the
+    local task also excepts and enters the
+    `Portal.open_context().__aexit__()` block wherein we prefer to
+    capture and raise any remote error or ctxc-ack as part of the
+    `ctx.result()` cleanup and teardown sequence.
+
+    '''
+    __tracebackhide__: bool = hide_tb
+    raise_overrun: bool = not ctx._allow_overruns
+
+    # wait for a final context result by collecting (but
+    # basically ignoring) any bi-dir-stream msgs still in transit
+    # from the far end.
+    pre_result_drained: list[MsgType] = []
+    return_msg: Return|None = None
+    while not (
+        ctx.maybe_error
+        and not ctx._final_result_is_set()
+    ):
+        try:
+            # TODO: can remove?
+            # await trio.lowlevel.checkpoint()
+
+            # NOTE: this REPL usage actually works here dawg! Bo
+            # from .devx._debug import pause
+            # await pause()
+
+            # TODO: bad idea?
+            # -[ ] wrap final outcome channel wait in a scope so
+            # it can be cancelled out of band if needed?
+            #
+            # with trio.CancelScope() as res_cs:
+            #     ctx._res_scope = res_cs
+            #     msg: dict = await ctx._rx_chan.receive()
+            # if res_cs.cancelled_caught:
+
+            # TODO: ensure there's no more hangs, debugging the
+            # runtime pretty preaase!
+            # from .devx._debug import pause
+            # await pause()
+
+            # TODO: can remove this finally?
+            # we have no more need for the sync draining right
+            # since we can kinda guarantee the async
+            # `.receive()` below will never block yah?
+            #
+            # if (
+            #     ctx._cancel_called and (
+            #         ctx.cancel_acked
+            #         # or ctx.chan._cancel_called
+            #     )
+            #     # or not ctx._final_result_is_set()
+            #     # ctx.outcome is not
+            #     # or ctx.chan._closed
+            # ):
+            #     try:
+            #         msg: dict = await ctx._rx_chan.receive_nowait()
+            #     except trio.WouldBlock:
+            #         log.warning(
+            #             'When draining already `.cancel_called` ctx!\n'
+            #             'No final msg arrived..\n'
+            #         )
+            #         break
+            #     else:
+            #         msg: dict = await ctx._rx_chan.receive()
+
+            # TODO: don't need it right jefe?
+            # with trio.move_on_after(1) as cs:
+            # if cs.cancelled_caught:
+            #     from .devx._debug import pause
+            #     await pause()
+
+            # pray to the `trio` gawds that we're correct with this
+            # msg: dict = await ctx._rx_chan.receive()
+            msg, pld = await ctx._pld_rx.recv_msg_w_pld(ipc=ctx)
+
+            # NOTE: we get here if the far end was
+            # `ContextCancelled` in 2 cases:
+            # 1. we requested the cancellation and thus
+            #    SHOULD NOT raise that far end error,
+            # 2. WE DID NOT REQUEST that cancel and thus
+            #    SHOULD RAISE HERE!
+        except trio.Cancelled:
+
+            # CASE 2: mask the local cancelled-error(s)
+            # only when we are sure the remote error is
+            # the source cause of this local task's
+            # cancellation.
+            ctx.maybe_raise()
+
+            # CASE 1: we DID request the cancel we simply
+            # continue to bubble up as normal.
+            raise
+
+        match msg:
+
+            # final result arrived!
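+            # NOTE: the `.pld` was already decoded by the
+            # `PldRx.recv_msg_w_pld()` call above, so `pld`
+            # here is the final, ready-to-use result value
+            # sent by the remote task.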
+            case Return(
+                # cid=cid,
+                # pld=res,
+            ):
+                # ctx._result: Any = res
+                ctx._result: Any = pld
+                log.runtime(
+                    'Context delivered final draining msg:\n'
+                    f'{pformat(msg)}'
+                )
+                # XXX: only close the rx mem chan AFTER
+                # a final result is retrieved.
+                # if ctx._rx_chan:
+                #     await ctx._rx_chan.aclose()
+                # TODO: ^ we don't need it right?
+                return_msg = msg
+                break
+
+            # far end task is still streaming to us so discard
+            # and report depending on local ctx state.
+            case Yield():
+                pre_result_drained.append(msg)
+                if (
+                    (ctx._stream.closed
+                     and (reason := 'stream was already closed')
+                    )
+                    or (ctx.cancel_acked
+                        and (reason := 'ctx cancelled other side')
+                    )
+                    or (ctx._cancel_called
+                        and (reason := 'ctx called `.cancel()`')
+                    )
+                    or (len(pre_result_drained) > msg_limit
+                        and (reason := f'"yield" limit={msg_limit}')
+                    )
+                ):
+                    log.cancel(
+                        'Cancelling `MsgStream` drain since '
+                        f'{reason}\n\n'
+                        f'<= {ctx.chan.uid}\n'
+                        f' |_{ctx._nsf}()\n\n'
+                        f'=> {ctx._task}\n'
+                        f' |_{ctx._stream}\n\n'
+
+                        f'{pformat(msg)}\n'
+                    )
+                    return (
+                        return_msg,
+                        pre_result_drained,
+                    )
+
+                # drain up to the `msg_limit` hoping to get
+                # a final result or error/ctxc.
+                else:
+                    log.warning(
+                        'Ignoring "yield" msg during `ctx.result()` drain..\n'
+                        f'<= {ctx.chan.uid}\n'
+                        f' |_{ctx._nsf}()\n\n'
+                        f'=> {ctx._task}\n'
+                        f' |_{ctx._stream}\n\n'
+
+                        f'{pformat(msg)}\n'
+                    )
+                    continue
+
+            # stream terminated, but no result yet..
+            #
+            # TODO: work out edge cases here where
+            # a stream is open but the task also calls
+            # this?
+            # -[ ] should be a runtime error if a stream is open right?
+            # Stop()
+            case Stop():
+                pre_result_drained.append(msg)
+                log.cancel(
+                    'Remote stream terminated due to "stop" msg:\n\n'
+                    f'{pformat(msg)}\n'
+                )
+                continue
+
+            # remote error msg, likely already handled inside
+            # `Context._deliver_msg()`
+            case Error():
+                # TODO: can we replace this with `ctx.maybe_raise()`?
+                # -[ ] would this be handier for this case maybe?
+                # async with maybe_raise_on_exit() as raises:
+                #     if raises:
+                #         log.error('some msg about raising..')
+                #
+                re: Exception|None = ctx._remote_error
+                if re:
+                    assert msg is ctx._cancel_msg
+                    # NOTE: this solved a super duper edge case XD
+                    # this was THE super duper edge case of:
+                    # - local task opens a remote task,
+                    # - requests remote cancellation of far end
+                    #   ctx/tasks,
+                    # - needs to wait for the cancel ack msg
+                    #   (ctxc) or some result in the race case
+                    #   where the other side's task returns
+                    #   before the cancel request msg is ever
+                    #   rxed and processed,
+                    # - here this surrounding drain loop (which
+                    #   iterates all ipc msgs until the ack or
+                    #   an early result arrives) was NOT exiting
+                    #   since we are the edge case: local task
+                    #   does not re-raise any ctxc it receives
+                    #   IFF **it** was the cancellation
+                    #   requester..
+                    #
+                    # XXX will raise if necessary but ow break
+                    # from loop presuming any suppressed error
+                    # (ctxc) should terminate the context!
+                    ctx._maybe_raise_remote_err(
+                        re,
+                        # NOTE: obvi we don't care if we
+                        # overran the far end if we're already
+                        # waiting on a final result (msg).
+                        # raise_overrun_from_self=False,
+                        raise_overrun_from_self=raise_overrun,
+                    )
+
+                    break  # OOOOOF, yeah obvi we need this..
+
+                # XXX we should never really get here
+                # right! since `._deliver_msg()` should
+                # always have detected an {'error': ..}
+                # msg and already called this right!?!
+                # elif error := unpack_error(
+                #     msg=msg,
+                #     chan=ctx._portal.channel,
+                #     hide_tb=False,
+                # ):
+                #     log.critical('SHOULD NEVER GET HERE!?')
+                #     assert msg is ctx._cancel_msg
+                #     assert error.msgdata == ctx._remote_error.msgdata
+                #     assert error.ipc_msg == ctx._remote_error.ipc_msg
+                #     from .devx._debug import pause
+                #     await pause()
+                #     ctx._maybe_cancel_and_set_remote_error(error)
+                #     ctx._maybe_raise_remote_err(error)
+
+                else:
+                    # bubble the original src key error
+                    raise
+
+            # XXX should pretty much never get here unless someone
+            # overrides the default `MsgType` spec.
+            case _:
+                pre_result_drained.append(msg)
+                # It's definitely an internal error if any other
+                # msg type without a `'cid'` field arrives here!
+                if not msg.cid:
+                    raise InternalError(
+                        'Unexpected cid-missing msg?\n\n'
+                        f'{msg}\n'
+                    )
+
+                raise RuntimeError(f'Unknown msg type: {msg}')
+
+    else:
+        log.cancel(
+            'Skipping `MsgStream` drain since final outcome is set\n\n'
+            f'{ctx.outcome}\n'
+        )
+
+    return (
+        return_msg,
+        pre_result_drained,
+    )
-- 
2.34.1


From 6819cf908ad4ce7a36a538b50cdefba8aa5fc0dc Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 24 Apr 2024 12:31:05 -0400
Subject: [PATCH 068/305] Use `Context._stream` in `_raise_from_unexpected_msg()`

Instead of expecting it to be passed in (as it was prior), when
determining if a `Stop` msg is a valid end-of-channel signal use the
`ctx._stream: MsgStream|None` attr which **must** be set by any stream
opening API; either of:
- `Context.open_stream()`
- `Portal.open_stream_from()`

Adjust the case block logic to match with fallthrough from any EoC to
a closed error if necessary. Change the `_type: str` to match the
failing IPC-prim name in the tail case we raise a `MessagingError`.

Other:
- move `.sender: tuple` uid attr up to `RemoteActorError` since `Error`
  optionally defines it as a field and for boxed `StreamOverrun`s (an
  ignore case we check for in the runtime during cancellation) we want
  it readable from the boxing rae.
- drop still unused `InternalActorError`.
---
 tractor/_exceptions.py | 107 +++++++++++++++++++----------------------
 1 file changed, 49 insertions(+), 58 deletions(-)

diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py
index b2ba6e84..8d9274fe 100644
--- a/tractor/_exceptions.py
+++ b/tractor/_exceptions.py
@@ -532,7 +532,8 @@ class RemoteActorError(Exception):
         self,
     ) -> BaseException:
         '''
-        Unpack the inner-most source error from it's original IPC msg data.
+        Unpack the inner-most source error from its original IPC
+        msg data.
 
         We attempt to reconstruct (as best as we can) the original
         `Exception` from as it would have been raised in the
@@ -570,6 +571,14 @@ class RemoteActorError(Exception):
         # # boxed_type=get_type_ref(..
         #     raise NotImplementedError
 
+    @property
+    def sender(self) -> tuple[str, str]|None:
+        if (
+            (msg := self._ipc_msg)
+            and (value := msg.sender)
+        ):
+            return tuple(value)
+
 
 class ContextCancelled(RemoteActorError):
     '''
@@ -734,20 +743,6 @@ class StreamOverrun(
     handled by app code using `MsgStream.send()/.receive()`.
 
     '''
-    @property
-    def sender(self) -> tuple[str, str] | None:
-        value = self._ipc_msg.sender
-        if value:
-            return tuple(value)
-
-
-# class InternalActorError(RemoteActorError):
-#     '''
-#     Boxed (Remote) internal `tractor` error indicating failure of some
-#     primitive, machinery state or lowlevel task that should never
-#     occur.
- -# ''' class TransportClosed(trio.ClosedResourceError): @@ -945,7 +940,6 @@ def _raise_from_unexpected_msg( log: StackLevelAdapter, # caller specific `log` obj expect_msg: str = Yield, - stream: MsgStream | None = None, # allow "deeper" tbs when debugging B^o hide_tb: bool = True, @@ -987,6 +981,8 @@ def _raise_from_unexpected_msg( ) from src_err # TODO: test that shows stream raising an expected error!!! + stream: MsgStream|None + _type: str = 'Context' # raise the error message in a boxed exception type! if isinstance(msg, Error): @@ -1003,55 +999,50 @@ def _raise_from_unexpected_msg( # TODO: does it make more sense to pack # the stream._eoc outside this in the calleer always? # case Stop(): - elif ( - isinstance(msg, Stop) - or ( - stream - and stream._eoc - ) - ): - log.debug( - f'Context[{cid}] stream was stopped by remote side\n' - f'cid: {cid}\n' - ) + elif stream := ctx._stream: + _type: str = 'MsgStream' - # TODO: if the a local task is already blocking on - # a `Context.result()` and thus a `.receive()` on the - # rx-chan, we close the chan and set state ensuring that - # an eoc is raised! + if ( + stream._eoc + or + isinstance(msg, Stop) + ): + log.debug( + f'Context[{cid}] stream was stopped by remote side\n' + f'cid: {cid}\n' + ) - # XXX: this causes ``ReceiveChannel.__anext__()`` to - # raise a ``StopAsyncIteration`` **and** in our catch - # block below it will trigger ``.aclose()``. - eoc = trio.EndOfChannel( - f'Context stream ended due to msg:\n\n' - f'{pformat(msg)}\n' - ) - # XXX: important to set so that a new `.receive()` - # call (likely by another task using a broadcast receiver) - # doesn't accidentally pull the `return` message - # value out of the underlying feed mem chan which is - # destined for the `Context.result()` call during ctx-exit! - stream._eoc: Exception = eoc + # TODO: if the a local task is already blocking on + # a `Context.result()` and thus a `.receive()` on the + # rx-chan, we close the chan and set state ensuring that + # an eoc is raised! - # in case there already is some underlying remote error - # that arrived which is probably the source of this stream - # closure - ctx.maybe_raise() - raise eoc from src_err + # XXX: this causes ``ReceiveChannel.__anext__()`` to + # raise a ``StopAsyncIteration`` **and** in our catch + # block below it will trigger ``.aclose()``. + eoc = trio.EndOfChannel( + f'Context stream ended due to msg:\n\n' + f'{pformat(msg)}\n' + ) + # XXX: important to set so that a new `.receive()` + # call (likely by another task using a broadcast receiver) + # doesn't accidentally pull the `return` message + # value out of the underlying feed mem chan which is + # destined for the `Context.result()` call during ctx-exit! + stream._eoc: Exception = eoc - if ( - stream - and stream._closed - ): - # TODO: our own error subtype? - raise trio.ClosedResourceError( - 'This stream was closed' - ) + # in case there already is some underlying remote error + # that arrived which is probably the source of this stream + # closure + ctx.maybe_raise() + raise eoc from src_err + + if stream._closed: + # TODO: our own error subtype? + raise trio.ClosedResourceError('This stream was closed') # always re-raise the source error if no translation error case # is activated above. 
-    _type: str = 'Stream' if stream else 'Context'
     raise MessagingError(
         f"{_type} was expecting a {expect_msg} message"
         " BUT received a non-error msg:\n"
         f'{pformat(msg)}'
     ) from src_err
-- 
2.34.1


From 875081e7a2de203e08c1d21a51523f442e6d2c60 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 24 Apr 2024 12:43:08 -0400
Subject: [PATCH 069/305] Set `Context._stream` in `Portal.open_stream_from()`..

---
 tractor/_portal.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tractor/_portal.py b/tractor/_portal.py
index 97268972..f3928657 100644
--- a/tractor/_portal.py
+++ b/tractor/_portal.py
@@ -420,7 +420,6 @@ class Portal:
             kwargs=kwargs,
             portal=self,
         )
-        ctx._portal = self
 
         # ensure receive-only stream entrypoint
         assert ctx._remote_func_type == 'asyncgen'
@@ -430,9 +429,10 @@ class Portal:
             async with MsgStream(
                 ctx=ctx,
                 rx_chan=ctx._rx_chan,
-            ) as rchan:
-                self._streams.add(rchan)
-                yield rchan
+            ) as stream:
+                self._streams.add(stream)
+                ctx._stream = stream
+                yield stream
 
         finally:
 
@@ -454,7 +454,7 @@ class Portal:
 
             # XXX: should this always be done?
             # await recv_chan.aclose()
-            self._streams.remove(rchan)
+            self._streams.remove(stream)
 
 # NOTE: impl is found in `._context`` mod to make
 # reading/groking the details simpler code-org-wise. This
-- 
2.34.1


From e46046a746c8aea2db1a4e720f9db19e77bcc930 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 24 Apr 2024 13:07:05 -0400
Subject: [PATCH 070/305] Try out `msgspec` encode-buffer optimization

As per the reco:
https://jcristharif.com/msgspec/perf-tips.html#reusing-an-output-buffer

BUT, seems to cause this error in `pikerd`..
`BufferError: Existing exports of data: object cannot be re-sized`

Soo no idea? Maybe there's a tweak needed that we can glean from
tests/examples in the `msgspec` repo?

Disabling for now.
---
 tractor/msg/__init__.py |  5 +++++
 tractor/msg/_codec.py   | 17 ++++++++++++++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py
index d968f6cf..13739cdb 100644
--- a/tractor/msg/__init__.py
+++ b/tractor/msg/__init__.py
@@ -37,6 +37,11 @@ from ._codec import (
     MsgDec as MsgDec,
     current_codec as current_codec,
 )
+# currently can't bc circular with `._context`
+# from ._ops import (
+#     PldRx as PldRx,
+#     _drain_to_final_msg as _drain_to_final_msg,
+# )
 
 from .types import (
     Msg as Msg,
diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py
index 104f7d99..e3540c3d 100644
--- a/tractor/msg/_codec.py
+++ b/tractor/msg/_codec.py
@@ -280,17 +280,32 @@ class MsgCodec(Struct):
     def enc(self) -> msgpack.Encoder:
         return self._enc
 
+    # TODO: reusing encode buffer for perf?
+    # https://jcristharif.com/msgspec/perf-tips.html#reusing-an-output-buffer
+    _buf: bytearray = bytearray()
+
     def encode(
         self,
         py_obj: Any,
 
+        use_buf: bool = False,
+        # ^-XXX-^ uhh why am i getting this?
+        # |_BufferError: Existing exports of data: object cannot be re-sized
+
     ) -> bytes:
         '''
         Encode input python objects to `msgpack` bytes for transfer
         on a transport protocol connection.
+ When `use_buf == True` use the output buffer optimization: + https://jcristharif.com/msgspec/perf-tips.html#reusing-an-output-buffer + ''' - return self._enc.encode(py_obj) + if use_buf: + self._enc.encode_into(py_obj, self._buf) + return self._buf + else: + return self._enc.encode(py_obj) @property def dec(self) -> msgpack.Decoder: -- 2.34.1 From 94d8bef2d65f72c841fc3062a042507d5e776896 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 25 Apr 2024 12:33:10 -0400 Subject: [PATCH 071/305] Another `._rpc` mod passthrough - tweaking logging to include more `MsgType` dumps on IPC faults. - removing some commented cruft. - comment formatting / cleanups / add-ons. - more type annots. - fill out some TODO content. --- tractor/_rpc.py | 163 ++++++++++++++++++++++++------------------------ 1 file changed, 80 insertions(+), 83 deletions(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index 5970a101..17d37564 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -177,12 +177,11 @@ async def _invoke_non_context( # way: using the linked IPC context machinery. failed_resp: bool = False try: - await chan.send( - StartAck( - cid=cid, - functype='asyncfunc', - ) + ack = StartAck( + cid=cid, + functype='asyncfunc', ) + await chan.send(ack) except ( trio.ClosedResourceError, trio.BrokenResourceError, @@ -190,12 +189,11 @@ async def _invoke_non_context( ) as ipc_err: failed_resp = True if is_rpc: - raise + raise ipc_err else: - # TODO: should this be an `.exception()` call? - log.warning( - f'Failed to respond to non-rpc request: {func}\n' - f'{ipc_err}' + log.exception( + f'Failed to respond to runtime RPC request for\n\n' + f'{ack}\n' ) with cancel_scope as cs: @@ -216,20 +214,19 @@ async def _invoke_non_context( and chan.connected() ): try: - await chan.send( - return_msg( - cid=cid, - pld=result, - ) + ret_msg = return_msg( + cid=cid, + pld=result, ) + await chan.send(ret_msg) except ( BrokenPipeError, trio.BrokenResourceError, ): log.warning( - 'Failed to return result:\n' - f'{func}@{actor.uid}\n' - f'remote chan: {chan.uid}' + 'Failed to send RPC result?\n' + f'|_{func}@{actor.uid}() -> {ret_msg}\n\n' + f'x=> peer: {chan.uid}\n' ) @acm @@ -246,7 +243,7 @@ async def _errors_relayed_via_ipc( ] = trio.TASK_STATUS_IGNORED, ) -> None: - __tracebackhide__: bool = hide_tb # TODO: use hide_tb here? + __tracebackhide__: bool = hide_tb try: yield # run RPC invoke body @@ -258,23 +255,19 @@ async def _errors_relayed_via_ipc( KeyboardInterrupt, ) as err: - # always hide this frame from debug REPL if the crash - # originated from an rpc task and we DID NOT fail due to - # an IPC transport error! + # NOTE: always hide this frame from debug REPL call stack + # if the crash originated from an RPC task and we DID NOT + # fail due to an IPC transport error! if ( is_rpc - and chan.connected() + and + chan.connected() ): __tracebackhide__: bool = hide_tb + # TODO: maybe we'll want different "levels" of debugging + # eventualy such as ('app', 'supervisory', 'runtime') ? if not is_multi_cancelled(err): - - # TODO: maybe we'll want different "levels" of debugging - # eventualy such as ('app', 'supervisory', 'runtime') ? - - # if not isinstance(err, trio.ClosedResourceError) and ( - # if not is_multi_cancelled(err) and ( - entered_debug: bool = False if ( ( @@ -306,19 +299,18 @@ async def _errors_relayed_via_ipc( # strange bug in our transport layer itself? Going # to keep this open ended for now. 
entered_debug = await _debug._maybe_enter_pm(err) - if not entered_debug: log.exception( 'RPC task crashed\n' f'|_{ctx}' ) - # always (try to) ship RPC errors back to caller + # ALWAYS try to ship RPC errors back to parent/caller task if is_rpc: - # + # TODO: tests for this scenario: # - RPC caller closes connection before getting a response - # should **not** crash this actor.. + # should **not** crash this actor.. await try_ship_error_to_remote( chan, err, @@ -327,33 +319,41 @@ async def _errors_relayed_via_ipc( hide_tb=hide_tb, ) - # error is probably from above coro running code *not from - # the target rpc invocation since a scope was never - # allocated around the coroutine await. + # if the ctx cs is NOT allocated, the error is likely from + # above `coro` invocation machinery NOT from inside the + # `coro` itself, i.e. err is NOT a user application error. if ctx._scope is None: # we don't ever raise directly here to allow the # msg-loop-scheduler to continue running for this # channel. task_status.started(err) - # always reraise KBIs so they propagate at the sys-process - # level. + # always reraise KBIs so they propagate at the sys-process level. if isinstance(err, KeyboardInterrupt): raise - - # RPC task bookeeping + # RPC task bookeeping. + # since RPC tasks are scheduled inside a flat + # `Actor._service_n`, we add "handles" to each such that + # they can be individually ccancelled. finally: try: - ctx, func, is_complete = actor._rpc_tasks.pop( + ctx: Context + func: Callable + is_complete: trio.Event + ( + ctx, + func, + is_complete, + ) = actor._rpc_tasks.pop( (chan, ctx.cid) ) is_complete.set() except KeyError: + # If we're cancelled before the task returns then the + # cancel scope will not have been inserted yet if is_rpc: - # If we're cancelled before the task returns then the - # cancel scope will not have been inserted yet log.warning( 'RPC task likely errored or cancelled before start?' f'|_{ctx._task}\n' @@ -368,7 +368,7 @@ async def _errors_relayed_via_ipc( finally: if not actor._rpc_tasks: - log.runtime("All RPC tasks have completed") + log.runtime('All RPC tasks have completed') actor._ongoing_rpc_tasks.set() @@ -410,19 +410,16 @@ async def _invoke( # TODO: possibly a specially formatted traceback # (not sure what typing is for this..)? - # tb = None + # tb: TracebackType = None cancel_scope = CancelScope() - # activated cancel scope ref - cs: CancelScope|None = None - + cs: CancelScope|None = None # ref when activated ctx = actor.get_context( chan=chan, cid=cid, nsf=NamespacePath.from_ref(func), - # TODO: if we wanted to get cray and support it? - # side='callee', + # NOTE: no portal passed bc this is the "child"-side # We shouldn't ever need to pass this through right? # it's up to the soon-to-be called rpc task to @@ -455,8 +452,8 @@ async def _invoke( kwargs['stream'] = ctx + # handle decorated ``@tractor.context`` async function elif getattr(func, '_tractor_context_function', False): - # handle decorated ``@tractor.context`` async function kwargs['ctx'] = ctx context = True @@ -470,7 +467,8 @@ async def _invoke( task_status=task_status, ): if not ( - inspect.isasyncgenfunction(func) or + inspect.isasyncgenfunction(func) + or inspect.iscoroutinefunction(func) ): raise TypeError(f'{func} must be an async function!') @@ -482,8 +480,7 @@ async def _invoke( except TypeError: raise - # TODO: implement all these cases in terms of the - # `Context` one! + # TODO: impl all these cases in terms of the `Context` one! 
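+    # NOTE: the call below dispatches the simpler, non-`@context`
+    # ep styles: plain async funcs and async-gen (streaming) eps.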
if not context: await _invoke_non_context( actor, @@ -499,7 +496,7 @@ async def _invoke( return_msg, task_status, ) - # below is only for `@context` funcs + # XXX below fallthrough is ONLY for `@context` eps return # our most general case: a remote SC-transitive, @@ -576,9 +573,6 @@ async def _invoke( # itself calls `ctx._maybe_cancel_and_set_remote_error()` # which cancels the scope presuming the input error # is not a `.cancel_acked` pleaser. - # - currently a never-should-happen-fallthrough case - # inside ._context._drain_to_final_msg()`.. - # # TODO: remove this ^ right? if ctx._scope.cancelled_caught: our_uid: tuple = actor.uid @@ -594,9 +588,7 @@ async def _invoke( if cs.cancel_called: canceller: tuple = ctx.canceller - msg: str = ( - 'actor was cancelled by ' - ) + msg: str = 'actor was cancelled by ' # NOTE / TODO: if we end up having # ``Actor._cancel_task()`` call @@ -619,6 +611,8 @@ async def _invoke( else: msg += 'a remote peer' + # TODO: move this "div centering" into + # a helper for use elsewhere! div_chars: str = '------ - ------' div_offset: int = ( round(len(msg)/2)+1 @@ -698,11 +692,9 @@ async def _invoke( ctx: Context = actor._contexts.pop(( chan.uid, cid, - # ctx.side, )) merr: Exception|None = ctx.maybe_error - ( res_type_str, res_str, @@ -716,7 +708,7 @@ async def _invoke( ) log.runtime( f'IPC context terminated with a final {res_type_str}\n\n' - f'{ctx}\n' + f'{ctx}' ) @@ -802,13 +794,19 @@ async def process_messages( and `Actor.cancel()` process-wide-runtime-shutdown requests (as utilized inside `Portal.cancel_actor()` ). - ''' assert actor._service_n # state sanity # TODO: once `trio` get's an "obvious way" for req/resp we # should use it? - # https://github.com/python-trio/trio/issues/467 + # -[ ] existing GH https://github.com/python-trio/trio/issues/467 + # -[ ] for other transports (like QUIC) we can possibly just + # entirely avoid the feeder mem-chans since each msg will be + # delivered with a ctx-id already? + # + # |_ for ex, from `aioquic` which exposed "stream ids": + # - https://github.com/aiortc/aioquic/blob/main/src/aioquic/quic/connection.py#L1175 + # - https://github.com/aiortc/aioquic/blob/main/src/aioquic/quic/connection.py#L659 log.runtime( 'Entering RPC msg loop:\n' f'peer: {chan.uid}\n' @@ -846,7 +844,7 @@ async def process_messages( | Return(cid=cid) | CancelAck(cid=cid) - # `.cid` means RPC-ctx-task specific + # `.cid` indicates RPC-ctx-task scoped | Error(cid=cid) # recv-side `MsgType` decode violation @@ -1042,16 +1040,16 @@ async def process_messages( trio.Event(), ) - # runtime-scoped remote error (since no `.cid`) + # runtime-scoped remote (internal) error + # (^- bc no `Error.cid` -^) + # + # NOTE: this is the non-rpc error case, that + # is, an error NOT raised inside a call to + # `_invoke()` (i.e. no cid was provided in the + # msg - see above). Raise error inline and + # mark the channel as "globally errored" for + # all downstream consuming primitives. case Error(): - # NOTE: this is the non-rpc error case, - # that is, an error **not** raised inside - # a call to ``_invoke()`` (i.e. no cid was - # provided in the msg - see above). 
Push - # this error to all local channel - # consumers (normally portals) by marking - # the channel as errored - # assert chan.uid chan._exc: Exception = unpack_error( msg, chan=chan, @@ -1107,7 +1105,7 @@ async def process_messages( f'|_{chan.raddr}\n' ) - # transport **was** disconnected + # transport **WAS** disconnected return True except ( @@ -1146,12 +1144,11 @@ async def process_messages( finally: # msg debugging for when he machinery is brokey log.runtime( - 'Exiting IPC msg loop with\n' - f'peer: {chan.uid}\n' + 'Exiting IPC msg loop with final msg\n\n' + f'<= peer: {chan.uid}\n' f'|_{chan}\n\n' - 'final msg:\n' - f'{pformat(msg)}\n' + f'{pformat(msg)}\n\n' ) - # transport **was not** disconnected + # transport **WAS NOT** disconnected return False -- 2.34.1 From 3fb99f2ba5d8ab83135acadc85645cb83dfd76e2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 25 Apr 2024 12:36:14 -0400 Subject: [PATCH 072/305] Flip back `StartAck` timeout to `inf`.. --- tractor/_runtime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 7ab14ab9..c12365be 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -852,7 +852,7 @@ class Actor: msg_buffer_size: int|None = None, allow_overruns: bool = False, load_nsf: bool = False, - ack_timeout: float = 3, + ack_timeout: float = float('inf'), ) -> Context: ''' -- 2.34.1 From 83e3a75c1015a6051f9c746d42f454d23cf73fa5 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 25 Apr 2024 12:38:05 -0400 Subject: [PATCH 073/305] Add `Context.peer_side: str` property, mk static-meth private. --- tractor/_context.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index f0fc966e..762d8798 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -472,13 +472,17 @@ class Context: return 'parent' if self._portal else 'child' @staticmethod - def peer_side(side: str) -> str: + def _peer_side(side: str) -> str: match side: case 'child': return 'parent' case 'parent': return 'child' + @property + def peer_side(self) -> str: + return self._peer_side(self.side) + # TODO: remove stat! # -[ ] re-implement the `.experiemental._pubsub` stuff # with `MsgStream` and that should be last usage? @@ -512,9 +516,7 @@ class Context: equiv of a `StopIteration`. ''' - await self.chan.send( - Stop(cid=self.cid) - ) + await self.chan.send(Stop(cid=self.cid)) def _maybe_cancel_and_set_remote_error( self, @@ -593,7 +595,6 @@ class Context: # TODO: never do this right? # if self._remote_error: # return - peer_side: str = self.peer_side(self.side) # XXX: denote and set the remote side's error so that # after we cancel whatever task is the opener of this @@ -601,7 +602,7 @@ class Context: # appropriately. log.runtime( 'Setting remote error for ctx\n\n' - f'<= {peer_side!r}: {self.chan.uid}\n' + f'<= {self.peer_side!r}: {self.chan.uid}\n' f'=> {self.side!r}\n\n' f'{error}' ) @@ -623,9 +624,8 @@ class Context: elif isinstance(error, MsgTypeError): msgerr = True - peer_side: str = self.peer_side(self.side) log.error( - f'IPC dialog error due to msg-type caused by {peer_side!r} side\n\n' + f'IPC dialog error due to msg-type caused by {self.peer_side!r} side\n\n' f'{error}\n' f'{pformat(self)}\n' @@ -1070,12 +1070,12 @@ class Context: except trio.EndOfChannel as eoc: if ( eoc - and stream.closed + and + stream.closed ): # sanity, can remove? 
assert eoc is stream._eoc
-                # from .devx import pause
-                # await pause()
+
                 log.warning(
                     'Stream was terminated by EoC\n\n'
                     # NOTE: won't show the error but
@@ -1647,10 +1647,9 @@ class Context:
         side: str = self.side
         if side == 'child':
             assert not self._portal
-        peer_side: str = self.peer_side(side)
 
         flow_body: str = (
-            f'<= peer {peer_side!r}: {from_uid}\n'
+            f'<= peer {self.peer_side!r}: {from_uid}\n'
             f' |_<{nsf}()>\n\n'
 
             f'=> {side!r}: {self._task}\n'
@@ -1668,7 +1667,7 @@ class Context:
             log_meth = log.runtime
 
         log_meth(
-            f'Delivering IPC ctx error from {peer_side!r} to {side!r} task\n\n'
+            f'Delivering IPC ctx error from {self.peer_side!r} to {side!r} task\n\n'
 
             f'{flow_body}'
 
@@ -2333,7 +2332,7 @@ async def open_context_from_portal(
             and ctx.cancel_acked
         ):
             log.cancel(
-                'Context cancelled by caller task\n'
+                f'Context cancelled by {ctx.side!r}-side task\n'
                 f'|_{ctx._task}\n\n'
 
                 f'{repr(scope_err)}\n'
@@ -2367,6 +2366,7 @@ async def open_context_from_portal(
         None,
     )
 
+
 def mk_context(
     chan: Channel,
     cid: str,
-- 
2.34.1


From 0efc4c1b87137141510bb80cb700eaa0a2982036 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Thu, 25 Apr 2024 16:19:39 -0400
Subject: [PATCH 074/305] Use `Context.[peer_]side` in ctxc messages

---
 tractor/_rpc.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/tractor/_rpc.py b/tractor/_rpc.py
index 17d37564..595d1352 100644
--- a/tractor/_rpc.py
+++ b/tractor/_rpc.py
@@ -588,7 +588,7 @@ async def _invoke(
 
             if cs.cancel_called:
                 canceller: tuple = ctx.canceller
-                msg: str = 'actor was cancelled by '
+                explain: str = f'{ctx.side!r}-side task was cancelled by '
 
                 # NOTE / TODO: if we end up having
                 # ``Actor._cancel_task()`` call
@@ -598,24 +598,28 @@ async def _invoke(
                 if ctx._cancel_called:
                     # TODO: test for this!!!!!
                     canceller: tuple = our_uid
-                    msg += 'itself '
+                    explain += 'itself '
 
                 # if the channel which spawned the ctx is the
                 # one that cancelled it then we report that, vs.
                 # it being some other random actor that for ex.
                 # some actor who calls `Portal.cancel_actor()`
                 # and by side-effect cancels this ctx.
+                #
+                # TODO: determine if the ctx peer task was the
+                # exact task which cancelled, vs. some other
+                # task in the same actor.
                 elif canceller == ctx.chan.uid:
-                    msg += 'its caller'
+                    explain += f'its {ctx.peer_side!r}-side peer'
 
                 else:
-                    msg += 'a remote peer'
+                    explain += 'a remote peer'
 
+                # TODO: move this "div centering" into
+                # a helper for use elsewhere!
                 div_chars: str = '------ - ------'
                 div_offset: int = (
-                    round(len(msg)/2)+1
+                    round(len(explain)/2)+1
                     +
                     round(len(div_chars)/2)+1
                 )
@@ -626,11 +630,12 @@ async def _invoke(
                     +
                     f'{div_chars}\n'
                 )
-                msg += (
+                explain += (
                     div_str +
                     f'<= canceller: {canceller}\n'
-                    f'=> uid: {our_uid}\n'
-                    f' |_{ctx._task}()'
+                    f'=> cancellee: {our_uid}\n'
+                    # TODO: better repr for ctx tasks..
+                    f' |_{ctx.side!r} {ctx._task}'
 
                     # TODO: instead just show the
                     # ctx.__str__() here?
@@ -649,7 +654,7 @@ async def _invoke(
                 # task, so relay this cancel signal to the
                 # other side.
                 ctxc = ContextCancelled(
-                    message=msg,
+                    message=explain,
                     boxed_type=trio.Cancelled,
                     canceller=canceller,
                 )
-- 
2.34.1


From 7910e1297bf078f572f4d1c09ec0c23872d59a31 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Thu, 25 Apr 2024 20:00:13 -0400
Subject: [PATCH 075/305] Mk `.msg.pretty_struct.Struct.pformat()` a mod func

More along the lines of `msgspec.struct` and also far more useful
internally for pprinting `MsgTypes`. Of course add method aliases.
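As a rough usage sketch of the new mod-level func + aliases (the
`Point` struct and its fields here are purely illustrative, not part
of this change):

    from tractor.msg.pretty_struct import (
        Struct,
        pformat,
    )

    class Point(Struct):
        x: int
        y: int

    pt = Point(x=1, y=2)

    # the mod-level func and the method alias render the same
    # recursion-safe, multi-line repr,
    assert pformat(pt) == pt.pformat()

    # and since `__repr__` is aliased as well, so does this:
    print(pt)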
--- tractor/msg/pretty_struct.py | 110 +++++++++++++++++------------------ 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/tractor/msg/pretty_struct.py b/tractor/msg/pretty_struct.py index a67bbd26..f27fb89c 100644 --- a/tractor/msg/pretty_struct.py +++ b/tractor/msg/pretty_struct.py @@ -102,6 +102,59 @@ def iter_fields(struct: Struct) -> Iterator[ ) +def pformat( + struct: Struct, + field_indent: int = 2, + indent: int = 0, + +) -> str: + ''' + Recursion-safe `pprint.pformat()` style formatting of + a `msgspec.Struct` for sane reading by a human using a REPL. + + ''' + # global whitespace indent + ws: str = ' '*indent + + # field whitespace indent + field_ws: str = ' '*(field_indent + indent) + + # qtn: str = ws + struct.__class__.__qualname__ + qtn: str = struct.__class__.__qualname__ + + obj_str: str = '' # accumulator + fi: structs.FieldInfo + k: str + v: Any + for fi, k, v in iter_fields(struct): + + # TODO: how can we prefer `Literal['option1', 'option2, + # ..]` over .__name__ == `Literal` but still get only the + # latter for simple types like `str | int | None` etc..? + ft: type = fi.type + typ_name: str = getattr(ft, '__name__', str(ft)) + + # recurse to get sub-struct's `.pformat()` output Bo + if isinstance(v, Struct): + val_str: str = v.pformat( + indent=field_indent + indent, + field_indent=indent + field_indent, + ) + + else: # the `pprint` recursion-safe format: + # https://docs.python.org/3.11/library/pprint.html#pprint.saferepr + val_str: str = saferepr(v) + + # TODO: LOLOL use `textwrap.indent()` instead dawwwwwg! + obj_str += (field_ws + f'{k}: {typ_name} = {val_str},\n') + + return ( + f'{qtn}(\n' + f'{obj_str}' + f'{ws})' + ) + + class Struct( _Struct, @@ -140,65 +193,12 @@ class Struct( return sin_props - # TODO: make thisi a mod-func! - def pformat( - self, - field_indent: int = 2, - indent: int = 0, - - ) -> str: - ''' - Recursion-safe `pprint.pformat()` style formatting of - a `msgspec.Struct` for sane reading by a human using a REPL. - - ''' - # global whitespace indent - ws: str = ' '*indent - - # field whitespace indent - field_ws: str = ' '*(field_indent + indent) - - # qtn: str = ws + self.__class__.__qualname__ - qtn: str = self.__class__.__qualname__ - - obj_str: str = '' # accumulator - fi: structs.FieldInfo - k: str - v: Any - for fi, k, v in iter_fields(self): - - # TODO: how can we prefer `Literal['option1', 'option2, - # ..]` over .__name__ == `Literal` but still get only the - # latter for simple types like `str | int | None` etc..? - ft: type = fi.type - typ_name: str = getattr(ft, '__name__', str(ft)) - - # recurse to get sub-struct's `.pformat()` output Bo - if isinstance(v, Struct): - val_str: str = v.pformat( - indent=field_indent + indent, - field_indent=indent + field_indent, - ) - - else: # the `pprint` recursion-safe format: - # https://docs.python.org/3.11/library/pprint.html#pprint.saferepr - val_str: str = saferepr(v) - - # TODO: LOLOL use `textwrap.indent()` instead dawwwwwg! - obj_str += (field_ws + f'{k}: {typ_name} = {val_str},\n') - - return ( - f'{qtn}(\n' - f'{obj_str}' - f'{ws})' - ) - + pformat = pformat + # __str__ = __repr__ = pformat # TODO: use a pprint.PrettyPrinter instance around ONLY rendering # inside a known tty? # def __repr__(self) -> str: # ... 
-
-    # __str__ = __repr__ = pformat
     __repr__ = pformat
 
     def copy(
-- 
2.34.1


From 3b38fa867397332b7821ab7424fa42771aae15ee Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Fri, 26 Apr 2024 12:45:10 -0400
Subject: [PATCH 076/305] Add more useful `MsgDec.__repr__()`

Basically exact same as that for `MsgCodec` with the `.spec`
displayed via a better (maybe multi-line) `.spec_str: str` generated
from a common new set of helper mod funcs factored out of the
msg-codec meths:
- `mk_msgspec_table()` to gen a `MsgType` name -> msg table.
- `pformat_msgspec()` to `str`-ify said table values nicely.

Also add a new `MsgCodec.msg_spec_str: str` prop which delegates to
the above for the same.
---
 tractor/msg/_codec.py | 123 ++++++++++++++++++++++++++++--------------
 1 file changed, 82 insertions(+), 41 deletions(-)

diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py
index e3540c3d..901c0da1 100644
--- a/tractor/msg/_codec.py
+++ b/tractor/msg/_codec.py
@@ -75,7 +75,7 @@ log = get_logger(__name__)
 # TODO: unify with `MsgCodec` by making `._dec` part this?
 class MsgDec(Struct):
     '''
-    An IPC msg decoder.
+    An IPC msg (payload) decoder.
 
     Normally used to decode only a payload: `MsgType.pld:
     PayloadT` field before delivery to IPC consumer code.
@@ -87,6 +87,31 @@ class MsgDec(Struct):
     def dec(self) -> msgpack.Decoder:
         return self._dec
 
+    def __repr__(self) -> str:
+
+        speclines: str = self.spec_str
+
+        # in multi-typed spec case we stick the list
+        # all on newlines after the |__pld_spec__:,
+        # OW it's prolly single type spec-value
+        # so just leave it on same line.
+        if '\n' in speclines:
+            speclines: str = '\n' + textwrap.indent(
+                speclines,
+                prefix=' '*3,
+            )
+
+        body: str = textwrap.indent(
+            f'|_dec_hook: {self.dec.dec_hook}\n'
+            f'|__pld_spec__: {speclines}\n',
+            prefix=' '*2,
+        )
+        return (
+            f'<{type(self).__name__}(\n'
+            f'{body}'
+            ')>'
+        )
+
     # struct type unions
     # https://jcristharif.com/msgspec/structs.html#tagged-unions
     #
@@ -137,17 +162,7 @@ class MsgDec(Struct):
     # TODO: would get moved into `FieldSpec.__str__()` right?
     @property
     def spec_str(self) -> str:
-
-        # TODO: could also use match: instead?
-        spec: Union[Type]|Type = self.spec
-
-        # `typing.Union` case
-        if getattr(spec, '__args__', False):
-            return str(spec)
-
-        # just a single type
-        else:
-            return spec.__name__
+        return pformat_msgspec(codec=self)
 
     pld_spec_str = spec_str
 
@@ -168,9 +183,57 @@ def mk_dec(
 
 ) -> MsgDec:
 
-    return msgpack.Decoder(
-        type=spec,  # like `Msg[Any]`
-        dec_hook=dec_hook,
+    return MsgDec(
+        _dec=msgpack.Decoder(
+            type=spec,  # like `Msg[Any]`
+            dec_hook=dec_hook,
+        )
+    )
+
+
+def mk_msgspec_table(
+    dec: msgpack.Decoder,
+    msg: MsgType|None = None,
+
+) -> dict[MsgType, str]|str:
+    '''
+    Fill out a `dict` of `MsgType`s keyed by type
+    for a given input `msgspec.msgpack.Decoder`
+    as defined by its `.type: Union[Type]` setting.
+
+    If `msg` is provided, only deliver a `dict` with a single
+    entry for that type.
+
+    '''
+    msgspec: Union[Type]|Type = dec.type
+
+    if not (msgtypes := getattr(msgspec, '__args__', False)):
+        msgtypes = [msgspec]
+
+    msgt_table: dict[MsgType, str] = {
+        msgt: str(msgt)
+        for msgt in msgtypes
+    }
+    if msg:
+        msgt: MsgType = type(msg)
+        str_repr: str = msgt_table[msgt]
+        return {msgt: str_repr}
+
+    return msgt_table
+
+
+def pformat_msgspec(
+    codec: MsgCodec|MsgDec,
+    msg: MsgType|None = None,
+    join_char: str = '\n',
+
+) -> str:
+    dec: msgpack.Decoder = getattr(codec, 'dec', codec)
+    return join_char.join(
+        mk_msgspec_table(
+            dec=dec,
+            msg=msg,
+        ).values()
     )
 
 # TODO: overall IPC msg-spec features (i.e. in this mod)!
@@ -200,7 +263,7 @@ class MsgCodec(Struct):
 
     def __repr__(self) -> str:
         speclines: str = textwrap.indent(
-            self.pformat_msg_spec(),
+            pformat_msgspec(codec=self),
             prefix=' '*3,
         )
         body: str = textwrap.indent(
@@ -244,33 +307,11 @@ class MsgCodec(Struct):
         # NOTE: defined and applied inside `mk_codec()`
         return self._dec.type
 
-    def msg_spec_items(
-        self,
-        msg: MsgType|None = None,
-
-    ) -> dict[str, MsgType]|str:
-
-        msgt_table: dict[str, MsgType] = {
-            msgt: str(msgt)
-            for msgt in self.msg_spec.__args__
-        }
-        if msg:
-            msgt: MsgType = type(msg)
-            str_repr: str = msgt_table[msgt]
-            return {msgt: str_repr}
-
-        return msgt_table
-
     # TODO: some way to make `pretty_struct.Struct` use this
     # wrapped field over the `.msg_spec` one?
-    def pformat_msg_spec(
-        self,
-        msg: MsgType|None = None,
-        join_char: str = '\n',
-    ) -> str:
-        return join_char.join(
-            self.msg_spec_items(msg=msg).values()
-        )
+    @property
+    def msg_spec_str(self) -> str:
+        return pformat_msgspec(codec=self)
 
 
 lib: ModuleType = msgspec
-- 
2.34.1


From bf08066031ab72f48f98a2d238cdf01b385c3c44 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Fri, 26 Apr 2024 13:03:07 -0400
Subject: [PATCH 077/305] Use new `Msg[Co]Dec` repr meths in `._exceptions`

Particularly when logging around `MsgTypeError`s.

Other:
- make `_raise_from_unexpected_msg()`'s `expect_msg` a non-default
  value arg, must always be passed by caller.
- drop `'canceller'` from `_body_fields` ow it shows up twice for ctxc.
- use `.msg.pretty_struct.pformat()`.
- parameterize `RemoteActorError.reprol()` (repr-one-line method) to
  show `RemoteActorError[]( ..` to make obvi the boxed remote error
  type.
- re-impl `.boxed_type_str` as `str`-casting the `.boxed_type` value
  which is guaranteed to render non-`None`.
---
 tractor/_exceptions.py | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py
index 8d9274fe..f2ff8c21 100644
--- a/tractor/_exceptions.py
+++ b/tractor/_exceptions.py
@@ -54,6 +54,7 @@ from tractor.msg import (
 from tractor.msg.pretty_struct import (
     iter_fields,
     Struct,
+    pformat as struct_format,
 )
 
 if TYPE_CHECKING:
@@ -108,6 +109,10 @@ _body_fields: list[str] = list(
         'relay_path',
         '_msg_dict',
         'cid',
+
+        # since only ctxc should show it but `Error` does
+        # have it as an optional field.
+        'canceller',
     }
 )
 
@@ -382,6 +387,9 @@ class RemoteActorError(Exception):
         '''
         Error type raised by original remote faulting actor.
 
+        When the error has only been relayed a single actor-hop
+        this will be the same as the `.boxed_type`.
+
         '''
         if self._src_type is None:
             self._src_type = get_err_type(
@@ -396,7 +404,8 @@ class RemoteActorError(Exception):
         String-name of the (last hop's) boxed error type.
''' - return self._ipc_msg.boxed_type_str + bt: Type[BaseException] = self.boxed_type + return str(bt.__name__) @property def boxed_type(self) -> str: @@ -492,7 +501,11 @@ class RemoteActorError(Exception): ''' # TODO: use this matryoshka emjoi XD # => 🪆 - reprol_str: str = f'{type(self).__name__}(' + reprol_str: str = ( + f'{type(self).__name__}' # type name + f'[{self.boxed_type_str}]' # parameterized by boxed type + '(' # init-style look + ) _repr: str = self._mk_fields_str( self.reprol_fields, end_char=' ', @@ -653,8 +666,8 @@ class MsgTypeError( - `Yield` - TODO: any embedded `.pld` type defined by user code? - Normally the source of an error is re-raised from some `.msg._codec` - decode which itself raises in a backend interchange + Normally the source of an error is re-raised from some + `.msg._codec` decode which itself raises in a backend interchange lib (eg. a `msgspec.ValidationError`). ''' @@ -939,7 +952,7 @@ def _raise_from_unexpected_msg( src_err: AttributeError, log: StackLevelAdapter, # caller specific `log` obj - expect_msg: str = Yield, + expect_msg: Type[MsgType], # allow "deeper" tbs when debugging B^o hide_tb: bool = True, @@ -1037,16 +1050,16 @@ def _raise_from_unexpected_msg( ctx.maybe_raise() raise eoc from src_err + # TODO: our own transport/IPC-broke error subtype? if stream._closed: - # TODO: our own error subtype? raise trio.ClosedResourceError('This stream was closed') # always re-raise the source error if no translation error case # is activated above. raise MessagingError( - f"{_type} was expecting a {expect_msg} message" - " BUT received a non-error msg:\n" - f'{pformat(msg)}' + f'{_type} was expecting a {expect_msg.__name__!r} message' + ' BUT received a non-error msg:\n\n' + f'{struct_format(msg)}' ) from src_err @@ -1079,13 +1092,11 @@ def _mk_msg_type_err( # no src error from `msgspec.msgpack.Decoder.decode()` so # prolly a manual type-check on our part. 
if message is None: - fmt_spec: str = codec.pformat_msg_spec() fmt_stack: str = ( '\n'.join(traceback.format_stack(limit=3)) ) tb_fmt: str = pformat_boxed_tb( tb_str=fmt_stack, - # fields_str=header, field_prefix=' ', indent='', ) @@ -1093,8 +1104,7 @@ def _mk_msg_type_err( f'invalid msg -> {msg}: {type(msg)}\n\n' f'{tb_fmt}\n' f'Valid IPC msgs are:\n\n' - # f' ------ - ------\n' - f'{fmt_spec}\n', + f'{codec.msg_spec_str}\n', ) elif src_type_error: src_message: str = str(src_type_error) -- 2.34.1 From b78732781fc843c8975f1697075f2a27f6981d60 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 26 Apr 2024 13:13:04 -0400 Subject: [PATCH 078/305] More bitty (runtime) logging tweaks --- tractor/_portal.py | 6 +++--- tractor/_streaming.py | 8 ++------ 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/tractor/_portal.py b/tractor/_portal.py index f3928657..e25a6c70 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -254,11 +254,11 @@ class Portal: return False reminfo: str = ( - f'`Portal.cancel_actor()` => {self.channel.uid}\n' - f' |_{chan}\n' + f'Portal.cancel_actor() => {self.channel.uid}\n' + f'|_{chan}\n' ) log.cancel( - f'Sending runtime `.cancel()` request to peer\n\n' + f'Requesting runtime cancel for peer\n\n' f'{reminfo}' ) diff --git a/tractor/_streaming.py b/tractor/_streaming.py index 764b7c1e..dd4cd0e1 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -364,14 +364,10 @@ class MsgStream(trio.abc.Channel): if not self._eoc: message: str = ( - f'Context stream closed by {self._ctx.side!r}\n' + f'Stream self-closed by {self._ctx.side!r}-side before EoC\n' f'|_{self}\n' ) - log.cancel( - 'Stream self-closed before receiving EoC\n\n' - + - message - ) + log.cancel(message) self._eoc = trio.EndOfChannel(message) # ?XXX WAIT, why do we not close the local mem chan `._rx_chan` XXX? -- 2.34.1 From 49443d3a7ed06f09dbcbd4af15667d66d2a0c9c4 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 26 Apr 2024 13:18:06 -0400 Subject: [PATCH 079/305] Make `.msg.types.Msg.pld: Raw` only, since `PldRx`.. --- tractor/msg/types.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index cb124324..63c0a467 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -56,6 +56,7 @@ log = get_logger('tractor.msgspec') PayloadT = TypeVar('PayloadT') +# TODO: PayloadMsg class Msg( Struct, Generic[PayloadT], @@ -81,7 +82,7 @@ class Msg( tree. ''' - cid: str|None # call/context-id + cid: str # call/context-id # ^-TODO-^: more explicit type? # -[ ] use UNSET here? # https://jcristharif.com/msgspec/supported-types.html#unset @@ -106,7 +107,7 @@ class Msg( # TODO: could also be set to `msgspec.Raw` if the sub-decoders # approach is preferred over the generic parameterization # approach as take by `mk_msg_spec()` below. - pld: PayloadT|Raw + pld: Raw class Aid( -- 2.34.1 From f9de439b876b227b30af351c0c4dbc5866049ab8 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 26 Apr 2024 15:29:50 -0400 Subject: [PATCH 080/305] First draft payload-spec limit API Add new task-scope oriented `PldRx.pld_spec` management API similar to `.msg._codec.limit_msg_spec()`, but obvi built to process and filter `MsgType.pld` values. New API related changes include: - new per-task singleton getter `msg._ops.current_pldrx()` which delivers the current (global) payload receiver via a new `_ctxvar_PldRx: ContextVar` configured with a default `_def_any_pldec: MsgDec[Any]` decoder. 
- a `PldRx.limit_plds()` which sets the decoder (`.type` underneath) for the specific payload rx instance. - `.msg._ops.limit_plds()` which obtains the current task-scoped `PldRx` and applies the pld spec via a new `PldRx.limit_plds()`. - rename `PldRx._msgdec` -> `._pldec`. - add `.pld_dec` as pub attr for -^ Unrelated adjustments: - use `.msg.pretty_struct.pformat()` where handy. - always pass `expect_msg: MsgType`. - add a `case Stop()` to `PldRx.dec_msg()` which will `log.warning()` when a stop is received by no stream was open on this receiving side since we rarely want that to raise since it's prolly just a runtime race or mistake in user code. Other: --- tractor/msg/_ops.py | 162 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 145 insertions(+), 17 deletions(-) diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index e78b79a4..5a9ab46a 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -25,12 +25,12 @@ from contextlib import ( # asynccontextmanager as acm, contextmanager as cm, ) -from pprint import pformat +from contextvars import ContextVar from typing import ( Any, Type, TYPE_CHECKING, - # Union, + Union, ) # ------ - ------ from msgspec import ( @@ -63,7 +63,7 @@ from .types import ( Started, Stop, Yield, - # pretty_struct, + pretty_struct, ) @@ -75,6 +75,9 @@ if TYPE_CHECKING: log = get_logger(__name__) +_def_any_pldec: MsgDec = mk_dec() + + class PldRx(Struct): ''' A "msg payload receiver". @@ -101,10 +104,13 @@ class PldRx(Struct): ''' # TODO: better to bind it here? # _rx_mc: trio.MemoryReceiveChannel - _msgdec: MsgDec = mk_dec(spec=Any) - + _pldec: MsgDec _ipc: Context|MsgStream|None = None + @property + def pld_dec(self) -> MsgDec: + return self._pldec + @cm def apply_to_ipc( self, @@ -122,9 +128,29 @@ class PldRx(Struct): finally: self._ipc = None + @cm + def limit_plds( + self, + spec: Union[Type[Struct]], + + ) -> MsgDec: + ''' + Type-limit the loadable msg payloads via an applied + `MsgDec` given an input spec, revert to prior decoder on + exit. + + ''' + orig_dec: MsgDec = self._pldec + limit_dec: MsgDec = mk_dec(spec=spec) + try: + self._pldec = limit_dec + yield limit_dec + finally: + self._pldec = orig_dec + @property def dec(self) -> msgpack.Decoder: - return self._msgdec.dec + return self._pldec.dec def recv_pld_nowait( self, @@ -182,7 +208,7 @@ class PldRx(Struct): self, msg: MsgType, ctx: Context, - expect_msg: Type[MsgType]|None = None, + expect_msg: Type[MsgType]|None, ) -> PayloadT|Raw: ''' @@ -199,11 +225,11 @@ class PldRx(Struct): |Return(pld=pld) # termination phase ): try: - pld: PayloadT = self._msgdec.decode(pld) + pld: PayloadT = self._pldec.decode(pld) log.runtime( - 'Decode msg payload\n\n' - f'{msg}\n\n' - f'{pld}\n' + 'Decoded msg payload\n\n' + f'{msg}\n' + f'|_pld={pld!r}' ) return pld @@ -237,9 +263,42 @@ class PldRx(Struct): case Error(): src_err = MessagingError( - 'IPC dialog termination by msg' + 'IPC ctx dialog terminated without `Return`-ing a result' ) + case Stop(cid=cid): + message: str = ( + f'{ctx.side!r}-side of ctx received stream-`Stop` from ' + f'{ctx.peer_side!r} peer ?\n' + f'|_cid: {cid}\n\n' + + f'{pretty_struct.pformat(msg)}\n' + ) + if ctx._stream is None: + explain: str = ( + f'BUT, no `MsgStream` (was) open(ed) on this ' + f'{ctx.side!r}-side of the IPC ctx?\n' + f'Maybe check your code for streaming phase race conditions?\n' + ) + log.warning( + message + + + explain + ) + # let caller decide what to do when only one + # side opened a stream, don't raise. 
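+                    # (the `Stop` msg itself is handed back so
+                    # the caller can still `match:` on it)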
+ return msg + + else: + explain: str = ( + 'Received a `Stop` when it should NEVER be possible!?!?\n' + ) + # TODO: this is constructed inside + # `_raise_from_unexpected_msg()` but maybe we + # should pass it in? + # src_err = trio.EndOfChannel(explain) + src_err = None + case _: src_err = InternalError( 'Unknown IPC msg ??\n\n' @@ -259,6 +318,7 @@ class PldRx(Struct): async def recv_msg_w_pld( self, ipc: Context|MsgStream, + expect_msg: MsgType, ) -> tuple[MsgType, PayloadT]: ''' @@ -274,10 +334,75 @@ class PldRx(Struct): pld: PayloadT = self.dec_msg( msg, ctx=ipc, + expect_msg=expect_msg, ) return msg, pld +# Always maintain a task-context-global `PldRx` +_def_pld_rx: PldRx = PldRx( + _pldec=_def_any_pldec, +) +_ctxvar_PldRx: ContextVar[PldRx] = ContextVar( + 'pld_rx', + default=_def_pld_rx, +) + + +def current_pldrx() -> PldRx: + ''' + Return the current `trio.Task.context`'s msg-payload + receiver, the post IPC but pre-app code `MsgType.pld` + filter. + + Modification of the current payload spec via `limit_plds()` + allows an application to contextually filter typed IPC msg + content delivered via wire transport. + + ''' + return _ctxvar_PldRx.get() + + +@cm +def limit_plds( + spec: Union[Type[Struct]], + **kwargs, + +) -> MsgDec: + ''' + Apply a `MsgCodec` that will natively decode the SC-msg set's + `Msg.pld: Union[Type[Struct]]` payload fields using + tagged-unions of `msgspec.Struct`s from the `payload_types` + for all IPC contexts in use by the current `trio.Task`. + + ''' + __tracebackhide__: bool = True + try: + # sanity on orig settings + orig_pldrx: PldRx = current_pldrx() + orig_pldec: MsgDec = orig_pldrx.pld_dec + + with orig_pldrx.limit_plds( + spec=spec, + **kwargs, + ) as pldec: + log.info( + 'Applying payload-decoder\n\n' + f'{pldec}\n' + ) + yield pldec + finally: + log.info( + 'Reverted to previous payload-decoder\n\n' + f'{orig_pldec}\n' + ) + assert ( + (pldrx := current_pldrx()) is orig_pldrx + and + pldrx.pld_dec is orig_pldec + ) + + async def drain_to_final_msg( ctx: Context, @@ -368,7 +493,10 @@ async def drain_to_final_msg( # pray to the `trio` gawds that we're corrent with this # msg: dict = await ctx._rx_chan.receive() - msg, pld = await ctx._pld_rx.recv_msg_w_pld(ipc=ctx) + msg, pld = await ctx._pld_rx.recv_msg_w_pld( + ipc=ctx, + expect_msg=Return, + ) # NOTE: we get here if the far end was # `ContextCancelled` in 2 cases: @@ -399,7 +527,7 @@ async def drain_to_final_msg( ctx._result: Any = pld log.runtime( 'Context delivered final draining msg:\n' - f'{pformat(msg)}' + f'{pretty_struct.pformat(msg)}' ) # XXX: only close the rx mem chan AFTER # a final result is retreived. 
@@ -435,7 +563,7 @@ async def drain_to_final_msg( f'=> {ctx._task}\n' f' |_{ctx._stream}\n\n' - f'{pformat(msg)}\n' + f'{pretty_struct.pformat(msg)}\n' ) return ( return_msg, @@ -452,7 +580,7 @@ async def drain_to_final_msg( f'=> {ctx._task}\n' f' |_{ctx._stream}\n\n' - f'{pformat(msg)}\n' + f'{pretty_struct.pformat(msg)}\n' ) continue @@ -467,7 +595,7 @@ async def drain_to_final_msg( pre_result_drained.append(msg) log.cancel( 'Remote stream terminated due to "stop" msg:\n\n' - f'{pformat(msg)}\n' + f'{pretty_struct.pformat(msg)}\n' ) continue -- 2.34.1 From 1f4c780b98dd1e52809b49674936a9ba4b98ccbe Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 30 Apr 2024 12:15:46 -0400 Subject: [PATCH 081/305] Mk `process_messages()` return last msg; summary logging Not sure it's **that** useful (yet) but in theory would allow avoiding certain log level usage around transient RPC requests for discovery methods (like `.register_actor()` and friends); can't hurt to be able to introspect that last message for other future cases I'd imagine as well. Adjust the calling code in `._runtime` to match; other spots are using the `trio.Nursery.start()` schedule style and are fine as is. Improve a bunch more log messages throughout a few mods mostly by going to a "summary" single-emission style where possible/appropriate: - in `._runtime` more "single summary" status style log emissions: |_mk `Actor.load_modules()` render a single mod loaded summary. |_use a summary `con_status: str` for `Actor._stream_handler()` conn setup and an equiv (`con_teardown_status`) for connection teardowns. |_similar thing in `Actor.wait_for_actor()`. - generally more usage of `.msg.pretty_struct` apis throughout `._runtime`. --- tractor/_entry.py | 2 +- tractor/_portal.py | 19 +++-- tractor/_rpc.py | 51 +++++------ tractor/_runtime.py | 203 +++++++++++++++++++++++--------------------- 4 files changed, 147 insertions(+), 128 deletions(-) diff --git a/tractor/_entry.py b/tractor/_entry.py index bf719abb..78f83283 100644 --- a/tractor/_entry.py +++ b/tractor/_entry.py @@ -146,7 +146,7 @@ def _trio_main( finally: log.info( - 'Actor terminated\n' + 'Subactor terminated\n' + actor_info ) diff --git a/tractor/_portal.py b/tractor/_portal.py index e25a6c70..806dcc7b 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -435,7 +435,6 @@ class Portal: yield stream finally: - # cancel the far end task on consumer close # NOTE: this is a special case since we assume that if using # this ``.open_fream_from()`` api, the stream is one a one @@ -496,7 +495,7 @@ class LocalPortal: async def open_portal( channel: Channel, - nursery: trio.Nursery|None = None, + tn: trio.Nursery|None = None, start_msg_loop: bool = True, shield: bool = False, @@ -504,15 +503,19 @@ async def open_portal( ''' Open a ``Portal`` through the provided ``channel``. - Spawns a background task to handle message processing (normally - done by the actor-runtime implicitly). + Spawns a background task to handle RPC processing, normally + done by the actor-runtime implicitly via a call to + `._rpc.process_messages()`. just after connection establishment. 
''' actor = current_actor() assert actor was_connected: bool = False - async with maybe_open_nursery(nursery, shield=shield) as nursery: + async with maybe_open_nursery( + tn, + shield=shield, + ) as tn: if not channel.connected(): await channel.connect() @@ -524,7 +527,7 @@ async def open_portal( msg_loop_cs: trio.CancelScope|None = None if start_msg_loop: from ._runtime import process_messages - msg_loop_cs = await nursery.start( + msg_loop_cs = await tn.start( partial( process_messages, actor, @@ -544,7 +547,7 @@ async def open_portal( await channel.aclose() # cancel background msg loop task - if msg_loop_cs: + if msg_loop_cs is not None: msg_loop_cs.cancel() - nursery.cancel_scope.cancel() + tn.cancel_scope.cancel() diff --git a/tractor/_rpc.py b/tractor/_rpc.py index 595d1352..56d91534 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -61,11 +61,13 @@ from .msg import ( current_codec, MsgCodec, NamespacePath, + pretty_struct, ) from tractor.msg.types import ( CancelAck, Error, Msg, + MsgType, Return, Start, StartAck, @@ -770,7 +772,10 @@ async def process_messages( shield: bool = False, task_status: TaskStatus[CancelScope] = trio.TASK_STATUS_IGNORED, -) -> bool: +) -> ( + bool, # chan diconnected + MsgType, # last msg +): ''' This is the low-level, per-IPC-channel, RPC task scheduler loop. @@ -812,11 +817,6 @@ async def process_messages( # |_ for ex, from `aioquic` which exposed "stream ids": # - https://github.com/aiortc/aioquic/blob/main/src/aioquic/quic/connection.py#L1175 # - https://github.com/aiortc/aioquic/blob/main/src/aioquic/quic/connection.py#L659 - log.runtime( - 'Entering RPC msg loop:\n' - f'peer: {chan.uid}\n' - f'|_{chan}\n' - ) nursery_cancelled_before_task: bool = False msg: Msg|None = None try: @@ -830,12 +830,15 @@ async def process_messages( async for msg in chan: log.transport( # type: ignore - f'<= IPC msg from peer: {chan.uid}\n\n' + f'IPC msg from peer\n' + f'<= {chan.uid}\n\n' # TODO: avoid fmting depending on loglevel for perf? - # -[ ] specifically `pformat()` sub-call..? + # -[ ] specifically `pretty_struct.pformat()` sub-call..? + # - how to only log-level-aware actually call this? # -[ ] use `.msg.pretty_struct` here now instead! - f'{pformat(msg)}\n' + # f'{pretty_struct.pformat(msg)}\n' + f'{msg}\n' ) match msg: @@ -949,10 +952,11 @@ async def process_messages( uid=actorid, ): log.runtime( - 'Handling RPC `Start` request from\n' - f'peer: {actorid}\n' - '\n' - f'=> {ns}.{funcname}({kwargs})\n' + 'Handling RPC `Start` request\n' + f'<= peer: {actorid}\n' + f' |_{ns}.{funcname}({kwargs})\n\n' + + f'{pretty_struct.pformat(msg)}\n' ) # runtime-internal endpoint: `Actor.` @@ -1093,25 +1097,24 @@ async def process_messages( parent_chan=chan, ) - except ( - TransportClosed, - ): + except TransportClosed: # channels "breaking" (for TCP streams by EOF or 104 # connection-reset) is ok since we don't have a teardown # handshake for them (yet) and instead we simply bail out of # the message loop and expect the teardown sequence to clean # up.. - # TODO: add a teardown handshake? and, + # + # TODO: maybe add a teardown handshake? and, # -[ ] don't show this msg if it's an ephemeral discovery ep call? # -[ ] figure out how this will break with other transports? 
log.runtime( - f'channel closed abruptly with\n' - f'peer: {chan.uid}\n' - f'|_{chan.raddr}\n' + f'IPC channel closed abruptly\n' + f'<=x peer: {chan.uid}\n' + f' |_{chan.raddr}\n' ) # transport **WAS** disconnected - return True + return (True, msg) except ( Exception, @@ -1151,9 +1154,9 @@ async def process_messages( log.runtime( 'Exiting IPC msg loop with final msg\n\n' f'<= peer: {chan.uid}\n' - f'|_{chan}\n\n' - f'{pformat(msg)}\n\n' + f' |_{chan}\n\n' + f'{pretty_struct.pformat(msg)}' ) # transport **WAS NOT** disconnected - return False + return (False, msg) diff --git a/tractor/_runtime.py b/tractor/_runtime.py index c12365be..84940222 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -50,6 +50,7 @@ from pprint import pformat import signal import sys from typing import ( + Any, Callable, TYPE_CHECKING, ) @@ -68,7 +69,7 @@ from tractor.msg import ( pretty_struct, NamespacePath, types as msgtypes, - Msg, + MsgType, ) from ._ipc import Channel from ._context import ( @@ -92,19 +93,6 @@ from ._rpc import ( process_messages, try_ship_error_to_remote, ) -# from tractor.msg.types import ( -# Aid, -# SpawnSpec, -# Start, -# StartAck, -# Started, -# Yield, -# Stop, -# Return, -# Error, -# ) - - if TYPE_CHECKING: @@ -311,29 +299,32 @@ class Actor: self._reg_addrs = addrs async def wait_for_peer( - self, uid: tuple[str, str] + self, + uid: tuple[str, str], + ) -> tuple[trio.Event, Channel]: ''' - Wait for a connection back from a spawned actor with a `uid` - using a `trio.Event` for sync. + Wait for a connection back from a (spawned sub-)actor with + a `uid` using a `trio.Event` for sync. ''' - log.runtime(f"Waiting for peer {uid} to connect") + log.debug(f'Waiting for peer {uid!r} to connect') event = self._peer_connected.setdefault(uid, trio.Event()) await event.wait() - log.runtime(f"{uid} successfully connected back to us") + log.debug(f'{uid!r} successfully connected back to us') return event, self._peers[uid][-1] def load_modules( self, - debug_mode: bool = False, + # debug_mode: bool = False, ) -> None: ''' - Load enabled RPC py-modules locally (after process fork/spawn). + Load explicitly enabled python modules from local fs after + process spawn. Since this actor may be spawned on a different machine from the original nursery we need to try and load the local module - code (presuming it exists). + code manually (presuming it exists). ''' try: @@ -346,16 +337,21 @@ class Actor: _mp_fixup_main._fixup_main_from_path( parent_data['init_main_from_path']) + status: str = 'Attempting to import enabled modules:\n' for modpath, filepath in self.enable_modules.items(): # XXX append the allowed module to the python path which # should allow for relative (at least downward) imports. sys.path.append(os.path.dirname(filepath)) - log.runtime(f"Attempting to import {modpath}@{filepath}") - mod = importlib.import_module(modpath) + status += ( + f'|_{modpath!r} -> {filepath!r}\n' + ) + mod: ModuleType = importlib.import_module(modpath) self._mods[modpath] = mod if modpath == '__main__': self._mods['__mp_main__'] = mod + log.runtime(status) + except ModuleNotFoundError: # it is expected the corresponding `ModuleNotExposed` error # will be raised later @@ -415,21 +411,23 @@ class Actor: else: log.runtime(f'New connection to us @{chan.raddr}') - con_msg: str = '' + con_status: str = '' + + # TODO: remove this branch since can never happen? 
+ # NOTE: `.uid` is only set after first contact if their_uid: - # NOTE: `.uid` is only set after first contact - con_msg = ( - 'IPC Re-connection from already known peer? ' + con_status = ( + 'IPC Re-connection from already known peer?\n' ) else: - con_msg = ( - 'New IPC connection to us ' + con_status = ( + 'New inbound IPC connection <=\n' ) - con_msg += ( - f'<= @{chan.raddr}\n' + con_status += ( f'|_{chan}\n' # f' |_@{chan.raddr}\n\n' + # ^-TODO-^ remove since alfready in chan.__repr__()? ) # send/receive initial handshake response try: @@ -449,13 +447,13 @@ class Actor: # a bound listener on the "arbiter" addr. the reset will be # because the handshake was never meant took place. log.warning( - con_msg + con_status + ' -> But failed to handshake? Ignoring..\n' ) return - con_msg += ( + con_status += ( f' -> Handshake with actor `{uid[0]}[{uid[1][-6:]}]` complete\n' ) # IPC connection tracking for both peers and new children: @@ -468,7 +466,7 @@ class Actor: None, ) if event: - con_msg += ( + con_status += ( ' -> Waking subactor spawn waiters: ' f'{event.statistics().tasks_waiting}\n' f' -> Registered IPC chan for child actor {uid}@{chan.raddr}\n' @@ -479,7 +477,7 @@ class Actor: event.set() else: - con_msg += ( + con_status += ( f' -> Registered IPC chan for peer actor {uid}@{chan.raddr}\n' ) # type: ignore @@ -493,13 +491,18 @@ class Actor: # TODO: can we just use list-ref directly? chans.append(chan) - log.runtime(con_msg) + con_status += ' -> Entering RPC msg loop..\n' + log.runtime(con_status) # Begin channel management - respond to remote requests and # process received reponses. disconnected: bool = False + last_msg: MsgType try: - disconnected: bool = await process_messages( + ( + disconnected, + last_msg, + ) = await process_messages( self, chan, ) @@ -600,16 +603,24 @@ class Actor: # that the IPC layer may have failed # unexpectedly since it may be the cause of # other downstream errors. - entry = local_nursery._children.get(uid) + entry: tuple|None = local_nursery._children.get(uid) if entry: proc: trio.Process _, proc, _ = entry if ( (poll := getattr(proc, 'poll', None)) - and poll() is None + and + poll() is None # proc still alive ): - log.cancel( + # TODO: change log level based on + # detecting whether chan was created for + # ephemeral `.register_actor()` request! + # -[ ] also, that should be avoidable by + # re-using any existing chan from the + # `._discovery.get_registry()` call as + # well.. + log.runtime( f'Peer IPC broke but subproc is alive?\n\n' f'<=x {chan.uid}@{chan.raddr}\n' @@ -618,17 +629,17 @@ class Actor: # ``Channel`` teardown and closure sequence # drop ref to channel so it can be gc-ed and disconnected - log.runtime( - f'Disconnected IPC channel:\n' - f'uid: {chan.uid}\n' - f'|_{pformat(chan)}\n' + con_teardown_status: str = ( + f'IPC channel disconnected:\n' + f'<=x uid: {chan.uid}\n' + f' |_{pformat(chan)}\n\n' ) chans.remove(chan) # TODO: do we need to be this pedantic? if not chans: - log.runtime( - f'No more channels with {chan.uid}' + con_teardown_status += ( + f'-> No more channels with {chan.uid}' ) self._peers.pop(uid, None) @@ -642,15 +653,16 @@ class Actor: f' |_[{i}] {pformat(chan)}\n' ) - log.runtime( - f'Remaining IPC {len(self._peers)} peers:\n' - + peers_str + con_teardown_status += ( + f'-> Remaining IPC {len(self._peers)} peers: {peers_str}\n' ) # No more channels to other actors (at all) registered # as connected. 
if not self._peers: - log.runtime("Signalling no more peer channel connections") + con_teardown_status += ( + 'Signalling no more peer channel connections' + ) self._no_more_peers.set() # NOTE: block this actor from acquiring the @@ -725,13 +737,16 @@ class Actor: # TODO: figure out why this breaks tests.. db_cs.cancel() + log.runtime(con_teardown_status) + # finally block closure + # TODO: rename to `._deliver_payload()` since this handles # more then just `result` msgs now obvi XD async def _deliver_ctx_payload( self, chan: Channel, cid: str, - msg: Msg|MsgTypeError, + msg: MsgType|MsgTypeError, ) -> None|bool: ''' @@ -756,7 +771,7 @@ class Actor: # XXX don't need right since it's always in msg? # f'=> cid: {cid}\n\n' - f'{pretty_struct.Struct.pformat(msg)}\n' + f'{pretty_struct.pformat(msg)}\n' ) return @@ -898,9 +913,11 @@ class Actor: cid=cid, ) log.runtime( - 'Sending RPC start msg\n\n' + 'Sending RPC `Start`\n\n' f'=> peer: {chan.uid}\n' - f' |_ {ns}.{func}({kwargs})\n' + f' |_ {ns}.{func}({kwargs})\n\n' + + f'{pretty_struct.pformat(msg)}' ) await chan.send(msg) @@ -957,31 +974,29 @@ class Actor: if self._spawn_method == "trio": - # Receive runtime state from our parent - # parent_data: dict[str, Any] - # parent_data = await chan.recv() - - # TODO: maybe we should just wrap this directly - # in a `Actor.spawn_info: SpawnInfo` struct? + # Receive post-spawn runtime state from our parent. spawnspec: msgtypes.SpawnSpec = await chan.recv() self._spawn_spec = spawnspec - # TODO: eventually all these msgs as - # `msgspec.Struct` with a special mode that - # pformats them in multi-line mode, BUT only - # if "trace"/"util" mode is enabled? log.runtime( 'Received runtime spec from parent:\n\n' - f'{pformat(spawnspec)}\n' + + # TODO: eventually all these msgs as + # `msgspec.Struct` with a special mode that + # pformats them in multi-line mode, BUT only + # if "trace"/"util" mode is enabled? + f'{pretty_struct.pformat(spawnspec)}\n' ) - # accept_addrs: list[tuple[str, int]] = parent_data.pop('bind_addrs') accept_addrs: list[tuple[str, int]] = spawnspec.bind_addrs - # rvs = parent_data.pop('_runtime_vars') - rvs = spawnspec._runtime_vars + # TODO: another `Struct` for rtvs.. + rvs: dict[str, Any] = spawnspec._runtime_vars if rvs['_debug_mode']: try: - log.info( + # TODO: maybe return some status msgs upward + # to that we can emit them in `con_status` + # instead? + log.devx( 'Enabling `stackscope` traces on SIGUSR1' ) from .devx import enable_stack_on_sig @@ -991,7 +1006,6 @@ class Actor: '`stackscope` not installed for use in debug mode!' ) - log.runtime(f'Runtime vars are: {rvs}') rvs['_is_root'] = False _state._runtime_vars.update(rvs) @@ -1008,18 +1022,12 @@ class Actor: for val in spawnspec.reg_addrs ] - # for attr, value in parent_data.items(): + # TODO: better then monkey patching.. 
+ # -[ ] maybe read the actual f#$-in `._spawn_spec` XD for _, attr, value in pretty_struct.iter_fields( spawnspec, ): setattr(self, attr, value) - # if ( - # attr == 'reg_addrs' - # and value - # ): - # self.reg_addrs = [tuple(val) for val in value] - # else: - # setattr(self, attr, value) return ( chan, @@ -1028,12 +1036,11 @@ class Actor: except OSError: # failed to connect log.warning( - f'Failed to connect to parent!?\n\n' - 'Closing IPC [TCP] transport server to\n' - f'{parent_addr}\n' + f'Failed to connect to spawning parent actor!?\n' + f'x=> {parent_addr}\n' f'|_{self}\n\n' ) - await self.cancel(chan=None) # self cancel + await self.cancel(req_chan=None) # self cancel raise async def _serve_forever( @@ -1111,8 +1118,7 @@ class Actor: # chan whose lifetime limits the lifetime of its remotely # requested and locally spawned RPC tasks - similar to the # supervision semantics of a nursery wherein the actual - # implementation does start all such tasks in - # a sub-nursery. + # implementation does start all such tasks in a sub-nursery. req_chan: Channel|None, ) -> bool: @@ -1153,7 +1159,7 @@ class Actor: # other) repr fields instead of doing this all manual.. msg: str = ( f'Runtime cancel request from {requester_type}:\n\n' - f'<= .cancel(): {requesting_uid}\n' + f'<= .cancel(): {requesting_uid}\n\n' ) # TODO: what happens here when we self-cancel tho? @@ -1168,8 +1174,8 @@ class Actor: dbcs = _debug.DebugStatus.req_cs if dbcs is not None: msg += ( - '>> Cancelling active debugger request..\n' - f'|_{_debug.Lock}\n' + '-> Cancelling active debugger request..\n' + f'|_{_debug.Lock.pformat()}' ) dbcs.cancel() @@ -1420,7 +1426,12 @@ class Actor: ''' if self._server_n: - log.runtime("Shutting down channel server") + # TODO: obvi a different server type when we eventually + # support some others XD + server_prot: str = 'TCP' + log.runtime( + f'Cancelling {server_prot} server' + ) self._server_n.cancel_scope.cancel() return True @@ -1604,6 +1615,7 @@ async def async_main( assert accept_addrs try: + # TODO: why is this not with the root nursery? actor._server_n = await service_nursery.start( partial( actor._serve_forever, @@ -1888,13 +1900,13 @@ class Arbiter(Actor): sockaddrs: list[tuple[str, int]] = [] sockaddr: tuple[str, int] - for (aname, _), sockaddr in self._registry.items(): - log.runtime( - f'Actor mailbox info:\n' - f'aname: {aname}\n' - f'sockaddr: {sockaddr}\n' + mailbox_info: str = 'Actor registry contact infos:\n' + for uid, sockaddr in self._registry.items(): + mailbox_info += ( + f'|_uid: {uid}\n' + f'|_sockaddr: {sockaddr}\n\n' ) - if name == aname: + if name == uid[0]: sockaddrs.append(sockaddr) if not sockaddrs: @@ -1906,6 +1918,7 @@ class Arbiter(Actor): if not isinstance(uid, trio.Event): sockaddrs.append(self._registry[uid]) + log.runtime(mailbox_info) return sockaddrs async def register_actor( -- 2.34.1 From 2cdd5b5b8f94a1803fdd8a54386184eb048b8a3c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 30 Apr 2024 12:37:17 -0400 Subject: [PATCH 082/305] Reorg frames pformatters, add `Context.repr_state` A better spot for the pretty-formatting of frame text (and thus tracebacks) is in the new `.devx._code` module: - move from `._exceptions` -> `.devx._code.pformat_boxed_tb()`. - add new `pformat_caller_frame()` factored out the use case in `._exceptions._mk_msg_type_err()` where we dump a stack trace for bad `.send()` side IPC msgs. 
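
As a rough usage sketch of the boxing (the `fields_str` content here is
made up, but the import path and kwargs match the new mod's API as per
the diff below):

    from tractor.devx._code import pformat_boxed_tb
    import traceback

    try:
        raise RuntimeError('boom')
    except RuntimeError:
        tb_str: str = traceback.format_exc()

    # emphasize the tb as an "embedded attr" of some parent
    # error-container by boxing it under a fields header.
    print(
        pformat_boxed_tb(
            tb_str=tb_str,
            fields_str='src_uid: ("example", "deadbeef")\n',
            field_prefix=' |_',
        )
    )
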
Add some new pretty-format methods to `Context`: - explicitly implement `.pformat()` and allow an `extra_fields: dict` which can be used to inject additional fields (maybe eventually by default) such as is now used inside `._maybe_cancel_and_set_remote_error()` when reporting the internal `._scope` state in cancel logging. - add a new `.repr_state -> str` which provides a single string status depending on the internal state of the IPC ctx in terms of the shuttle protocol's "phase"; use it from `.pformat()` for the `|_state:`. - set `.started(complain_no_parity=False)` now since we presume decoding with `.pld: Raw` now with the new `PldRx` design. - use new `msgops.current_pldrx()` in `mk_context()`. --- tractor/_context.py | 163 +++++++++++++++++++++++++++++------------ tractor/_exceptions.py | 83 +++------------------ tractor/devx/_code.py | 102 ++++++++++++++++++++++++++ 3 files changed, 227 insertions(+), 121 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 762d8798..20584979 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -61,7 +61,6 @@ from ._exceptions import ( ) from .log import get_logger from .msg import ( - _codec, Error, MsgType, MsgCodec, @@ -103,7 +102,6 @@ class Unresolved: a final return value or raised error is resolved. ''' - ... # TODO: make this a .msg.types.Struct! @@ -116,19 +114,19 @@ class Context: NB: This class should **never be instatiated directly**, it is allocated by the runtime in 2 ways: - - by entering ``Portal.open_context()`` which is the primary - public API for any "caller" task or, + - by entering `Portal.open_context()` which is the primary + public API for any "parent" task or, - by the RPC machinery's `._rpc._invoke()` as a `ctx` arg - to a remotely scheduled "callee" function. + to a remotely scheduled "child" function. - AND is always constructed using the below ``mk_context()``. + AND is always constructed using the below `mk_context()`. Allows maintaining task or protocol specific state between 2 cancel-scope-linked, communicating and parallel executing `trio.Task`s. Contexts are allocated on each side of any task RPC-linked msg dialog, i.e. for every request to a remote actor from a `Portal`. On the "callee" side a context is - always allocated inside ``._rpc._invoke()``. + always allocated inside `._rpc._invoke()`. TODO: more detailed writeup on cancellation, error and streaming semantics.. @@ -262,7 +260,13 @@ class Context: _strict_started: bool = False _cancel_on_msgerr: bool = True - def __str__(self) -> str: + def pformat( + self, + extra_fields: dict[str, Any]|None = None, + # ^-TODO-^ some built-in extra state fields + # we'll want in some devx specific cases? + + ) -> str: ds: str = '=' # ds: str = ': ' @@ -279,11 +283,7 @@ class Context: outcome_str: str = self.repr_outcome( show_error_fields=True ) - outcome_typ_str: str = self.repr_outcome( - type_only=True - ) - - return ( + fmtstr: str = ( f'\n' ) # NOTE: making this return a value that can be passed to @@ -335,7 +345,8 @@ class Context: # logging perspective over `eval()`-ability since we do NOT # target serializing non-struct instances! 
    # def __repr__(self) -> str:
-    __repr__ = __str__
+    __str__ = pformat
+    __repr__ = pformat
 
     @property
     def cancel_called(self) -> bool:
@@ -615,10 +626,10 @@ class Context:
             whom: str = (
                 'us' if error.canceller == self._actor.uid
-                else 'peer'
+                else 'a remote peer (not us)'
             )
             log.cancel(
-                f'IPC context cancelled by {whom}!\n\n'
+                f'IPC context was cancelled by {whom}!\n\n'
                 f'{error}'
             )
 
@@ -626,7 +637,6 @@ class Context:
             msgerr = True
             log.error(
                 f'IPC dialog error due to msg-type caused by {self.peer_side!r} side\n\n'
-                f'{error}\n'
                 f'{pformat(self)}\n'
             )
 
@@ -696,24 +706,23 @@ class Context:
 
         else:
             message: str = 'NOT cancelling `Context._scope` !\n\n'
 
-        scope_info: str = 'No `self._scope: CancelScope` was set/used ?'
+        fmt_str: str = 'No `self._scope: CancelScope` was set/used ?'
         if cs:
-            scope_info: str = (
-                f'self._scope: {cs}\n'
-                f'|_ .cancel_called: {cs.cancel_called}\n'
-                f'|_ .cancelled_caught: {cs.cancelled_caught}\n'
-                f'|_ ._cancel_status: {cs._cancel_status}\n\n'
+            fmt_str: str = self.pformat(
+                extra_fields={
+                    '._is_self_cancelled()': self._is_self_cancelled(),
+                    '._cancel_on_msgerr': self._cancel_on_msgerr,
 
-                f'{self}\n'
-                f'|_ ._is_self_cancelled(): {self._is_self_cancelled()}\n'
-                f'|_ ._cancel_on_msgerr: {self._cancel_on_msgerr}\n\n'
-
-                f'msgerr: {msgerr}\n'
+                    '._scope': cs,
+                    '._scope.cancel_called': cs.cancel_called,
+                    '._scope.cancelled_caught': cs.cancelled_caught,
+                    '._scope._cancel_status': cs._cancel_status,
+                }
             )
         log.cancel(
             message
             +
-            f'{scope_info}'
+            fmt_str
         )
         # TODO: maybe we should also call `._res_scope.cancel()` if it
         # exists to support cancelling any drain loop hangs?
@@ -751,7 +760,7 @@ class Context:
         )
         return (
             # f'{self._nsf}() -{{{codec}}}-> {repr(self.outcome)}:'
-            f'{self._nsf}() -> {outcome_str}:'
+            f'{self._nsf}() -> {outcome_str}'
         )
 
     @property
@@ -839,7 +848,7 @@ class Context:
         if not self._portal:
             raise InternalError(
                 'No portal found!?\n'
-                'Why is this supposed caller context missing it?'
+                f'Why is this supposed {self.side!r}-side ctx task missing it?!?'
             )
 
         cid: str = self.cid
@@ -1277,11 +1286,11 @@ class Context:
             )
 
         log.cancel(
-            'Ctx drained pre-result msgs:\n'
-            f'{pformat(drained_msgs)}\n\n'
+            'Ctx drained to final result msgs\n'
+            f'{return_msg}\n\n'
 
-            f'Final return msg:\n'
-            f'{return_msg}\n'
+            f'pre-result drained msgs:\n'
+            f'{pformat(drained_msgs)}\n'
         )
 
         self.maybe_raise(
@@ -1446,6 +1455,65 @@ class Context:
             repr(self._result)
         )
 
+    @property
+    def repr_state(self) -> str:
+        '''
+        A `str`-status describing the current state of this
+        inter-actor IPC context in terms of the current "phase" state
+        of the SC shuttling dialog protocol.
+
+        '''
+        merr: Exception|None = self.maybe_error
+        outcome: Unresolved|Exception|Any = self.outcome
+
+        match (
+            outcome,
+            merr,
+        ):
+            case (
+                Unresolved,
+                ContextCancelled(),
+            ) if self.cancel_acked:
+                status = 'self-cancelled'
+
+            case (
+                Unresolved,
+                ContextCancelled(),
+            ) if (
+                self.canceller
+                and not self._cancel_called
+            ):
+                status = 'peer-cancelled'
+
+            case (
+                Unresolved,
+                BaseException(),
+            ) if self.canceller:
+                status = 'errored'
+
+            case (
+                _,  # any non-unresolved value
+                None,
+            ) if self._final_result_is_set():
+                status = 'returned'
+
+            case (
+                Unresolved,  # noqa (weird.. ruff)
+                None,
+            ):
+                if stream := self._stream:
+                    if stream.closed:
+                        status = 'streaming-finished'
+                    else:
+                        status = 'streaming'
+                elif self._started_called:
+                    status = 'started'
+
+            case _:
+                status = 'unknown!?'
+ + return status + async def started( self, @@ -1454,7 +1522,11 @@ class Context: value: PayloadT|None = None, strict_parity: bool = False, - complain_no_parity: bool = True, + + # TODO: this will always emit now that we do `.pld: Raw` + # passthrough.. so maybe just only complain when above strict + # flag is set? + complain_no_parity: bool = False, ) -> None: ''' @@ -1514,18 +1586,19 @@ class Context: ) raise RuntimeError( 'Failed to roundtrip `Started` msg?\n' - f'{pformat(rt_started)}\n' + f'{pretty_struct.pformat(rt_started)}\n' ) if rt_started != started_msg: # TODO: break these methods out from the struct subtype? + # TODO: make that one a mod func too.. diff = pretty_struct.Struct.__sub__( rt_started, started_msg, ) complaint: str = ( - 'Started value does not match after codec rountrip?\n\n' + 'Started value does not match after roundtrip?\n\n' f'{diff}' ) @@ -1541,8 +1614,6 @@ class Context: else: log.warning(complaint) - # started_msg = rt_started - await self.chan.send(started_msg) # raise any msg type error NO MATTER WHAT! @@ -2357,7 +2428,7 @@ async def open_context_from_portal( # FINALLY, remove the context from runtime tracking and # exit! log.runtime( - 'De-allocating IPC ctx opened with {ctx.side!r} peer \n' + f'De-allocating IPC ctx opened with {ctx.side!r} peer \n' f'uid: {uid}\n' f'cid: {ctx.cid}\n' ) @@ -2393,10 +2464,8 @@ def mk_context( from .devx._code import find_caller_info caller_info: CallerInfo|None = find_caller_info() - pld_rx = msgops.PldRx( - # _rx_mc=recv_chan, - _msgdec=_codec.mk_dec(spec=pld_spec) - ) + # TODO: when/how do we apply `.limit_plds()` from here? + pld_rx: msgops.PldRx = msgops.current_pldrx() ctx = Context( chan=chan, diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index f2ff8c21..af653f92 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -46,7 +46,7 @@ from tractor.msg import ( Error, MsgType, Stop, - Yield, + # Yield, types as msgtypes, MsgCodec, MsgDec, @@ -140,71 +140,6 @@ def get_err_type(type_name: str) -> BaseException|None: return type_ref -def pformat_boxed_tb( - tb_str: str, - fields_str: str|None = None, - field_prefix: str = ' |_', - - tb_box_indent: int|None = None, - tb_body_indent: int = 1, - -) -> str: - if ( - fields_str - and - field_prefix - ): - fields: str = textwrap.indent( - fields_str, - prefix=field_prefix, - ) - else: - fields = fields_str or '' - - tb_body = tb_str - if tb_body_indent: - tb_body: str = textwrap.indent( - tb_str, - prefix=tb_body_indent * ' ', - ) - - tb_box: str = ( - - # orig - # f' |\n' - # f' ------ - ------\n\n' - # f'{tb_str}\n' - # f' ------ - ------\n' - # f' _|\n' - - f'|\n' - f' ------ - ------\n\n' - # f'{tb_str}\n' - f'{tb_body}' - f' ------ - ------\n' - f'_|\n' - ) - tb_box_indent: str = ( - tb_box_indent - or - 1 - - # (len(field_prefix)) - # ? ^-TODO-^ ? if you wanted another indent level - ) - if tb_box_indent > 0: - tb_box: str = textwrap.indent( - tb_box, - prefix=tb_box_indent * ' ', - ) - - return ( - fields - + - tb_box - ) - - def pack_from_raise( local_err: ( ContextCancelled @@ -504,12 +439,15 @@ class RemoteActorError(Exception): reprol_str: str = ( f'{type(self).__name__}' # type name f'[{self.boxed_type_str}]' # parameterized by boxed type - '(' # init-style look ) + _repr: str = self._mk_fields_str( self.reprol_fields, end_char=' ', ) + if _repr: + reprol_str += '(' # init-style call + return ( reprol_str + @@ -521,6 +459,7 @@ class RemoteActorError(Exception): Nicely formatted boxed error meta data + traceback. 
''' + from tractor.devx._code import pformat_boxed_tb fields: str = self._mk_fields_str( _body_fields + @@ -1092,14 +1031,10 @@ def _mk_msg_type_err( # no src error from `msgspec.msgpack.Decoder.decode()` so # prolly a manual type-check on our part. if message is None: - fmt_stack: str = ( - '\n'.join(traceback.format_stack(limit=3)) - ) - tb_fmt: str = pformat_boxed_tb( - tb_str=fmt_stack, - field_prefix=' ', - indent='', + from tractor.devx._code import ( + pformat_caller_frame, ) + tb_fmt: str = pformat_caller_frame(stack_limit=3) message: str = ( f'invalid msg -> {msg}: {type(msg)}\n\n' f'{tb_fmt}\n' diff --git a/tractor/devx/_code.py b/tractor/devx/_code.py index 01d64cd1..8d55212b 100644 --- a/tractor/devx/_code.py +++ b/tractor/devx/_code.py @@ -23,6 +23,8 @@ from __future__ import annotations import inspect # import msgspec # from pprint import pformat +import textwrap +import traceback from types import ( FrameType, FunctionType, @@ -175,3 +177,103 @@ def find_caller_info( ) return None + + +def pformat_boxed_tb( + tb_str: str, + fields_str: str|None = None, + field_prefix: str = ' |_', + + tb_box_indent: int|None = None, + tb_body_indent: int = 1, + +) -> str: + ''' + Create a "boxed" looking traceback string. + + Useful for emphasizing traceback text content as being an + embedded attribute of some other object (like + a `RemoteActorError` or other boxing remote error shuttle + container). + + Any other parent/container "fields" can be passed in the + `fields_str` input along with other prefix/indent settings. + + ''' + if ( + fields_str + and + field_prefix + ): + fields: str = textwrap.indent( + fields_str, + prefix=field_prefix, + ) + else: + fields = fields_str or '' + + tb_body = tb_str + if tb_body_indent: + tb_body: str = textwrap.indent( + tb_str, + prefix=tb_body_indent * ' ', + ) + + tb_box: str = ( + + # orig + # f' |\n' + # f' ------ - ------\n\n' + # f'{tb_str}\n' + # f' ------ - ------\n' + # f' _|\n' + + f'|\n' + f' ------ - ------\n\n' + # f'{tb_str}\n' + f'{tb_body}' + f' ------ - ------\n' + f'_|\n' + ) + tb_box_indent: str = ( + tb_box_indent + or + 1 + + # (len(field_prefix)) + # ? ^-TODO-^ ? if you wanted another indent level + ) + if tb_box_indent > 0: + tb_box: str = textwrap.indent( + tb_box, + prefix=tb_box_indent * ' ', + ) + + return ( + fields + + + tb_box + ) + + +def pformat_caller_frame( + stack_limit: int = 1, + box_tb: bool = True, +) -> str: + ''' + Capture and return the traceback text content from + `stack_limit` call frames up. 
+
+    '''
+    tb_str: str = (
+        '\n'.join(
+            traceback.format_stack(limit=stack_limit)
+        )
+    )
+    if box_tb:
+        tb_str: str = pformat_boxed_tb(
+            tb_str=tb_str,
+            field_prefix='  ',
+            indent='',
+        )
+    return tb_str
-- 
2.34.1


From 7b6881cf0a5188f64904bb4a4268928d224e64a8 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Tue, 30 Apr 2024 12:55:46 -0400
Subject: [PATCH 083/305] Fix attr name error, use public `MsgDec.dec`

---
 tractor/msg/_ops.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py
index 5a9ab46a..4cf20496 100644
--- a/tractor/msg/_ops.py
+++ b/tractor/msg/_ops.py
@@ -229,7 +229,7 @@ class PldRx(Struct):
                     log.runtime(
                         'Decoded msg payload\n\n'
                         f'{msg}\n'
-                        f'|_pld={pld!r}'
+                        f'|_pld={pld!r}\n'
                     )
                     return pld
 
@@ -237,7 +237,7 @@ class PldRx(Struct):
                 except ValidationError as src_err:
                     msgterr: MsgTypeError = _mk_msg_type_err(
                         msg=msg,
-                        codec=self._dec,
+                        codec=self.dec,
                         src_validation_error=src_err,
                     )
                     msg: Error = pack_from_raise(
-- 
2.34.1


From ceaafc064ee30f3220d2749517fd0871c6acdb13 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Tue, 30 Apr 2024 12:59:38 -0400
Subject: [PATCH 084/305] Type annot the proc from `trio.lowlevel.open_process()`

---
 tractor/_spawn.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tractor/_spawn.py b/tractor/_spawn.py
index 824f41f3..3f886c01 100644
--- a/tractor/_spawn.py
+++ b/tractor/_spawn.py
@@ -455,10 +455,9 @@ async def trio_proc(
     proc: trio.Process|None = None
     try:
         try:
-            # TODO: needs ``trio_typing`` patch?
-            proc = await trio.lowlevel.open_process(spawn_cmd)
+            proc: trio.Process = await trio.lowlevel.open_process(spawn_cmd)
             log.runtime(
-                'Started new sub-proc\n'
+                'Started new child\n'
                 f'|_{proc}\n'
             )
-- 
2.34.1


From 998c0f0bd5f73ebaf1dafa34a30b746400405511 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Tue, 30 Apr 2024 13:01:07 -0400
Subject: [PATCH 085/305] Add todo for rigorous struct-type spec of `SpawnSpec` fields

---
 tractor/msg/types.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tractor/msg/types.py b/tractor/msg/types.py
index 63c0a467..7e10dab0 100644
--- a/tractor/msg/types.py
+++ b/tractor/msg/types.py
@@ -144,6 +144,8 @@ class SpawnSpec(
     `Aid` msg.
 
     '''
+    # TODO: similar to the `Start` kwargs spec needed below, we need
+    # a hard `Struct` def for all of these fields!
     _parent_main_data: dict
     _runtime_vars: dict[str, Any]
-- 
2.34.1


From 467764d45e6a9b94b315345e91d3d36f493da2da Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Mon, 6 May 2024 12:55:16 -0400
Subject: [PATCH 086/305] Change to `RemoteActorError.pformat()`

For more sane manual calls, as needed for logging purposes. Obvi remap
the dunder methods to it.

Other:
- drop `hide_tb: bool` from `unpack_error()`, shouldn't need it since
  the frame won't ever be part of any tb raised from the returned
  error.
- add an `is_invalid_payload: bool` to `_mk_msg_type_err()` to be used
  from `PldRx` where we don't need to decode the IPC msg, just the
  payload; make the error message reflect this case.
- drop commented `._portal._unwrap_msg()` since we've replaced it with
  `PldRx`'s delegation to the newer `._raise_from_unexpected_msg()`.
- hide the `Portal.result()` frame by default, again.
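
The dunder-to-method mapping pattern in a self-contained sketch (a toy
exception type, not the real `RemoteActorError` impl):

    class BoxedError(Exception):
        def __init__(self, message: str) -> None:
            super().__init__(message)
            # stash for manual display without having to muck
            # with `Exception.args`.
            self._message: str = message

        def pformat(self) -> str:
            # the real version renders boxed meta-fields + tb,
            # falling back to the plain message otherwise.
            return (
                f'<{type(self).__name__}(\n'
                f' |_{self._message}\n'
                f')>'
            )

        # remap the dunders so that manual calls (eg. from logging
        # code) and implicit str/repr-casts share the same impl.
        __repr__ = pformat
        __str__ = pformat
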
--- tractor/_exceptions.py | 156 +++++++++++++++++++++++------------------ tractor/_portal.py | 37 +--------- 2 files changed, 89 insertions(+), 104 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index af653f92..83675069 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -46,7 +46,6 @@ from tractor.msg import ( Error, MsgType, Stop, - # Yield, types as msgtypes, MsgCodec, MsgDec, @@ -212,6 +211,8 @@ class RemoteActorError(Exception): ) -> None: super().__init__(message) + # for manual display without having to muck with `Exception.args` + self._message: str = message # TODO: maybe a better name? # - .errtype # - .retype @@ -454,32 +455,46 @@ class RemoteActorError(Exception): _repr ) - def __repr__(self) -> str: + def pformat(self) -> str: ''' - Nicely formatted boxed error meta data + traceback. + Nicely formatted boxed error meta data + traceback, OR just + the normal message from `.args` (for eg. as you'd want shown + by a locally raised `ContextCancelled`). ''' - from tractor.devx._code import pformat_boxed_tb - fields: str = self._mk_fields_str( - _body_fields - + - self.extra_body_fields, - ) - body: str = pformat_boxed_tb( - tb_str=self.tb_str, - fields_str=fields, - field_prefix=' |_', - # ^- is so that it's placed like so, - # just after ' ) + __repr__ = pformat + __str__ = pformat + def unwrap( self, ) -> BaseException: @@ -809,12 +824,9 @@ def pack_error( def unpack_error( msg: Error, - - chan: Channel|None = None, + chan: Channel, box_type: RemoteActorError = RemoteActorError, - hide_tb: bool = True, - ) -> None|Exception: ''' Unpack an 'error' message from the wire @@ -824,12 +836,10 @@ def unpack_error( which is the responsibilitiy of the caller. ''' - __tracebackhide__: bool = hide_tb - if not isinstance(msg, Error): return None - # retrieve the remote error's encoded details from fields + # retrieve the remote error's msg-encoded details tb_str: str = msg.tb_str message: str = ( f'{chan.uid}\n' @@ -858,7 +868,6 @@ def unpack_error( # original source error. elif boxed_type_str == 'RemoteActorError': assert boxed_type is RemoteActorError - # assert len(error_dict['relay_path']) >= 1 assert len(msg.relay_path) >= 1 exc = box_type( @@ -943,8 +952,6 @@ def _raise_from_unexpected_msg( raise unpack_error( msg, ctx.chan, - hide_tb=hide_tb, - ) from src_err # `MsgStream` termination msg. @@ -1014,6 +1021,7 @@ def _mk_msg_type_err( src_validation_error: ValidationError|None = None, src_type_error: TypeError|None = None, + is_invalid_payload: bool = False, ) -> MsgTypeError: ''' @@ -1028,12 +1036,12 @@ def _mk_msg_type_err( '`codec` must be a `MsgCodec` for send-side errors?' ) + from tractor.devx import ( + pformat_caller_frame, + ) # no src error from `msgspec.msgpack.Decoder.decode()` so # prolly a manual type-check on our part. if message is None: - from tractor.devx._code import ( - pformat_caller_frame, - ) tb_fmt: str = pformat_caller_frame(stack_limit=3) message: str = ( f'invalid msg -> {msg}: {type(msg)}\n\n' @@ -1071,47 +1079,57 @@ def _mk_msg_type_err( # `Channel.recv()` case else: - # decode the msg-bytes using the std msgpack - # interchange-prot (i.e. without any - # `msgspec.Struct` handling) so that we can - # determine what `.msg.types.Msg` is the culprit - # by reporting the received value. 
- msg_dict: dict = msgpack.decode(msg) - msg_type_name: str = msg_dict['msg_type'] - msg_type = getattr(msgtypes, msg_type_name) - message: str = ( - f'invalid `{msg_type_name}` IPC msg\n\n' - ) + if is_invalid_payload: + msg_type: str = type(msg) + message: str = ( + f'invalid `{msg_type.__qualname__}` payload\n\n' + f'<{type(msg).__qualname__}(\n' + f' |_pld: {codec.pld_spec_str} = {msg.pld!r}' + f')>\n' + ) + + else: + # decode the msg-bytes using the std msgpack + # interchange-prot (i.e. without any + # `msgspec.Struct` handling) so that we can + # determine what `.msg.types.Msg` is the culprit + # by reporting the received value. + msg_dict: dict = msgpack.decode(msg) + msg_type_name: str = msg_dict['msg_type'] + msg_type = getattr(msgtypes, msg_type_name) + message: str = ( + f'invalid `{msg_type_name}` IPC msg\n\n' + ) + # XXX be "fancy" and see if we can determine the exact + # invalid field such that we can comprehensively report + # the specific field's type problem. + msgspec_msg: str = src_validation_error.args[0].rstrip('`') + msg, _, maybe_field = msgspec_msg.rpartition('$.') + obj = object() + if (field_val := msg_dict.get(maybe_field, obj)) is not obj: + field_name_expr: str = ( + f' |_{maybe_field}: {codec.pld_spec_str} = ' + ) + fmt_val_lines: list[str] = pformat(field_val).splitlines() + fmt_val: str = ( + f'{fmt_val_lines[0]}\n' + + + textwrap.indent( + '\n'.join(fmt_val_lines[1:]), + prefix=' '*len(field_name_expr), + ) + ) + message += ( + f'{msg.rstrip("`")}\n\n' + f'<{msg_type.__qualname__}(\n' + # f'{".".join([msg_type.__module__, msg_type.__qualname__])}\n' + f'{field_name_expr}{fmt_val}\n' + f')>' + ) + if verb_header: message = f'{verb_header} ' + message - # XXX see if we can determine the exact invalid field - # such that we can comprehensively report the - # specific field's type problem - msgspec_msg: str = src_validation_error.args[0].rstrip('`') - msg, _, maybe_field = msgspec_msg.rpartition('$.') - obj = object() - if (field_val := msg_dict.get(maybe_field, obj)) is not obj: - field_name_expr: str = ( - f' |_{maybe_field}: {codec.pld_spec_str} = ' - ) - fmt_val_lines: list[str] = pformat(field_val).splitlines() - fmt_val: str = ( - f'{fmt_val_lines[0]}\n' - + - textwrap.indent( - '\n'.join(fmt_val_lines[1:]), - prefix=' '*len(field_name_expr), - ) - ) - message += ( - f'{msg.rstrip("`")}\n\n' - f'<{msg_type.__qualname__}(\n' - # f'{".".join([msg_type.__module__, msg_type.__qualname__])}\n' - f'{field_name_expr}{fmt_val}\n' - f')>' - ) - msgtyperr = MsgTypeError.from_decode( message=message, msgdict=msg_dict, diff --git a/tractor/_portal.py b/tractor/_portal.py index 806dcc7b..79a9dc5d 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -68,40 +68,6 @@ if TYPE_CHECKING: log = get_logger(__name__) -# TODO: remove and/or rework? -# -[ ] rename to `unwrap_result()` and use -# `._raise_from_unexpected_msg()` (after tweak to accept a `chan: -# Channel` arg) in key block?? -# -[ ] pretty sure this is entirely covered by -# `_exceptions._raise_from_unexpected_msg()` so REMOVE! -# def _unwrap_msg( -# msg: Return|Error, -# ctx: Context, - -# hide_tb: bool = True, - -# ) -> Any: -# ''' -# Unwrap a final result from a `{return: }` IPC msg. - -# ''' -# __tracebackhide__: bool = hide_tb -# try: -# return msg.pld -# except AttributeError as err: - -# # internal error should never get here -# # assert msg.get('cid'), ( -# assert msg.cid, ( -# "Received internal error at portal?" 
-# ) - -# raise unpack_error( -# msg, -# ctx.chan, -# ) from err - - class Portal: ''' A 'portal' to a memory-domain-separated `Actor`. @@ -173,12 +139,13 @@ class Portal: portal=self, ) + # @api_frame async def result(self) -> Any: ''' Return the result(s) from the remote actor's "main" task. ''' - # __tracebackhide__ = True + __tracebackhide__ = True # Check for non-rpc errors slapped on the # channel for which we always raise exc = self.channel._exc -- 2.34.1 From 219d5c17456adda9381cd8fd68433c4cbdc016a4 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 6 May 2024 13:04:58 -0400 Subject: [PATCH 087/305] Move pformatters into new `.devx.pformat` Since `._code` is prolly gonna get renamed (to something "frame & stack tools" related) and to give a bit better organization. Also adds a new `add_div()` helper, factored out of ctxc message creation in `._rpc._invoke()`, for adding a little "header line" divider under a given `message: str` with a little math to center it. --- tractor/devx/pformat.py | 135 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 tractor/devx/pformat.py diff --git a/tractor/devx/pformat.py b/tractor/devx/pformat.py new file mode 100644 index 00000000..0b35feee --- /dev/null +++ b/tractor/devx/pformat.py @@ -0,0 +1,135 @@ +# tractor: structured concurrent "actors". +# Copyright 2018-eternity Tyler Goodlet. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +Pretty formatters for use throughout the code base. +Mostly handy for logging and exception message content. + +''' +import textwrap +import traceback + + +def add_div( + message: str, + div_str: str = '------ - ------', + +) -> str: + ''' + Add a "divider string" to the input `message` with + a little math to center it underneath. + + ''' + div_offset: int = ( + round(len(message)/2)+1 + - + round(len(div_str)/2)+1 + ) + div_str: str = ( + '\n' + ' '*div_offset + f'{div_str}\n' + ) + return div_str + + +def pformat_boxed_tb( + tb_str: str, + fields_str: str|None = None, + field_prefix: str = ' |_', + + tb_box_indent: int|None = None, + tb_body_indent: int = 1, + +) -> str: + ''' + Create a "boxed" looking traceback string. + + Useful for emphasizing traceback text content as being an + embedded attribute of some other object (like + a `RemoteActorError` or other boxing remote error shuttle + container). + + Any other parent/container "fields" can be passed in the + `fields_str` input along with other prefix/indent settings. + + ''' + if ( + fields_str + and + field_prefix + ): + fields: str = textwrap.indent( + fields_str, + prefix=field_prefix, + ) + else: + fields = fields_str or '' + + tb_body = tb_str + if tb_body_indent: + tb_body: str = textwrap.indent( + tb_str, + prefix=tb_body_indent * ' ', + ) + + tb_box: str = ( + f'|\n' + f' ------ - ------\n' + f'{tb_body}' + f' ------ - ------\n' + f'_|\n' + ) + tb_box_indent: str = ( + tb_box_indent + or + 1 + + # (len(field_prefix)) + # ? 
^-TODO-^ ? if you wanted another indent level + ) + if tb_box_indent > 0: + tb_box: str = textwrap.indent( + tb_box, + prefix=tb_box_indent * ' ', + ) + + return ( + fields + + + tb_box + ) + + +def pformat_caller_frame( + stack_limit: int = 1, + box_tb: bool = True, +) -> str: + ''' + Capture and return the traceback text content from + `stack_limit` call frames up. + + ''' + tb_str: str = ( + '\n'.join( + traceback.format_stack(limit=stack_limit) + ) + ) + if box_tb: + tb_str: str = pformat_boxed_tb( + tb_str=tb_str, + field_prefix=' ', + indent='', + ) + return tb_str -- 2.34.1 From 048c60f1128f832822cc05f5c4cccb72c6662589 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 6 May 2024 13:12:44 -0400 Subject: [PATCH 088/305] "Icons" in `._entry`'s subactor `.info()` messages Add a little `>` or `X` supervision icon indicating the spawning or termination of each sub-actor respectively. --- tractor/_entry.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tractor/_entry.py b/tractor/_entry.py index 78f83283..750dc59f 100644 --- a/tractor/_entry.py +++ b/tractor/_entry.py @@ -20,6 +20,7 @@ Sub-process entry points. """ from __future__ import annotations from functools import partial +# import textwrap from typing import ( Any, TYPE_CHECKING, @@ -91,7 +92,7 @@ def _mp_main( pass # handle it the same way trio does? finally: - log.info(f"Actor {actor.uid} terminated") + log.info(f"Subactor {actor.uid} terminated") def _trio_main( @@ -125,9 +126,11 @@ def _trio_main( f' loglevel: {actor.loglevel}\n' ) log.info( - 'Started new trio process:\n' + 'Started new trio subactor:\n' + - actor_info + '>\n' # like a "started/play"-icon from super perspective + + + actor_info, ) try: @@ -148,5 +151,7 @@ def _trio_main( log.info( 'Subactor terminated\n' + + 'x\n' # like a "crossed-out/killed" from super perspective + + actor_info ) -- 2.34.1 From d680e31e4fee43855ecdd424be38695c18c68e1c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 6 May 2024 13:27:00 -0400 Subject: [PATCH 089/305] Mk `drain_to_final_msg()` never raise from `Error` Since we usually want them raised from some (internal) call to `Context.maybe_raise()` and NOT directly from the drainage call, make it possible via a new `raise_error: bool` to both `PldRx.recv_msg_w_pld()` and `.dec_msg()`. In support, - rename `return_msg` -> `result_msg` since we expect to return `Error`s. - do a `result_msg` assign and `break` in the `case Error()`. - add `**dec_msg_kwargs` passthrough for other `.dec_msg()` calling methods. Other, - drop/aggregate todo-notes around the main loop's `ctx._pld_rx.recv_msg_w_pld()` call. - add (configurable) frame hiding to most payload receive meths. 
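
I.e. the main drain-loop receive (quoted from the diff below) now opts
out of in-place raising, stashing any `Error` as the final result msg
and leaving it to a later (internal) `Context.maybe_raise()` call:

    msg, pld = await ctx._pld_rx.recv_msg_w_pld(
        ipc=ctx,
        expect_msg=Return,
        raise_error=False,
    )
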
--- tractor/msg/_ops.py | 146 ++++++++++++++++++++++---------------------- 1 file changed, 72 insertions(+), 74 deletions(-) diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index 4cf20496..1ba623db 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -161,9 +161,10 @@ class PldRx(Struct): ipc_msg: MsgType|None = None, expect_msg: Type[MsgType]|None = None, - **kwargs, + **dec_msg_kwargs, ) -> Any|Raw: + __tracebackhide__: bool = True msg: MsgType = ( ipc_msg @@ -176,6 +177,7 @@ class PldRx(Struct): msg, ctx=ctx, expect_msg=expect_msg, + **dec_msg_kwargs, ) async def recv_pld( @@ -183,14 +185,16 @@ class PldRx(Struct): ctx: Context, ipc_msg: MsgType|None = None, expect_msg: Type[MsgType]|None = None, + hide_tb: bool = True, - **kwargs + **dec_msg_kwargs, ) -> Any|Raw: ''' Receive a `MsgType`, then decode and return its `.pld` field. ''' + __tracebackhide__: bool = hide_tb msg: MsgType = ( ipc_msg or @@ -199,9 +203,10 @@ class PldRx(Struct): await ctx._rx_chan.receive() ) return self.dec_msg( - msg, + msg=msg, ctx=ctx, expect_msg=expect_msg, + **dec_msg_kwargs, ) def dec_msg( @@ -210,12 +215,16 @@ class PldRx(Struct): ctx: Context, expect_msg: Type[MsgType]|None, + raise_error: bool = True, + hide_tb: bool = True, + ) -> PayloadT|Raw: ''' Decode a msg's payload field: `MsgType.pld: PayloadT|Raw` and return the value or raise an appropriate error. ''' + __tracebackhide__: bool = hide_tb match msg: # payload-data shuttle msg; deliver the `.pld` value # directly to IPC (primitive) client-consumer code. @@ -228,7 +237,8 @@ class PldRx(Struct): pld: PayloadT = self._pldec.decode(pld) log.runtime( 'Decoded msg payload\n\n' - f'{msg}\n' + f'{msg}\n\n' + f'where payload is\n' f'|_pld={pld!r}\n' ) return pld @@ -237,8 +247,9 @@ class PldRx(Struct): except ValidationError as src_err: msgterr: MsgTypeError = _mk_msg_type_err( msg=msg, - codec=self.dec, + codec=self.pld_dec, src_validation_error=src_err, + is_invalid_payload=True, ) msg: Error = pack_from_raise( local_err=msgterr, @@ -263,8 +274,29 @@ class PldRx(Struct): case Error(): src_err = MessagingError( - 'IPC ctx dialog terminated without `Return`-ing a result' + 'IPC ctx dialog terminated without `Return`-ing a result\n' + f'Instead it raised {msg.boxed_type_str!r}!' ) + # XXX NOTE XXX another super subtle runtime-y thing.. + # + # - when user code (transitively) calls into this + # func (usually via a `Context/MsgStream` API) we + # generally want errors to propagate immediately + # and directly so that the user can define how it + # wants to handle them. + # + # HOWEVER, + # + # - for certain runtime calling cases, we don't want to + # directly raise since the calling code might have + # special logic around whether to raise the error + # or supress it silently (eg. a `ContextCancelled` + # received from the far end which was requested by + # this side, aka a self-cancel). + # + # SO, we offer a flag to control this. + if not raise_error: + return src_err case Stop(cid=cid): message: str = ( @@ -305,6 +337,9 @@ class PldRx(Struct): f'{msg}\n' ) + # TODO: maybe use the new `.add_note()` from 3.11? 
+ # |_https://docs.python.org/3.11/library/exceptions.html#BaseException.add_note + # # fallthrough and raise from `src_err` _raise_from_unexpected_msg( ctx=ctx, @@ -312,7 +347,7 @@ class PldRx(Struct): src_err=src_err, log=log, expect_msg=expect_msg, - hide_tb=False, + hide_tb=hide_tb, ) async def recv_msg_w_pld( @@ -320,6 +355,8 @@ class PldRx(Struct): ipc: Context|MsgStream, expect_msg: MsgType, + **kwargs, + ) -> tuple[MsgType, PayloadT]: ''' Retrieve the next avail IPC msg, decode it's payload, and return @@ -335,6 +372,7 @@ class PldRx(Struct): msg, ctx=ipc, expect_msg=expect_msg, + **kwargs, ) return msg, pld @@ -433,70 +471,33 @@ async def drain_to_final_msg( # basically ignoring) any bi-dir-stream msgs still in transit # from the far end. pre_result_drained: list[MsgType] = [] - return_msg: Return|None = None + result_msg: Return|Error|None = None while not ( ctx.maybe_error and not ctx._final_result_is_set() ): try: - # TODO: can remove? - # await trio.lowlevel.checkpoint() - - # NOTE: this REPL usage actually works here dawg! Bo - # from .devx._debug import pause - # await pause() - - # TODO: bad idea? - # -[ ] wrap final outcome channel wait in a scope so - # it can be cancelled out of band if needed? - # - # with trio.CancelScope() as res_cs: - # ctx._res_scope = res_cs - # msg: dict = await ctx._rx_chan.receive() - # if res_cs.cancelled_caught: - - # TODO: ensure there's no more hangs, debugging the - # runtime pretty preaase! - # from .devx._debug import pause - # await pause() - - # TODO: can remove this finally? - # we have no more need for the sync draining right - # since we're can kinda guarantee the async - # `.receive()` below will never block yah? - # - # if ( - # ctx._cancel_called and ( - # ctx.cancel_acked - # # or ctx.chan._cancel_called - # ) - # # or not ctx._final_result_is_set() - # # ctx.outcome is not - # # or ctx.chan._closed - # ): - # try: - # msg: dict = await ctx._rx_chan.receive_nowait()() - # except trio.WouldBlock: - # log.warning( - # 'When draining already `.cancel_called` ctx!\n' - # 'No final msg arrived..\n' - # ) - # break - # else: - # msg: dict = await ctx._rx_chan.receive() - - # TODO: don't need it right jefe? - # with trio.move_on_after(1) as cs: - # if cs.cancelled_caught: - # from .devx._debug import pause - # await pause() - - # pray to the `trio` gawds that we're corrent with this - # msg: dict = await ctx._rx_chan.receive() + # receive all msgs, scanning for either a final result + # or error; the underlying call should never raise any + # remote error directly! msg, pld = await ctx._pld_rx.recv_msg_w_pld( ipc=ctx, expect_msg=Return, + raise_error=False, ) + # ^-TODO-^ some bad ideas? + # -[ ] wrap final outcome .receive() in a scope so + # it can be cancelled out of band if needed? + # |_with trio.CancelScope() as res_cs: + # ctx._res_scope = res_cs + # msg: dict = await ctx._rx_chan.receive() + # if res_cs.cancelled_caught: + # + # -[ ] make sure pause points work here for REPLing + # the runtime itself; i.e. ensure there's no hangs! + # |_from tractor.devx._debug import pause + # await pause() + # NOTE: we get here if the far end was # `ContextCancelled` in 2 cases: @@ -504,7 +505,7 @@ async def drain_to_final_msg( # SHOULD NOT raise that far end error, # 2. WE DID NOT REQUEST that cancel and thus # SHOULD RAISE HERE! 
- except trio.Cancelled: + except trio.Cancelled as taskc: # CASE 2: mask the local cancelled-error(s) # only when we are sure the remote error is @@ -514,7 +515,7 @@ async def drain_to_final_msg( # CASE 1: we DID request the cancel we simply # continue to bubble up as normal. - raise + raise taskc match msg: @@ -534,7 +535,7 @@ async def drain_to_final_msg( # if ctx._rx_chan: # await ctx._rx_chan.aclose() # TODO: ^ we don't need it right? - return_msg = msg + result_msg = msg break # far end task is still streaming to us so discard @@ -565,10 +566,7 @@ async def drain_to_final_msg( f'{pretty_struct.pformat(msg)}\n' ) - return ( - return_msg, - pre_result_drained, - ) + break # drain up to the `msg_limit` hoping to get # a final result or error/ctxc. @@ -604,9 +602,9 @@ async def drain_to_final_msg( case Error(): # TODO: can we replace this with `ctx.maybe_raise()`? # -[ ] would this be handier for this case maybe? - # async with maybe_raise_on_exit() as raises: - # if raises: - # log.error('some msg about raising..') + # |_async with maybe_raise_on_exit() as raises: + # if raises: + # log.error('some msg about raising..') # re: Exception|None = ctx._remote_error if re: @@ -640,7 +638,7 @@ async def drain_to_final_msg( # raise_overrun_from_self=False, raise_overrun_from_self=raise_overrun, ) - + result_msg = msg break # OOOOOF, yeah obvi we need this.. # XXX we should never really get here @@ -686,6 +684,6 @@ async def drain_to_final_msg( ) return ( - return_msg, + result_msg, pre_result_drained, ) -- 2.34.1 From d3e13658ab55e41c743742a7ee61c9d2ef9f34f1 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 7 May 2024 09:20:43 -0400 Subject: [PATCH 090/305] Add a "current IPC `Context`" `ContextVar` Expose it from `._state.current_ipc_ctx()` and set it inside `._rpc._invoke()` for child and inside `Portal.open_context()` for parent. Still need to write a few more tests (particularly demonstrating usage throughout multiple nested nurseries on each side) but this suffices as a proto for testing with some debugger request-from-subactor stuff. Other, - use new `.devx.pformat.add_div()` for ctxc messages. - add a block to always traceback dump on corrupted cs stacks. - better handle non-RAEs exception output-formatting in context termination summary log message. - use a summary for `start_status` for msg logging in RPC loop. 
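
Expected usage from task code on either side, sketched against the test
changes below (the `@tractor.context` endpoint styling follows the
existing test suite; treat this as a rough example, not a spec):

    import tractor
    from tractor._state import current_ipc_ctx

    @tractor.context
    async def child(ctx: tractor.Context) -> None:
        # the runtime sets the ctxvar inside `._rpc._invoke()`
        # before scheduling the target func, so task-local code
        # can look up the ctx without threading a ref through.
        assert current_ipc_ctx() is ctx
        await ctx.started()

and equivalently on the parent side just after entering
`Portal.open_context()`.
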
--- tests/test_context_stream_semantics.py | 4 + tractor/_rpc.py | 133 +++++++++++++++---------- tractor/_state.py | 23 +++++ 3 files changed, 109 insertions(+), 51 deletions(-) diff --git a/tests/test_context_stream_semantics.py b/tests/test_context_stream_semantics.py index cedddf73..8edea510 100644 --- a/tests/test_context_stream_semantics.py +++ b/tests/test_context_stream_semantics.py @@ -25,6 +25,7 @@ from tractor._exceptions import ( StreamOverrun, ContextCancelled, ) +from tractor._state import current_ipc_ctx from tractor._testing import ( tractor_test, @@ -144,6 +145,8 @@ async def simple_setup_teardown( global _state _state = True + assert current_ipc_ctx() is ctx + # signal to parent that we're up await ctx.started(data + 1) @@ -204,6 +207,7 @@ def test_simple_context( block_forever=callee_blocks_forever, ) as (ctx, sent), ): + assert current_ipc_ctx() is ctx assert sent == 11 if callee_blocks_forever: diff --git a/tractor/_rpc.py b/tractor/_rpc.py index 56d91534..b8dc42b6 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -54,7 +54,10 @@ from ._exceptions import ( pack_error, unpack_error, ) -from .devx import _debug +from .devx import ( + _debug, + add_div, +) from . import _state from .log import get_logger from .msg import ( @@ -246,6 +249,9 @@ async def _errors_relayed_via_ipc( ) -> None: __tracebackhide__: bool = hide_tb + # TODO: a debug nursery when in debug mode! + # async with maybe_open_debugger_nursery() as debug_tn: + # => see matching comment in side `._debug._pause()` try: yield # run RPC invoke body @@ -269,6 +275,8 @@ async def _errors_relayed_via_ipc( # TODO: maybe we'll want different "levels" of debugging # eventualy such as ('app', 'supervisory', 'runtime') ? + # + # -[ ] this if check is duplicate with `._maybe_enter_pm()`.. if not is_multi_cancelled(err): entered_debug: bool = False if ( @@ -292,7 +300,6 @@ async def _errors_relayed_via_ipc( ) ) ): - # await _debug.pause() # XXX QUESTION XXX: is there any case where we'll # want to debug IPC disconnects as a default? # => I can't think of a reason that inspecting this @@ -300,7 +307,14 @@ async def _errors_relayed_via_ipc( # recovery logic - the only case is some kind of # strange bug in our transport layer itself? Going # to keep this open ended for now. - entered_debug = await _debug._maybe_enter_pm(err) + log.debug( + 'RPC task crashed, attempting to enter debugger\n' + f'|_{ctx}' + ) + entered_debug = await _debug._maybe_enter_pm( + err, + api_frame=inspect.currentframe(), + ) if not entered_debug: log.exception( 'RPC task crashed\n' @@ -430,6 +444,8 @@ async def _invoke( ) context: bool = False + assert not _state._ctxvar_Context.get() + # TODO: deprecate this style.. if getattr(func, '_tractor_stream_function', False): # handle decorated ``@tractor.stream`` async functions @@ -553,6 +569,7 @@ async def _invoke( async with trio.open_nursery() as tn: ctx._scope_nursery = tn ctx._scope = tn.cancel_scope + _state._ctxvar_Context.set(ctx) task_status.started(ctx) # TODO: should would be nice to have our @@ -588,7 +605,6 @@ async def _invoke( cs: CancelScope = ctx._scope if cs.cancel_called: - canceller: tuple = ctx.canceller explain: str = f'{ctx.side!r}-side task was cancelled by ' @@ -617,23 +633,9 @@ async def _invoke( else: explain += 'a remote peer' - # TODO: move this "div centering" into - # a helper for use elsewhere! 
- div_chars: str = '------ - ------' - div_offset: int = ( - round(len(explain)/2)+1 - + - round(len(div_chars)/2)+1 - ) - div_str: str = ( - '\n' - + - ' '*div_offset - + - f'{div_chars}\n' - ) explain += ( - div_str + + add_div(message=explain) + + f'<= canceller: {canceller}\n' f'=> cancellee: {our_uid}\n' # TODO: better repr for ctx tasks.. @@ -660,10 +662,10 @@ async def _invoke( boxed_type=trio.Cancelled, canceller=canceller, ) - # assign local error so that the `.outcome` - # resolves to an error for both reporting and - # state checks. - ctx._local_error = ctxc + # does this matter other then for + # consistentcy/testing? |_ no user code should be + # in this scope at this point.. + # ctx._local_error = ctxc raise ctxc # XXX: do we ever trigger this block any more? @@ -673,6 +675,13 @@ async def _invoke( BaseException, ) as scope_error: + if ( + isinstance(scope_error, RuntimeError) + and scope_error.args + and 'Cancel scope stack corrupted' in scope_error.args[0] + ): + log.exception('Cancel scope stack corrupted!?\n') + # _debug.mk_pdb().set_trace() # always set this (child) side's exception as the # local error on the context @@ -706,17 +715,32 @@ async def _invoke( res_type_str, res_str, ) = ( - ('error', f'{type(merr)}',) - if merr + ('error', f'{type(merr)}',) if merr else ( 'result', f'`{repr(ctx.outcome)}`', ) ) - log.runtime( + message: str = ( f'IPC context terminated with a final {res_type_str}\n\n' f'{ctx}' ) + if merr: + from tractor import RemoteActorError + if not isinstance(merr, RemoteActorError): + fmt_merr: str = ( + f'\n{merr!r}\n' + # f'{merr.args[0]!r}\n' + ) + else: + fmt_merr = f'\n{merr!r}' + log.error( + message + + + fmt_merr + ) + else: + log.runtime(message) async def try_ship_error_to_remote( @@ -951,12 +975,19 @@ async def process_messages( kwargs=kwargs, # type-spec this? see `msg.types` uid=actorid, ): - log.runtime( + start_status: str = ( 'Handling RPC `Start` request\n' - f'<= peer: {actorid}\n' - f' |_{ns}.{funcname}({kwargs})\n\n' + f'<= peer: {actorid}\n\n' + f' |_{chan}\n' + f' |_cid: {cid}\n\n' + # f' |_{ns}.{funcname}({kwargs})\n' + f'>> {actor.uid}\n' + f' |_{actor}\n' + f' -> nsp: `{ns}.{funcname}({kwargs})`\n' - f'{pretty_struct.pformat(msg)}\n' + # f' |_{ns}.{funcname}({kwargs})\n\n' + + # f'{pretty_struct.pformat(msg)}\n' ) # runtime-internal endpoint: `Actor.` @@ -985,6 +1016,10 @@ async def process_messages( await chan.send(err_msg) continue + start_status += ( + f' -> func: {func}\n' + ) + # schedule a task for the requested RPC function # in the actor's main "service nursery". # @@ -992,18 +1027,8 @@ async def process_messages( # supervision isolation? would avoid having to # manage RPC tasks individually in `._rpc_tasks` # table? - log.runtime( - f'Spawning task for RPC request\n' - f'<= caller: {chan.uid}\n' - f' |_{chan}\n\n' - # ^-TODO-^ maddr style repr? - # f' |_@ /ipv4/{chan.raddr}/tcp/{chan.rport}/' - # f'cid="{cid[-16:]} .."\n\n' - - f'=> {actor}\n' - f' |_cid: {cid}\n' - f' |>> {func}()\n' - ) + start_status += ' -> scheduling new task..\n' + log.runtime(start_status) try: ctx: Context = await actor._service_n.start( partial( @@ -1031,8 +1056,9 @@ async def process_messages( # scoped exception from ``_invoke()`` itself. if isinstance(err := ctx, Exception): log.warning( - 'Task for RPC failed?' 
-                    f'|_ {func}()\n\n'
+                    start_status
+                    +
+                    ' -> task for RPC failed?\n\n'
                     f'{err}'
                 )
                 continue
@@ -1151,12 +1177,17 @@ async def process_messages(
 
     finally:
         # msg debugging for when the machinery is brokey
-        log.runtime(
-            'Exiting IPC msg loop with final msg\n\n'
-            f'<= peer: {chan.uid}\n'
-            f'  |_{chan}\n\n'
-            f'{pretty_struct.pformat(msg)}'
-        )
+        if msg is None:
+            message: str = 'Exiting IPC msg loop without receiving a msg?'
+        else:
+            message: str = (
+                'Exiting IPC msg loop with final msg\n\n'
+                f'<= peer: {chan.uid}\n'
+                f'  |_{chan}\n\n'
+                f'{pretty_struct.pformat(msg)}'
+            )
+
+        log.runtime(message)
 
     # transport **WAS NOT** disconnected
     return (False, msg)
diff --git a/tractor/_state.py b/tractor/_state.py
index 30346a6a..a3729833 100644
--- a/tractor/_state.py
+++ b/tractor/_state.py
@@ -19,13 +19,19 @@ Per process state
 
 """
 from __future__ import annotations
+from contextvars import (
+    ContextVar,
+)
 from typing import (
     Any,
     TYPE_CHECKING,
 )
 
+from trio.lowlevel import current_task
+
 if TYPE_CHECKING:
     from ._runtime import Actor
+    from ._context import Context
 
 
 _current_actor: Actor|None = None  # type: ignore # noqa
@@ -110,3 +116,20 @@ def debug_mode() -> bool:
 
 def is_root_process() -> bool:
     return _runtime_vars['_is_root']
+
+
+_ctxvar_Context: ContextVar[Context] = ContextVar(
+    'ipc_context',
+    default=None,
+)
+
+
+def current_ipc_ctx() -> Context:
+    ctx: Context = _ctxvar_Context.get()
+    if not ctx:
+        from ._exceptions import InternalError
+        raise InternalError(
+            'No IPC context has been allocated for this task yet?\n'
+            f'|_{current_task()}\n'
+        )
+    return ctx
-- 
2.34.1


From 332ce97650b46242e9bb1464ac4ae6a76d94d597 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 8 May 2024 08:50:16 -0400
Subject: [PATCH 091/305] Allow `Stop` passthrough from `PldRx.recv_msg_w_pld()`

Since we need to allow it (at the least) inside
`drain_to_final_msg()` for handling stream-phase termination races
where we don't want to have to handle a raised error from something
like `Context.result()`. Expose the passthrough option via
a `passthrough_non_pld_msgs: bool` kwarg.

Add comprehensive comment to `current_pldrx()`.
---
 tractor/msg/_ops.py | 33 ++++++++++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py
index 1ba623db..3b0b8339 100644
--- a/tractor/msg/_ops.py
+++ b/tractor/msg/_ops.py
@@ -355,6 +355,9 @@ class PldRx(Struct):
         ipc: Context|MsgStream,
         expect_msg: MsgType,
 
+        # NOTE: generally speaking only for handling `Stop`-msgs that
+        # arrive during a call to `drain_to_final_msg()` above!
+        passthrough_non_pld_msgs: bool = True,
         **kwargs,
 
     ) -> tuple[MsgType, PayloadT]:
@@ -365,6 +368,11 @@ class PldRx(Struct):
 
         '''
        msg: MsgType = await ipc._rx_chan.receive()
+        if passthrough_non_pld_msgs:
+            match msg:
+                case Stop():
+                    return msg, None
 
         # TODO: is there some way we can inject the decoded
         # payload into an existing output buffer for the original
@@ -389,15 +397,30 @@ _ctxvar_PldRx: ContextVar[PldRx] = ContextVar(
 
 def current_pldrx() -> PldRx:
     '''
-    Return the current `trio.Task.context`'s msg-payload
-    receiver, the post IPC but pre-app code `MsgType.pld`
-    filter.
+    Return the current `trio.Task.context`'s msg-payload-receiver.
+
+    A payload receiver is the IPC-msg processing sub-sys which
+    filters inter-actor-task communicated payload data, i.e. the
+    `PayloadMsg.pld: PayloadT` field value, AFTER its container
+    shuttle msg (eg. `Started`/`Yield`/`Return`) has been delivered
+    up from `tractor`'s transport layer but BEFORE the data is
+    yielded to application code, normally via an IPC primitive API
+    like, for ex., `pld_data: PayloadT = MsgStream.receive()`.
 
     Modification of the current payload spec via `limit_plds()`
-    allows an application to contextually filter typed IPC msg
-    content delivered via wire transport.
+    allows a `tractor` application to contextually filter IPC
+    payload content with a type specification as supported by
+    the interchange backend.
+
+    - for `msgspec` see .
+
+    NOTE that the `PldRx` itself is a per-`Context` global sub-system
+    that normally does not change other than the applied pld-spec
+    for the current `trio.Task`.
 
     '''
+    # ctx: Context = current_ipc_ctx()
+    # return ctx._pld_rx
     return _ctxvar_PldRx.get()
-- 
2.34.1


From 04bd53ff10b0d1143f4a7b2b8f9edc9db5e1399d Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 8 May 2024 09:08:01 -0400
Subject: [PATCH 092/305] Big debugger rework, more tolerance for internal err-hangs

Since I was running into them (internal errors) during lock request
machinery dev and was getting all sorts of difficult to understand hangs
whenever I intro-ed a bug to either side of the ipc ctx; this all while
trying to get the msg-spec working for `Lock` requesting subactors..

Deats:
- hideframes for `@acm`s and `trio.Event.wait()`, `Lock.release()`.
- better detail out the `Lock.acquire/release()` impls
- drop `Lock.remote_task_in_debug`, use new `.ctx_in_debug`.
- add a `Lock.release(force: bool)`.
- move most of what was `_acquire_debug_lock_from_root_task()` and some
  of the `lock_tty_for_child().__a[enter/exit]()` logic into
  `Lock.[acquire/release]()` including a bunch more logging.
- move `lock_tty_for_child()` up in the module to below `Lock`, with
  some rework:
  - drop `subactor_uid: tuple` arg since we can just use the `ctx`..
  - add exception handler blocks for reporting internal (impl) errors
    and always force release the lock in such cases.
- extend `DebugStatus` (prolly will rename to `DebugRequest` btw):
  - add `.req_ctx: Context` for subactor side.
  - add `.req_finished: trio.Event` to sub to signal request task exit.
  - extend `.shield_sigint()` doc-str.
  - add `.release()` to encaps all the state mgmt previously strewn
    about inside `._pause()`..
- use new `DebugStatus.release()` to replace all the duplication:
  - inside `PdbREPL.set_[continue/quit]()`.
  - inside `._pause()` for the subactor branch on internal repl-invocation
    error cases,
  - in the `_enter_repl_sync()` closure on error,
- replace `apply_debug_codec()` -> `apply_debug_pldec()` in tandem with
  the new `PldRx` sub-sys which handles the new `__pld_spec__`.
- add a new `pformat_cs()` helper orig to help debug a cs stack
  corruption; going to move to `.devx.pformat` obvi.
- rename `wait_for_parent_stdin_hijack()` -> `request_root_stdio_lock()`
  with improvements:
  - better doc-str and add todos,
  - use `DebugStatus` more stringently to encaps all subactor req state.
  - error handling blocks for cancellation and straight up impl errors
    directly around the `.open_context()` block with the latter doing
    a `ctx.cancel()` to avoid hanging in the shielded `.req_cs` scope.
  - similar exc blocks for the func's overall body with explicit
    `log.exception()` reporting.
  - only set the new `DebugStatus.req_finished: trio.Event` in `finally`.
- rename `mk_mpdb()` -> `mk_pdb()` and don't call `.shield_sigint()`
  implicitly since the caller usage does matter for this.
- factor out `any_connected_locker_child()` from the SIGINT handler. - rework SIGINT handler to better handle any stale-lock/hang cases: - use new `Lock.ctx_in_debug: Context` to detect subactor-in-debug. and use it to cancel any lock request instead of the lower level - use `problem: str` summary approach to log emissions. - rework `_pause()` given all of the above, stuff not yet mentioned: - don't take `shield: bool` input and proxy to `debug_func()` (for now). - drop `extra_frames_up_when_async: int` usage, expect `**debug_func_kwargs` to passthrough an `api_frame: Frametype` (more on this later). - lotsa asserts around the request ctx vs. task-in-debug ctx using new `current_ipc_ctx()`. - asserts around `DebugStatus` state. - rework and simplify the `debug_func` hooks, `_set_trace()`/`_post_mortem()`: - make them accept a non-optional `repl: PdbRepl` and `api_frame: FrameType` which should be used to set the current frame when the REPL engages. - always hide the hook frames. - always accept a `tb: TracebackType` to `_post_mortem()`. |_ copy and re-impl what was the delegation to `pdbp.xpm()`/`pdbp.post_mortem()` and instead call the underlying `Pdb.interaction()` ourselves with a `caller_frame` and tb instance. - adjust the public `.pause()` impl: - accept optional `hide_tb` and `api_frame` inputs. - mask opening a cancel-scope for now (can cause `trio` stack corruption, see notes) and thus don't use the `shield` input other then to eventually passthrough to `_post_mortem()`? |_ thus drop `task_status` support for now as well. |_ pretty sure correct soln is a debug-nursery around `._invoke()`. - since no longer using `extra_frames_up_when_async` inside `debug_func()`s ensure all public apis pass a `api_frame`. - re-impl our `tractor.post_mortem()` to directly call into `._pause()` instead of binding in via `partial` and mk it take similar input as `.pause()`. - drop `Lock.release()` from `_maybe_enter_pm()`, expose and pass expected frame and tb. - use necessary changes from all the above within `maybe_wait_for_debugger()` and `acquire_debug_lock()`. Lel, sorry thought that would be shorter.. There's still a lot more re-org to do particularly with `DebugStatus` encapsulation but it's coming in follow up. --- tractor/devx/__init__.py | 13 +- tractor/devx/_debug.py | 1729 ++++++++++++++++++++++---------------- 2 files changed, 993 insertions(+), 749 deletions(-) diff --git a/tractor/devx/__init__.py b/tractor/devx/__init__.py index c1a93878..bb72d1f9 100644 --- a/tractor/devx/__init__.py +++ b/tractor/devx/__init__.py @@ -30,17 +30,19 @@ from ._debug import ( open_crash_handler as open_crash_handler, maybe_open_crash_handler as maybe_open_crash_handler, post_mortem as post_mortem, + mk_pdb as mk_pdb, ) from ._stackscope import ( enable_stack_on_sig as enable_stack_on_sig, ) -# from .pformat import ( -# add_div as add_div, -# pformat_caller_frame as pformat_caller_frame, -# pformat_boxed_tb as pformat_boxed_tb, -# ) +from .pformat import ( + add_div as add_div, + pformat_caller_frame as pformat_caller_frame, + pformat_boxed_tb as pformat_boxed_tb, +) +# TODO, move this to a new `.devx._pdbp` mod? def _enable_readline_feats() -> str: ''' Handle `readline` when compiled with `libedit` to avoid breaking @@ -72,5 +74,4 @@ def _enable_readline_feats() -> str: return 'readline' -# TODO, move this to a new `.devx._pdbp` mod? 
_enable_readline_feats() diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index e4ab7d83..0567e42a 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -26,11 +26,13 @@ from contextlib import ( contextmanager as cm, nullcontext, _GeneratorContextManager, + _AsyncGeneratorContextManager, ) from functools import ( partial, cached_property, ) +import inspect import os import signal import sys @@ -48,13 +50,14 @@ from typing import ( from types import ( FrameType, ModuleType, + TracebackType, ) from msgspec import Struct import pdbp import sniffio -import tractor import trio +from trio import CancelScope from trio.lowlevel import ( current_task, Task, @@ -62,26 +65,25 @@ from trio.lowlevel import ( from trio import ( TaskStatus, ) - +import tractor from tractor.log import get_logger -from tractor.msg import ( - _codec, -) from tractor._state import ( current_actor, is_root_process, debug_mode, + current_ipc_ctx, ) -from tractor._exceptions import ( - is_multi_cancelled, - ContextCancelled, -) -from tractor._ipc import Channel +# from .pformat import pformat_caller_frame if TYPE_CHECKING: + from tractor._ipc import Channel + from tractor._context import Context from tractor._runtime import ( Actor, ) + from tractor.msg import ( + _codec, + ) log = get_logger(__name__) @@ -115,6 +117,8 @@ log = get_logger(__name__) pdbp.hideframe(trio._core._run.NurseryManager.__aexit__) pdbp.hideframe(trio._core._run.CancelScope.__exit__) pdbp.hideframe(_GeneratorContextManager.__exit__) +pdbp.hideframe(_AsyncGeneratorContextManager.__aexit__) +pdbp.hideframe(trio.Event.wait) __all__ = [ 'breakpoint', @@ -141,14 +145,14 @@ class LockRelease( cid: str -__msg_spec__: TypeAlias = LockStatus|LockRelease +__pld_spec__: TypeAlias = LockStatus|LockRelease class Lock: ''' - Actor global debug lock state. + Actor-tree-global debug lock state, exists only in a root process. - Mostly to avoid a lot of ``global`` declarations for now XD. + Mostly to avoid a lot of global declarations for now XD. ''' # XXX local ref to the `Pbp` instance, ONLY set in the @@ -157,30 +161,17 @@ class Lock: # that does not have this lock acquired in the root proc. repl: PdbREPL|None = None - # placeholder for function to set a ``trio.Event`` on debugger exit - # pdb_release_hook: Callable | None = None - - remote_task_in_debug: str|None = None - @staticmethod - def get_locking_task_cs() -> trio.CancelScope|None: - if is_root_process(): - return Lock._locking_task_cs - - raise RuntimeError( - '`Lock.locking_task_cs` is invalid in subactors!' - ) - - @staticmethod - def set_locking_task_cs( - cs: trio.CancelScope, - ) -> None: + def get_locking_task_cs() -> CancelScope|None: if not is_root_process(): raise RuntimeError( '`Lock.locking_task_cs` is invalid in subactors!' ) - Lock._locking_task_cs = cs + if ctx := Lock.ctx_in_debug: + return ctx._scope + + return None # ROOT ONLY # ------ - ------- @@ -195,12 +186,14 @@ class Lock: # * in case it needs to be manually cancelled in root due to # a stale lock condition (eg. 
IPC failure with the locking # child - global_actor_in_debug: tuple[str, str]|None = None - no_remote_has_tty: trio.Event|None = None - _locking_task_cs: trio.CancelScope|None = None + ctx_in_debug: Context|None = None + no_remote_has_tty: trio.Event|None = None _debug_lock: trio.StrictFIFOLock = trio.StrictFIFOLock() - _blocked: set[tuple[str, str]] = set() # `Actor.uid` block list + _blocked: set[ + tuple[str, str] # `Actor.uid` for per actor + |str # Context.cid for per task + ] = set() @classmethod def repr(cls) -> str: @@ -213,12 +206,11 @@ class Lock: if is_root_process(): lock_stats: trio.LockStatistics = cls._debug_lock.statistics() fields += ( - f'global_actor_in_debug: {cls.global_actor_in_debug}\n' f'no_remote_has_tty: {cls.no_remote_has_tty}\n' - f'remote_task_in_debug: {cls.remote_task_in_debug}\n' - f'_locking_task_cs: {cls.get_locking_task_cs()}\n' f'_blocked: {cls._blocked}\n\n' + f'ctx_in_debug: {cls.ctx_in_debug}\n\n' + f'_debug_lock: {cls._debug_lock}\n' f'lock_stats: {lock_stats}\n' ) @@ -234,16 +226,29 @@ class Lock: ) @classmethod - def release(cls): + @pdbp.hideframe + def release( + cls, + force: bool = False, + ): + lock: trio.StrictFIFOLock = cls._debug_lock try: - if not DebugStatus.is_main_trio_thread(): - trio.from_thread.run_sync( - cls._debug_lock.release - ) + if lock.locked(): + if not DebugStatus.is_main_trio_thread(): + trio.from_thread.run_sync( + cls._debug_lock.release + ) + else: + cls._debug_lock.release() + + message: str = 'TTY lock released for child\n' else: - cls._debug_lock.release() + message: str = 'TTY lock not held by any child\n' except RuntimeError as rte: + message: str = 'TTY lock FAILED to release for child??\n' + log.exception(message) + # uhhh makes no sense but been seeing the non-owner # release error even though this is definitely the task # that locked? @@ -256,7 +261,7 @@ class Lock: # raise RuntimeError( # 'Stale `Lock` detected, no remote task active!?\n' # f'|_{owner}\n' - # # f'{Lock}' + # # f'{cls}' # ) from rte if owner: @@ -266,23 +271,265 @@ class Lock: # something somethin corrupts a cancel-scope # somewhere.. + finally: + # IFF there are no more requesting tasks queued up fire, the + # "tty-unlocked" event thereby alerting any monitors of the lock that + # we are now back in the "tty unlocked" state. This is basically + # and edge triggered signal around an empty queue of sub-actor + # tasks that may have tried to acquire the lock. + stats = cls._debug_lock.statistics() + if ( + not stats.owner + or force + # and cls.no_remote_has_tty is not None + ): + message += '-> No more child ctx tasks hold the TTY lock!\n' + + # set and release + if cls.no_remote_has_tty is not None: + cls.no_remote_has_tty.set() + cls.no_remote_has_tty = None + + # cls.remote_task_in_debug = None + + else: + message += ( + f'-> Not signalling `Lock.no_remote_has_tty` since it has value:{cls.no_remote_has_tty}\n' + ) + + else: + # wakeup any waiters since the lock was released + # (presumably) temporarily. + if no_remote_has_tty := cls.no_remote_has_tty: + no_remote_has_tty.set() + no_remote_has_tty = trio.Event() + + message += ( + f'-> A child ctx task still owns the `Lock` ??\n' + f' |_owner task: {stats.owner}\n' + ) + + cls.ctx_in_debug = None + + @classmethod + @acm + async def acquire( + cls, + ctx: Context, + # subactor_uid: tuple[str, str], + # remote_task_uid: str, + + ) -> AsyncIterator[trio.StrictFIFOLock]: + ''' + Acquire a root-actor local FIFO lock which tracks mutex access of + the process tree's global debugger breakpoint. 
+ + This lock avoids tty clobbering (by preventing multiple processes + reading from stdstreams) and ensures multi-actor, sequential access + to the ``pdb`` repl. + + ''' + if not is_root_process(): + raise RuntimeError('Only callable by a root actor task!') + + # subactor_uid: tuple[str, str] = ctx.chan.uid + we_acquired: bool = False + log.runtime( + f'Attempting to acquire TTY lock for sub-actor\n' + f'{ctx}' + ) try: - # sometimes the ``trio`` might already be terminated in - # which case this call will raise. - if DebugStatus.repl_release is not None: - DebugStatus.repl_release.set() + pre_msg: str = ( + f'Entering lock checkpoint for sub-actor\n' + f'{ctx}' + ) + stats = cls._debug_lock.statistics() + if owner := stats.owner: + # and cls.no_remote_has_tty is not None + pre_msg += ( + f'\n' + f'`Lock` already held by local task?\n' + f'{owner}\n\n' + # f'On behalf of task: {cls.remote_task_in_debug!r}\n' + f'On behalf of IPC ctx\n' + f'{ctx}' + ) + log.runtime(pre_msg) + + # NOTE: if the surrounding cancel scope from the + # `lock_tty_for_child()` caller is cancelled, this line should + # unblock and NOT leave us in some kind of + # a "child-locked-TTY-but-child-is-uncontactable-over-IPC" + # condition. + await cls._debug_lock.acquire() + cls.ctx_in_debug = ctx + we_acquired = True + if cls.no_remote_has_tty is None: + # mark the tty lock as being in use so that the runtime + # can try to avoid clobbering any connection from a child + # that's currently relying on it. + cls.no_remote_has_tty = trio.Event() + # cls.remote_task_in_debug = remote_task_uid + + log.runtime( + f'TTY lock acquired for sub-actor\n' + f'{ctx}' + ) + + # NOTE: critical section: this yield is unshielded! + + # IF we received a cancel during the shielded lock entry of some + # next-in-queue requesting task, then the resumption here will + # result in that ``trio.Cancelled`` being raised to our caller + # (likely from ``lock_tty_for_child()`` below)! In + # this case the ``finally:`` below should trigger and the + # surrounding caller side context should cancel normally + # relaying back to the caller. + + yield cls._debug_lock finally: - cls.repl = None - cls.global_actor_in_debug = None + message :str = 'Exiting `Lock.acquire()` on behalf of sub-actor\n' + if ( + we_acquired + # and + # cls._debug_lock.locked() + ): + message += '-> TTY lock released by child\n' + cls.release() - # restore original sigint handler - DebugStatus.unshield_sigint() - # actor-local state, irrelevant for non-root. - DebugStatus.repl_task = None + else: + message += '-> TTY lock never acquired by child??\n' + + log.runtime( + f'{message}\n' + f'{ctx}' + ) -# TODO: actually use this instead throughout for subs! +@tractor.context +async def lock_tty_for_child( + + ctx: Context, + subactor_task_uid: tuple[str, int], + +) -> LockStatus|LockRelease: + ''' + Lock the TTY in the root process of an actor tree in a new + inter-actor-context-task such that the ``pdbp`` debugger console + can be mutex-allocated to the calling sub-actor for REPL control + without interference by other processes / threads. + + NOTE: this task must be invoked in the root process of the actor + tree. It is meant to be invoked as an rpc-task and should be + highly reliable at releasing the mutex complete! 
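+
+    A sketch of the child-side dialog this endpoint serves; it is
+    distilled from `request_root_stdio_lock()` further below, the
+    canonical requesting-task which invokes it:
+
+        async with portal.open_context(
+            lock_tty_for_child,
+            subactor_task_uid=task_uid,
+        ) as (ctx, status):
+            # `status: LockStatus` with `.locked == True`
+            async with ctx.open_stream() as stream:
+                # <REPL interaction happens here>
+                await stream.send(
+                    LockRelease(
+                        subactor_uid=actor_uid,
+                        cid=status.cid,
+                    )
+                )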
+
+    '''
+    subactor_uid: tuple[str, str] = ctx.chan.uid
+    # NOTE: we use the IPC ctx's cancel scope directly in order to
+    # ensure that on any transport failure, or cancellation request
+    # from the child we expect
+    # `Context._maybe_cancel_and_set_remote_error()` to cancel this
+    # scope despite the shielding we apply below.
+    debug_lock_cs: CancelScope = ctx._scope
+
+    try:
+        if ctx.cid in Lock._blocked:
+            raise RuntimeError(
+                f'Double lock request!?\n'
+                f'The same remote task already has an active request for TTY lock ??\n\n'
+                f'subactor uid: {subactor_uid}\n\n'
+
+                'This might mean that the requesting task '
+                'in `request_root_stdio_lock()` may have crashed?\n'
+                'Consider that an internal bug exists given the TTY '
+                '`Lock`ing IPC dialog..\n'
+            )
+
+        root_task_name: str = current_task().name
+        if tuple(subactor_uid) in Lock._blocked:
+            log.warning(
+                f'Subactor is blocked from acquiring debug lock..\n'
+                f'subactor_uid: {subactor_uid}\n'
+                f'remote task: {subactor_task_uid}\n'
+            )
+            ctx._enter_debugger_on_cancel: bool = False
+            await ctx.cancel(f'Debug lock blocked for {subactor_uid}')
+            # TODO: remove right?
+            # return LockStatus(
+            #     subactor_uid=subactor_uid,
+            #     cid=ctx.cid,
+            #     locked=False,
+            # )
+
+        # TODO: when we get to true remote debugging
+        # this will deliver stdin data?
+
+        log.debug(
+            'Subactor attempting to acquire TTY lock\n'
+            f'root task: {root_task_name}\n'
+            f'subactor_uid: {subactor_uid}\n'
+            f'remote task: {subactor_task_uid}\n'
+        )
+        DebugStatus.shield_sigint()
+        Lock._blocked.add(ctx.cid)
+        with (
+            # enable the locking msgspec
+            apply_debug_pldec(),
+        ):
+            async with Lock.acquire(ctx=ctx):
+                debug_lock_cs.shield = True
+
+                # indicate to child that we've locked stdio
+                await ctx.started(
+                    LockStatus(
+                        subactor_uid=subactor_uid,
+                        cid=ctx.cid,
+                        locked=True,
+                    )
+                )
+
+                log.debug(f'Actor {subactor_uid} acquired TTY lock')
+
+                # wait for the child to unlock pdb
+                async with ctx.open_stream() as stream:
+                    release_msg: LockRelease = await stream.receive()
+
+                    # TODO: security around only releasing if
+                    # these match?
+                    log.pdb(
+                        f'TTY lock release requested\n\n'
+                        f'{release_msg}\n'
+                    )
+                    assert release_msg.cid == ctx.cid
+                    assert release_msg.subactor_uid == tuple(subactor_uid)
+
+                log.debug(f'Actor {subactor_uid} released TTY lock')
+
+        return LockStatus(
+            subactor_uid=subactor_uid,
+            cid=ctx.cid,
+            locked=False,
+        )
+
+    except BaseException:
+        log.exception(
+            'Errored during root TTY-lock dialog?\n'
+            'Forcing release since an internal error caused this!\n'
+        )
+        Lock.release(force=True)
+        raise
+
+    finally:
+        Lock._blocked.remove(ctx.cid)
+        if (no_locker := Lock.no_remote_has_tty):
+            no_locker.set()
+
+        DebugStatus.unshield_sigint()
+
+
+# TODO: rename to ReplState or somethin?
+# DebugRequest, make it a singleton instance?
 class DebugStatus:
     '''
     Singleton-state for debugging machinery in a subactor.
@@ -297,26 +544,26 @@ class DebugStatus:
     '''
     repl: PdbREPL|None = None
     repl_task: Task|None = None
-    req_cs: trio.CancelScope|None = None
+    req_ctx: Context|None = None
+    req_cs: CancelScope|None = None
     repl_release: trio.Event|None = None
-
+    req_finished: trio.Event|None = None
     lock_status: LockStatus|None = None
 
-    _orig_sigint_handler: Callable | None = None
+    _orig_sigint_handler: Callable|None = None
     _trio_handler: (
         Callable[[int, FrameType|None], Any]
        |int
        | None
    ) = None
 
-
    @classmethod
     def repr(cls) -> str:
         fields: str = (
             f'repl: {cls.repl}\n'
             f'repl_task: {cls.repl_task}\n'
             f'repl_release: {cls.repl_release}\n'
-            f'req_cs: {cls.req_cs}\n'
+            f'req_ctx: {cls.req_ctx}\n'
         )
         body: str = textwrap.indent(
             fields,
@@ -328,19 +575,37 @@ class DebugStatus:
             ')>'
         )
 
+    # TODO: how do you get this to work on a non-inited class?
+    # __repr__ = classmethod(repr)
+    # __str__ = classmethod(repr)
+
     @classmethod
     def shield_sigint(cls):
         '''
         Shield out SIGINT handling (which by default triggers
-        `trio.Task` cancellation) in subactors when the `pdb` REPL
+        `trio.Task` cancellation) in subactors when a `pdb` REPL
         is active.
 
-        Avoids cancellation of the current actor (task) when the
-        user mistakenly sends ctl-c or a signal is received from
-        an external request; explicit runtime cancel requests are
-        allowed until the use exits the REPL session using
-        'continue' or 'quit', at which point the orig SIGINT
-        handler is restored.
+        Avoids cancellation of the current actor (task) when the user
+        mistakenly sends ctl-c or via a received signal (from an
+        external request). Explicit runtime cancel requests are
+        allowed until the current REPL-session (the blocking call
+        `Pdb.interaction()`) exits, normally via the 'continue' or
+        'quit' command - at which point the orig SIGINT handler is
+        restored via `.unshield_sigint()` below.
+
+        Impl notes:
+        -----------
+        - we prefer that `trio`'s default handler is always used when
+          SIGINT is unshielded (hence disabling the `pdb.Pdb`
+          defaults in `mk_pdb()`) such that reliable KBI cancellation
+          is always enforced.
+
+        - we always detect whether we're running from a non-main
+          thread, in which case schedule the SIGINT shielding override
+          to run in the main thread as per,
+
+          https://docs.python.org/3/library/signal.html#signals-and-threads
 
         '''
         #
@@ -364,6 +629,12 @@ class DebugStatus:
     @classmethod
     @pdbp.hideframe  # XXX NOTE XXX see below in `.pause_from_sync()`
     def unshield_sigint(cls):
+        '''
+        Un-shield SIGINT for REPL-active (su)bactor.
+
+        See details in `.shield_sigint()`.
+
+        '''
         # always restore ``trio``'s sigint handler. see notes below in
         # the pdb factory about the nightmare that is that code swapping
         # out the handler when the repl activates...
@@ -374,6 +645,11 @@ class DebugStatus:
                 cls._trio_handler,
             )
         else:
+            trio_h: Callable = cls._trio_handler
+            # XXX should never really happen XXX
+            if not trio_h:
+                mk_pdb().set_trace()
+
             signal.signal(
                 signal.SIGINT,
                 cls._trio_handler,
@@ -411,6 +687,36 @@ class DebugStatus:
         # is not threading.main_thread()
         # )
 
+    @classmethod
+    @pdbp.hideframe
+    def release(
+        cls,
+        cancel_req_task: bool = True,
+    ):
+        try:
+            # sometimes the task might already be terminated in
+            # which case this call will raise an RTE?
+ if cls.repl_release is not None: + cls.repl_release.set() + + finally: + # if req_ctx := cls.req_ctx: + # req_ctx._scope.cancel() + + if ( + cancel_req_task + and + (req_cs := cls.req_cs) + ): + req_cs.cancel() + + # restore original sigint handler + cls.unshield_sigint() + + # actor-local state, irrelevant for non-root. + cls.repl_task = None + cls.repl = None + class TractorConfig(pdbp.DefaultConfig): ''' @@ -466,13 +772,24 @@ class PdbREPL(pdbp.Pdb): try: super().set_continue() finally: - Lock.release() + DebugStatus.release() + + # NOTE: for subactors the stdio lock is released via the + # allocated RPC locker task, so for root we have to do it + # manually. + if is_root_process(): + Lock.release() def set_quit(self): try: super().set_quit() finally: - Lock.release() + DebugStatus.release( + cancel_req_task=False, + ) + + if is_root_process(): + Lock.release() # TODO: special handling where we just want the next LOC and # not to resume to the next pause/crash point? @@ -515,413 +832,297 @@ class PdbREPL(pdbp.Pdb): return None -@acm -async def _acquire_debug_lock_from_root_task( - subactor_uid: tuple[str, str], - remote_task_uid: str, - -) -> AsyncIterator[trio.StrictFIFOLock]: - ''' - Acquire a root-actor local FIFO lock which tracks mutex access of - the process tree's global debugger breakpoint. - - This lock avoids tty clobbering (by preventing multiple processes - reading from stdstreams) and ensures multi-actor, sequential access - to the ``pdb`` repl. - - ''' - # task_name: str = current_task().name - we_acquired: bool = False - - log.runtime( - f'Attempting to acquire TTY lock for,\n' - f'subactor_uid: {subactor_uid}\n' - f'remote task: {remote_task_uid}\n' - ) - try: - pre_msg: str = ( - f'Entering lock checkpoint for sub-actor\n' - f'subactor_uid: {subactor_uid}\n' - f'remote task: {remote_task_uid}\n' - ) - stats = Lock._debug_lock.statistics() - if owner := stats.owner: - # and Lock.no_remote_has_tty is not None - pre_msg += ( - f'\n' - f'`Lock` already held by local task\n' - f'{owner}\n\n' - f'On behalf of remote task: {Lock.remote_task_in_debug!r}\n' - ) - log.runtime(pre_msg) - - # NOTE: if the surrounding cancel scope from the - # `lock_tty_for_child()` caller is cancelled, this line should - # unblock and NOT leave us in some kind of - # a "child-locked-TTY-but-child-is-uncontactable-over-IPC" - # condition. - await Lock._debug_lock.acquire() - we_acquired = True - - if Lock.no_remote_has_tty is None: - # mark the tty lock as being in use so that the runtime - # can try to avoid clobbering any connection from a child - # that's currently relying on it. - Lock.no_remote_has_tty = trio.Event() - Lock.remote_task_in_debug = remote_task_uid - - Lock.global_actor_in_debug = subactor_uid - log.runtime( - f'TTY lock acquired for,\n' - f'subactor_uid: {subactor_uid}\n' - f'remote task: {remote_task_uid}\n' - ) - - # NOTE: critical section: this yield is unshielded! - - # IF we received a cancel during the shielded lock entry of some - # next-in-queue requesting task, then the resumption here will - # result in that ``trio.Cancelled`` being raised to our caller - # (likely from ``lock_tty_for_child()`` below)! In - # this case the ``finally:`` below should trigger and the - # surrounding caller side context should cancel normally - # relaying back to the caller. 
- - yield Lock._debug_lock - - finally: - if ( - we_acquired - and - Lock._debug_lock.locked() - ): - Lock._debug_lock.release() - - # IFF there are no more requesting tasks queued up fire, the - # "tty-unlocked" event thereby alerting any monitors of the lock that - # we are now back in the "tty unlocked" state. This is basically - # and edge triggered signal around an empty queue of sub-actor - # tasks that may have tried to acquire the lock. - stats = Lock._debug_lock.statistics() - if ( - not stats.owner - # and Lock.no_remote_has_tty is not None - ): - # log.runtime( - log.info( - f'No more child ctx tasks hold the TTY lock!\n' - f'last subactor: {subactor_uid}\n' - f'remote task: {remote_task_uid}\n' - ) - if Lock.no_remote_has_tty is not None: - # set and release - Lock.no_remote_has_tty.set() - Lock.no_remote_has_tty = None - Lock.remote_task_in_debug = None - else: - log.warning( - 'Not signalling `Lock.no_remote_has_tty` since it has value:\n' - f'{Lock.no_remote_has_tty}\n' - ) - else: - log.info( - f'A child ctx tasks still holds the TTY lock ??\n' - f'last subactor: {subactor_uid}\n' - f'remote task: {remote_task_uid}\n' - f'current local owner task: {stats.owner}\n' - ) - - Lock.global_actor_in_debug = None - log.runtime( - 'TTY lock released by child\n' - f'last subactor: {subactor_uid}\n' - f'remote task: {remote_task_uid}\n' - ) - - -@tractor.context -async def lock_tty_for_child( - - ctx: tractor.Context, - - # TODO: when we finally get a `Start.params: ParamSpec` - # working it'd sure be nice to have `msgspec` auto-decode this - # to an actual tuple XD - subactor_uid: tuple[str, str], - subactor_task_uid: tuple[str, int], - -) -> LockStatus|LockRelease: - ''' - Lock the TTY in the root process of an actor tree in a new - inter-actor-context-task such that the ``pdbp`` debugger console - can be mutex-allocated to the calling sub-actor for REPL control - without interference by other processes / threads. - - NOTE: this task must be invoked in the root process of the actor - tree. It is meant to be invoked as an rpc-task and should be - highly reliable at releasing the mutex complete! - - ''' - req_task_uid: tuple = tuple(subactor_task_uid) - if req_task_uid in Lock._blocked: - raise RuntimeError( - f'Double lock request!?\n' - f'The same remote task already has an active request for TTY lock ??\n\n' - f'task uid: {req_task_uid}\n' - f'subactor uid: {subactor_uid}\n\n' - - 'This might be mean that the requesting task ' - 'in `wait_for_parent_stdin_hijack()` may have crashed?\n' - 'Consider that an internal bug exists given the TTY ' - '`Lock`ing IPC dialog..\n' - ) - - root_task_name: str = current_task().name - if tuple(subactor_uid) in Lock._blocked: - log.warning( - f'Subactor is blocked from acquiring debug lock..\n' - f'subactor_uid: {subactor_uid}\n' - f'remote task: {subactor_task_uid}\n' - ) - ctx._enter_debugger_on_cancel: bool = False - await ctx.cancel(f'Debug lock blocked for {subactor_uid}') - return LockStatus( - subactor_uid=subactor_uid, - cid=ctx.cid, - locked=False, - ) - - # TODO: when we get to true remote debugging - # this will deliver stdin data? 
- - log.debug( - 'Subactor attempting to acquire TTY lock\n' - f'root task: {root_task_name}\n' - f'subactor_uid: {subactor_uid}\n' - f'remote task: {subactor_task_uid}\n' - ) - DebugStatus.shield_sigint() - try: - Lock._blocked.add(req_task_uid) - with ( - # NOTE: though a cs is created for every subactor lock - # REQUEST in this ctx-child task, only the root-task - # holding the `Lock` (on behalf of the ctx parent task - # in a subactor) will set - # `Lock._locking_task_cs` such that if the - # lock holdingn task ever needs to be cancelled (since - # it's shielded by default) that global ref can be - # used to do so! - trio.CancelScope(shield=True) as debug_lock_cs, - - # TODO: make this ONLY limit the pld_spec such that we - # can on-error-decode-`.pld: Raw` fields in - # `Context._deliver_msg()`? - _codec.limit_msg_spec( - payload_spec=__msg_spec__, - ) as codec, - ): - # sanity? - # TODO: don't need the ref right? - assert codec is _codec.current_codec() - - async with _acquire_debug_lock_from_root_task( - subactor_uid, - subactor_task_uid, - ): - # XXX SUPER IMPORTANT BELOW IS ON THIS LINE XXX - # without that the root cs might be, - # - set and then removed in the finally block by - # a task that never acquired the lock, leaving - # - the task that DID acquire the lock STUCK since - # it's original cs was GC-ed bc the first task - # already set the global ref to `None` - Lock.set_locking_task_cs(debug_lock_cs) - - # indicate to child that we've locked stdio - await ctx.started( - LockStatus( - subactor_uid=subactor_uid, - cid=ctx.cid, - locked=True, - ) - ) - - log.debug( f'Actor {subactor_uid} acquired TTY lock') - - # wait for unlock pdb by child - async with ctx.open_stream() as stream: - release_msg: LockRelease = await stream.receive() - - # TODO: security around only releasing if - # these match? - log.pdb( - f'TTY lock released requested\n\n' - f'{release_msg}\n' - ) - assert release_msg.cid == ctx.cid - assert release_msg.subactor_uid == tuple(subactor_uid) - - log.debug(f'Actor {subactor_uid} released TTY lock') - - return LockStatus( - subactor_uid=subactor_uid, - cid=ctx.cid, - locked=False, - ) - - finally: - debug_lock_cs.cancel() - Lock._blocked.remove(req_task_uid) - Lock.set_locking_task_cs(None) - DebugStatus.unshield_sigint() - - @cm -def apply_debug_codec() -> _codec.MsgCodec: +def apply_debug_pldec() -> _codec.MsgCodec: ''' Apply the subactor TTY `Lock`-ing protocol's msgspec temporarily (only in the current task). ''' - with ( - _codec.limit_msg_spec( - payload_spec=__msg_spec__, - ) as debug_codec, - ): - assert debug_codec is _codec.current_codec() - log.pdb( - 'Applied `.devx._debug` msg-spec via codec\n' - f'{debug_codec}\n' - ) - yield debug_codec - log.pdb( - 'REMOVED `.devx._debug` msg-spec via codec\n' - f'{debug_codec}\n' + from tractor.msg import ( + _ops as msgops, + ) + orig_plrx: msgops.PldRx = msgops.current_pldrx() + orig_pldec: msgops.MsgDec = orig_plrx.pld_dec + + try: + with msgops.limit_plds( + spec=__pld_spec__, + ) as debug_dec: + assert debug_dec is msgops.current_pldrx().pld_dec + log.runtime( + 'Applied `.devx._debug` pld-spec\n\n' + f'{debug_dec}\n' + ) + yield debug_dec + + finally: + assert ( + (plrx := msgops.current_pldrx()) is orig_plrx + and + plrx.pld_dec is orig_pldec + ) + log.runtime( + 'Reverted to previous pld-spec\n\n' + f'{orig_pldec}\n' + ) + +# TODO: add this formatter to `.devx.pformat()`! 
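+# NOTE: a rough sketch of the rendered output for a fresh, unshielded
+# scope; the attr values shown are illustrative only:
+#
+#   cs: <trio.CancelScope ...>
+#   cs.cancel_called = False
+#   cs.cancelled_caught = False
+#   cs._cancel_status = <CancelStatus ...>
+#   cs.shield = False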
+def pformat_cs(
+    cs: CancelScope,
+    var_name: str = 'cs',
+) -> str:
+    return (
+        f'{var_name}: {cs}\n'
+        f'{var_name}.cancel_called = {cs.cancel_called}\n'
+        f'{var_name}.cancelled_caught = {cs.cancelled_caught}\n'
+        f'{var_name}._cancel_status = {cs._cancel_status}\n'
+        f'{var_name}.shield = {cs.shield}\n'
+    )
 
 
-async def wait_for_parent_stdin_hijack(
+async def request_root_stdio_lock(
     actor_uid: tuple[str, str],
     task_uid: tuple[str, int],
 
-    task_status: TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED
+    task_status: TaskStatus[CancelScope] = trio.TASK_STATUS_IGNORED
 ):
     '''
-    Connect to the root actor via a ``Context`` and invoke a task which
-    locks a root-local TTY lock: ``lock_tty_for_child()``; this func
-    should be called in a new task from a child actor **and never the
-    root*.
+    Connect to the root actor of this process tree and RPC-invoke
+    a task which acquires a std-streams global `Lock`: an actor tree
+    global mutex which prevents other subactors from entering
+    a `PdbREPL` at the same time as any other.
 
-    This function is used by any sub-actor to acquire mutex access to
-    the ``pdb`` REPL and thus the root's TTY for interactive debugging
-    (see below inside ``pause()``). It can be used to ensure that
-    an intermediate nursery-owning actor does not clobber its children
-    if they are in debug (see below inside
-    ``maybe_wait_for_debugger()``).
+    The actual `Lock` singleton exists ONLY in the root actor's
+    memory and does nothing more than set process-tree global state.
+    The actual `PdbREPL` interaction is completely isolated to each
+    sub-actor, with the `Lock` merely providing the multi-process
+    syncing mechanism to prevent any subactor (or the root itself)
+    from entering the REPL at the same time.
 
     '''
-    from .._discovery import get_root
+    # TODO: likely we can implement this mutex more generally as
+    #      a `._sync.Lock`?
+    # -[ ] simply add the wrapping needed for the debugger specifics?
+    #   - the `__pld_spec__` impl and maybe better APIs for the client
+    #   vs. server side state tracking? (`Lock` + `DebugStatus`)
+    # -[ ] for eg. `mp` has a multi-proc lock via the manager
+    #   - https://docs.python.org/3.8/library/multiprocessing.html#synchronization-primitives
+    # -[ ] technically we need an `RLock` since re-acquire should be a noop
+    #   - https://docs.python.org/3.8/library/multiprocessing.html#multiprocessing.RLock
+    DebugStatus.req_finished = trio.Event()
+    try:
+        from tractor._discovery import get_root
+        with (
+            # NOTE: we need this to ensure that this task exits
+            # BEFORE the REPL instance raises an error like
+            # `bdb.BdbQuit` directly, OW you get a trio cs stack
+            # corruption!
+            # Further, since this task is spawned inside the
+            # `Context._scope_nursery: trio.Nursery`, once an RPC
+            # task errors that cs is cancel_called and so if we want
+            # to debug the RPC task that failed we need to shield
+            # against that expected `.cancel()` call and instead
+            # expect all of the `PdbREPL.set_[continue/quit]()`
+            # methods to unblock this task by setting the
+            # `.repl_release: trio.Event`.
+            trio.CancelScope(shield=True) as req_cs,
 
-    with (
-        trio.CancelScope(shield=True) as cs,
-        apply_debug_codec(),
-    ):
-        DebugStatus.req_cs = cs
-        try:
-            # TODO: merge into sync async with ?
-            async with get_root() as portal:
-                # this syncs to child's ``Context.started()`` call.
- async with portal.open_context( - lock_tty_for_child, - subactor_uid=actor_uid, - subactor_task_uid=task_uid, + # NOTE: set it here in the locker request task bc it's + # possible for multiple such requests for the lock in any + # single sub-actor AND there will be a race between when the + # root locking task delivers the `Started(pld=LockStatus)` + # and when the REPL is actually entered by the requesting + # application task who called + # `.pause()`/`.post_mortem()`. + # + # SO, applying the pld-spec here means it is only applied to + # this IPC-ctx request task, NOT any other task(s) + # including the one that actually enters the REPL. This + # is oc desired bc ow the debugged task will msg-type-error. + # + apply_debug_pldec() as debug_dec, + ): + log.critical( + 'Request cancel-scope is:\n\n' + f'{pformat_cs(req_cs, var_name="req_cs")}\n\n' - ) as (ctx, resp): - log.pdb( - 'Subactor locked TTY with msg\n\n' - f'{resp}\n' - ) - assert resp.subactor_uid == actor_uid - assert resp.cid + ) + DebugStatus.req_cs = req_cs + try: + # TODO: merge into single async with ? + async with get_root() as portal: - async with ctx.open_stream() as stream: - try: # to unblock local caller + async with portal.open_context( + lock_tty_for_child, + subactor_task_uid=task_uid, + ) as (ctx, status): + + DebugStatus.req_ctx = ctx + + from tractor.msg import ( + _ops as msgops, + ) + assert ( + msgops.current_pldrx().pld_dec is debug_dec + ) + log.debug( + 'Subactor locked TTY with msg\n\n' + f'{status}\n' + ) + + # mk_pdb().set_trace() + assert status.subactor_uid == actor_uid + assert status.cid + + # set last rxed lock dialog status. + DebugStatus.lock_status = status + + async with ctx.open_stream() as stream: assert DebugStatus.repl_release - task_status.started(cs) + task_status.started(ctx) - # wait for local task to exit and - # release the REPL + # wait for local task to exit its + # `PdbREPL.interaction()`, call + # `DebugStatus.release()` and then + # unblock here. 
await DebugStatus.repl_release.wait() - - finally: await stream.send( LockRelease( subactor_uid=actor_uid, - cid=resp.cid, + cid=status.cid, ) ) - # sync with callee termination - status: LockStatus = await ctx.result() - assert not status.locked + # sync with child-side root locker task + # completion + status: LockStatus = await ctx.result() + assert not status.locked + DebugStatus.lock_status = status - log.pdb( - 'TTY lock was released for subactor with msg\n\n' - f'{status}\n\n' - 'Exitting {ctx.side!r} side locking of locking ctx' + log.pdb( + 'TTY lock was released for subactor with msg\n\n' + f'{status}\n\n' + f'Exitting {ctx.side!r}-side of locking ctx' + ) + + except ( + tractor.ContextCancelled, + trio.Cancelled, + ): + log.exception( + 'Debug lock request CANCELLED?\n\n' + f'{pformat_cs(req_cs, var_name="req_cs")}\n\n' + f'{pformat_cs(ctx._scope, var_name="ctx._scope")}\n\n' + f'{ctx}' ) + raise - except ContextCancelled: - log.warning('Root actor cancelled debug lock') - raise + except ( + BaseException, + ): + log.exception( + 'Failed during root TTY-lock dialog?\n' + f'{ctx}\n' - finally: - DebugStatus.repl_task = None - log.debug('Exiting debugger TTY lock request func from child') + f'Cancelling IPC ctx!\n' + ) + await ctx.cancel() + raise - log.cancel('Reverting SIGINT handler!') - DebugStatus.unshield_sigint() + except ( + tractor.ContextCancelled, + trio.Cancelled, + ): + log.cancel( + 'Debug lock request CANCELLED?\n' + f'{ctx}\n' + ) + raise + + except BaseException: + log.exception('Errored during root TTY-lock dialog?') + raise + + finally: + log.debug('Exiting debugger TTY lock request func from child') + # signal request task exit + DebugStatus.req_finished.set() - -def mk_mpdb() -> PdbREPL: +def mk_pdb() -> PdbREPL: ''' - Deliver a new `PdbREPL`: a multi-process safe `pdbp` - REPL using the magic of SC! + Deliver a new `PdbREPL`: a multi-process safe `pdbp.Pdb`-variant + using the magic of `tractor`'s SC-safe IPC. + + B) Our `pdb.Pdb` subtype accomplishes multi-process safe debugging by: - - mutexing access to the root process' TTY & stdstreams - via an IPC managed `Lock` singleton per process tree. + - mutexing access to the root process' std-streams (& thus parent + process TTY) via an IPC managed `Lock` singleton per + actor-process tree. - - temporarily overriding any subactor's SIGINT handler to shield during - live REPL sessions in sub-actors such that cancellation is - never (mistakenly) triggered by a ctrl-c and instead only - by either explicit requests in the runtime or + - temporarily overriding any subactor's SIGINT handler to shield + during live REPL sessions in sub-actors such that cancellation + is never (mistakenly) triggered by a ctrl-c and instead only by + explicit runtime API requests or after the + `pdb.Pdb.interaction()` call has returned. + + FURTHER, the `pdbp.Pdb` instance is configured to be `trio` + "compatible" from a SIGINT handling perspective; we mask out + the default `pdb` handler and instead apply `trio`s default + which mostly addresses all issues described in: + + - https://github.com/python-trio/trio/issues/1155 + + The instance returned from this factory should always be + preferred over the default `pdb[p].set_trace()` whenever using + a `pdb` REPL inside a `trio` based runtime. ''' pdb = PdbREPL() - # Always shield out SIGINTs for subactors when REPL is active. - # - # XXX detect whether we're running from a non-main thread - # in which case schedule the SIGINT shielding override - # to in the main thread. 
- # https://docs.python.org/3/library/signal.html#signals-and-threads - DebugStatus.shield_sigint() - # XXX: These are the important flags mentioned in # https://github.com/python-trio/trio/issues/1155 # which resolve the traceback spews to console. pdb.allow_kbdint = True pdb.nosigint = True - return pdb +def any_connected_locker_child() -> bool: + ''' + Predicate to determine if a reported child subactor in debug + is actually connected. + + Useful to detect stale `Lock` requests after IPC failure. + + ''' + actor: Actor = current_actor() + + if not is_root_process(): + raise RuntimeError('This is a root-actor only API!') + + if ( + (ctx := Lock.ctx_in_debug) + and + (uid_in_debug := ctx.chan.uid) + ): + chans: list[tractor.Channel] = actor._peers.get( + tuple(uid_in_debug) + ) + if chans: + return any( + chan.connected() + for chan in chans + ) + + return False + + def shield_sigint_handler( signum: int, frame: 'frame', # type: ignore # noqa @@ -938,10 +1139,7 @@ def shield_sigint_handler( ''' __tracebackhide__: bool = True - uid_in_debug: tuple[str, str]|None = Lock.global_actor_in_debug - actor: Actor = current_actor() - case_handled: bool = False def do_cancel(): # If we haven't tried to cancel the runtime then do that instead @@ -956,28 +1154,8 @@ def shield_sigint_handler( else: raise KeyboardInterrupt - # try to see if the supposed (sub)actor in debug still - # has an active connection to *this* actor, and if not - # it's likely they aren't using the TTY lock / debugger - # and we should propagate SIGINT normally. - any_connected: bool = False - if uid_in_debug is not None: - chans: list[tractor.Channel] = actor._peers.get( - tuple(uid_in_debug) - ) - if chans: - any_connected = any(chan.connected() for chan in chans) - if not any_connected: - log.warning( - 'A global actor reported to be in debug ' - 'but no connection exists for this child!?\n' - f'subactor_uid: {uid_in_debug}\n\n' - 'Allowing SIGINT propagation..' - ) - return do_cancel() - # only set in the actor actually running the REPL - repl: PdbREPL|None = Lock.repl + repl: PdbREPL|None = DebugStatus.repl # TODO: maybe we should flatten out all these cases using # a match/case? @@ -985,98 +1163,102 @@ def shield_sigint_handler( # root actor branch that reports whether or not a child # has locked debugger. if is_root_process(): - lock_cs: trio.CancelScope = Lock.get_locking_task_cs() + # try to see if the supposed (sub)actor in debug still + # has an active connection to *this* actor, and if not + # it's likely they aren't using the TTY lock / debugger + # and we should propagate SIGINT normally. + any_connected: bool = any_connected_locker_child() + # if not any_connected: + # return do_cancel() - log.warning( + problem = ( f'root {actor.uid} handling SIGINT\n' f'any_connected: {any_connected}\n\n' f'{Lock.repr()}\n' ) - maybe_stale_lock_cs: bool = ( - lock_cs is not None - # and not lock_cs.cancel_called - and uid_in_debug is None - ) - if maybe_stale_lock_cs: - log.warning( - 'Stale `Lock._locking_task_cs: CancelScope` DETECTED?\n' - f'|_{lock_cs}\n\n' - ) - lock_cs.cancel() - - if uid_in_debug: # "someone" is (ostensibly) using debug `Lock` + if ( + (ctx := Lock.ctx_in_debug) + and + (uid_in_debug := ctx.chan.uid) # "someone" is (ostensibly) using debug `Lock` + ): name_in_debug: str = uid_in_debug[0] - if ( - not repl # but it's NOT us, the root actor. - ): - # sanity: since no repl ref is set, we def shouldn't - # be the lock owner! 
- assert name_in_debug != 'root' + assert not repl + # if not repl: # but it's NOT us, the root actor. + # sanity: since no repl ref is set, we def shouldn't + # be the lock owner! + assert name_in_debug != 'root' + # IDEAL CASE: child has REPL as expected + if any_connected: # there are subactors we can contact # XXX: only if there is an existing connection to the # (sub-)actor in debug do we ignore SIGINT in this # parent! Otherwise we may hang waiting for an actor # which has already terminated to unlock. - if any_connected: # there are subactors we can contact - # NOTE: don't emit this with `.pdb()` level in - # root without a higher level. - log.debug( - f'Ignoring SIGINT while debug REPL in use by child\n' - f'subactor: {uid_in_debug}\n' - ) - # returns here minus tail logic - case_handled = True - - else: - message: str = ( - f'Ignoring SIGINT while debug REPL SUPPOSEDLY in use by child\n' - f'subactor: {uid_in_debug}\n\n' - f'BUT, no child actors are contactable!?!?\n\n' - - # f'Reverting to def `trio` SIGINT handler..\n' - ) - - if maybe_stale_lock_cs: - lock_cs.cancel() - message += ( - 'Maybe `Lock._locking_task_cs: CancelScope` is stale?\n' - f'|_{lock_cs}\n\n' - ) - - log.warning(message) - # Lock.unshield_sigint() - DebugStatus.unshield_sigint() - case_handled = True + # + # NOTE: don't emit this with `.pdb()` level in + # root without a higher level. + log.runtime( + f'Ignoring SIGINT while debug REPL in use by child ' + f'{uid_in_debug}\n' + ) + problem = None else: - assert name_in_debug == 'root' # we are the registered locker - assert repl # we have a pdb REPL engaged - log.pdb( - f'Ignoring SIGINT while debug REPL in use\n' - f'root actor: {uid_in_debug}\n' + problem += ( + '\n' + f'A `pdb` REPL is SUPPOSEDLY in use by child {uid_in_debug}\n' + f'BUT, no child actors are IPC contactable!?!?\n' ) - # returns here minus tail logic - case_handled = True - # root actor still has this SIGINT handler active without - # an actor using the `Lock` (a bug state) ?? - # => so immediately cancel any stale lock cs and revert - # the handler! + # IDEAL CASE: root has REPL as expected else: - # XXX revert back to ``trio`` handler since this handler shouldn't - # be enabled withtout an actor using a debug REPL! - log.warning( - 'Ignoring SIGINT in root actor but no actor using a `pdb` REPL?\n' - 'Reverting SIGINT handler to `trio` default!\n' - ) + # root actor still has this SIGINT handler active without + # an actor using the `Lock` (a bug state) ?? + # => so immediately cancel any stale lock cs and revert + # the handler! + if not repl: + # TODO: WHEN should we revert back to ``trio`` + # handler if this one is stale? + # -[ ] maybe after a counts work of ctl-c mashes? + # -[ ] use a state var like `stale_handler: bool`? + problem += ( + '\n' + 'No subactor is using a `pdb` REPL according `Lock.ctx_in_debug`?\n' + 'BUT, the root should be using it, WHY this handler ??\n' + ) + else: + log.pdb( + 'Ignoring SIGINT while pdb REPL in use by root actor..\n' + ) + problem = None + # XXX if one is set it means we ARE NOT operating an ideal + # case where a child subactor or us (the root) has the + # lock without any other detected problems. + if problem: + + # detect, report and maybe clear a stale lock request + # cancel scope. 
+ lock_cs: trio.CancelScope = Lock.get_locking_task_cs() + maybe_stale_lock_cs: bool = ( + lock_cs is not None + and not lock_cs.cancel_called + ) if maybe_stale_lock_cs: + problem += ( + '\n' + 'Stale `Lock.ctx_in_debug._scope: CancelScope` detected?\n' + f'{Lock.ctx_in_debug}\n\n' + + '-> Calling ctx._scope.cancel()!\n' + ) lock_cs.cancel() - DebugStatus.unshield_sigint() - case_handled = True + # TODO: wen do we actually want/need this, see above. + # DebugStatus.unshield_sigint() + log.warning(problem) # child actor that has locked the debugger elif not is_root_process(): @@ -1092,14 +1274,13 @@ def shield_sigint_handler( not rent_chan.connected() ): log.warning( - 'A global sub-actor reported to be in debug ' + 'This sub-actor thinks it is debugging ' 'but it has no connection to its parent ??\n' - f'{uid_in_debug}\n' + f'{actor.uid}\n' 'Allowing SIGINT propagation..' ) DebugStatus.unshield_sigint() # do_cancel() - case_handled = True task: str|None = DebugStatus.repl_task if ( @@ -1107,13 +1288,11 @@ def shield_sigint_handler( and repl ): - # if repl: log.pdb( f'Ignoring SIGINT while local task using debug REPL\n' f'|_{task}\n' f' |_{repl}\n' ) - case_handled = True else: msg: str = ( 'SIGINT shield handler still active BUT, \n\n' @@ -1136,7 +1315,6 @@ def shield_sigint_handler( 'Reverting handler to `trio` default!\n' ) DebugStatus.unshield_sigint() - case_handled = True # XXX ensure that the reverted-to-handler actually is # able to rx what should have been **this** KBI ;) @@ -1156,7 +1334,7 @@ def shield_sigint_handler( # we want to alert the user that more input is expect since # nothing has been done dur to ignoring sigint. if ( - repl # only when this actor has a REPL engaged + repl # only when current actor has a REPL engaged ): # XXX: yah, mega hack, but how else do we catch this madness XD if repl.shname == 'xonsh': @@ -1174,72 +1352,19 @@ def shield_sigint_handler( # https://github.com/goodboy/tractor/issues/130#issuecomment-663752040 # https://github.com/prompt-toolkit/python-prompt-toolkit/blob/c2c6af8a0308f9e5d7c0e28cb8a02963fe0ce07a/prompt_toolkit/patch_stdout.py - if not case_handled: - log.critical( - f'{actor.uid} UNHANDLED SIGINT !?!?\n' - # TODO: pprint for `Lock`? - ) + # XXX only for tracing this handler + # log.warning('exiting SIGINT') _pause_msg: str = 'Attaching to pdb REPL in actor' -def _set_trace( - actor: tractor.Actor|None = None, - pdb: PdbREPL|None = None, - shield: bool = False, - - extra_frames_up_when_async: int = 1, - hide_tb: bool = True, -): - __tracebackhide__: bool = hide_tb - - actor: tractor.Actor = ( - actor - or - current_actor() - ) - - # always start 1 level up from THIS in user code. - frame: FrameType|None - if frame := sys._getframe(): - frame: FrameType = frame.f_back # type: ignore - - if ( - frame - and ( - pdb - and actor is not None - ) - ): - # TODO: maybe print the actor supervion tree up to the - # root here? Bo - - log.pdb( - f'{_pause_msg}\n' - '|\n' - # TODO: make an `Actor.__repr()__` - f'|_ {current_task()} @ {actor.uid}\n' - ) - # no f!#$&* idea, but when we're in async land - # we need 2x frames up? - for i in range(extra_frames_up_when_async): - frame: FrameType = frame.f_back - log.debug( - f'Going up frame_{i}:\n|_{frame}\n' - ) - - # engage ze REPL - # B~() - pdb.set_trace(frame=frame) - - async def _pause( - debug_func: Callable = _set_trace, + debug_func: Callable|None, # NOTE: must be passed in the `.pause_from_sync()` case! 
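+    # (when `None` only the TTY-lock/request machinery runs and
+    # no REPL is entered, see the `debug_func is None` branch
+    # inside `_enter_repl_sync()` below.)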
- pdb: PdbREPL|None = None, + repl: PdbREPL|None = None, # TODO: allow caller to pause despite task cancellation, # exactly the same as wrapping with: @@ -1249,11 +1374,15 @@ async def _pause( # is always show in the debugger on entry.. and there seems to # be no way to override it?.. # - shield: bool = False, + # shield: bool = False, hide_tb: bool = True, - extra_frames_up_when_async: int = 4, - task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED + # bc, `debug_func()`, `_enter_repl_sync()` and `_pause()` + # extra_frames_up_when_async: int = 3, + + task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED, + + **debug_func_kwargs, ) -> None: ''' @@ -1277,8 +1406,9 @@ async def _pause( 'for infected `asyncio` mode!' ) from rte - # task_name: str = task.name - + # TODO: this should be created as part of `DebugRequest()` init + # which should instead be a one-shot-use singleton much like + # the `PdbREPL`. if ( not DebugStatus.repl_release or @@ -1289,43 +1419,65 @@ async def _pause( if debug_func is not None: debug_func = partial(debug_func) - if pdb is None: - pdb: PdbREPL = mk_mpdb() + repl: PdbREPL = repl or mk_pdb() + # TODO: maybe make this a `PdbREPL` method or mod func? + # -[ ] factor out better, main reason for it is common logic for + # both root and sub repl entry def _enter_repl_sync( debug_func: Callable, ) -> None: __tracebackhide__: bool = hide_tb - try: - # TODO: do we want to support using this **just** for the - # locking / common code (prolly to help address #320)? - # - if debug_func is None: - task_status.started(Lock) - else: - # block here one (at the appropriate frame *up*) where - # ``breakpoint()`` was awaited and begin handling stdio. - log.debug('Entering sync world of the `pdb` REPL..') - try: - # log.critical( - # f'stack len: {len(pdb.stack)}\n' - # ) - debug_func( - actor, - pdb, - extra_frames_up_when_async=extra_frames_up_when_async, - shield=shield, - ) - except BaseException: - log.exception( - 'Failed to invoke internal `debug_func = ' - f'{debug_func.func.__name__}`\n' - ) - raise - except bdb.BdbQuit: - Lock.release() - raise + # TODO: do we want to support using this **just** for the + # locking / common code (prolly to help address #320)? + # + if debug_func is None: + task_status.started(DebugStatus) + else: + # block here one (at the appropriate frame *up*) where + # ``breakpoint()`` was awaited and begin handling stdio. + log.debug('Entering sync world of the `pdb` REPL..') + + # XXX used by the SIGINT handler to check if + # THIS actor is in REPL interaction + try: + # TODO: move this into a `open_debug_request()` @acm? + # -[ ] prolly makes the most send to do the request + # task spawn as part of an `@acm` api which + # delivers the `DebugRequest` instance and ensures + # encapsing all the pld-spec and debug-nursery? + # + # set local actor task to avoid recurrent + # entries/requests from the same local task + # (to the root process). + DebugStatus.repl_task = task + DebugStatus.repl = repl + DebugStatus.shield_sigint() + + # enter `PdbREPL` specific method + debug_func( + repl=repl, + hide_tb=hide_tb, + **debug_func_kwargs, + ) + except trio.Cancelled: + log.exception( + 'Cancelled during invoke of internal `debug_func = ' + f'{debug_func.func.__name__}`\n' + ) + # NOTE: DON'T release lock yet + raise + + except BaseException: + log.exception( + 'Failed to invoke internal `debug_func = ' + f'{debug_func.func.__name__}`\n' + ) + # NOTE: OW this is ONLY called from the + # `.set_continue/next` hooks! 
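+                # ie. since the REPL never (fully) ran, those
+                # hooks won't fire, so manually clear the request
+                # state to avoid leaving the debug lock held.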
+ DebugStatus.release() + raise try: if is_root_process(): @@ -1333,7 +1485,14 @@ async def _pause( # we also wait in the root-parent for any child that # may have the tty locked prior # TODO: wait, what about multiple root tasks acquiring it though? - if Lock.global_actor_in_debug == actor.uid: + ctx: Context|None = Lock.ctx_in_debug + if ( + ctx is None + and + DebugStatus.repl + and + DebugStatus.repl_task is task + ): # re-entrant root process already has it: noop. log.warning( f'{task.name}@{actor.uid} already has TTY lock\n' @@ -1347,8 +1506,8 @@ async def _pause( # callbacks. Can't think of a nicer way then this atm. if Lock._debug_lock.locked(): log.warning( - 'attempting to shield-acquire active TTY lock' - f' owned by {Lock.global_actor_in_debug}' + 'attempting to shield-acquire active TTY lock owned by\n' + f'{ctx}' ) # must shield here to avoid hitting a ``Cancelled`` and @@ -1359,10 +1518,6 @@ async def _pause( # may be cancelled await Lock._debug_lock.acquire() - Lock.global_actor_in_debug = actor.uid - DebugStatus.repl_task = task - DebugStatus.repl = Lock.repl = pdb - # enter REPL from root, no TTY locking IPC ctx necessary _enter_repl_sync(debug_func) return # next branch is mutex and for subactors @@ -1405,10 +1560,6 @@ async def _pause( await DebugStatus.repl_release.wait() await trio.sleep(0.1) - # mark local actor as "in debug mode" to avoid recurrent - # entries/requests to the root process - DebugStatus.repl_task = task - # this **must** be awaited by the caller and is done using the # root nursery so that the debugger can continue to run without # being restricted by the scope of a new task nursery. @@ -1420,88 +1571,106 @@ async def _pause( # actor._service_n.cancel_scope.shield = shield # ``` # but not entirely sure if that's a sane way to implement it? - - # NOTE: MUST it here bc multiple tasks are spawned by any - # one sub-actor AND there will be a race between when the - # root locking task delivers the `Started(pld=LockStatus)` - # and when the REPL is actually entered here. SO ensure - # the codec is set before either are run! - # - with ( - # _codec.limit_msg_spec( - # payload_spec=__msg_spec__, - # ) as debug_codec, - trio.CancelScope(shield=shield), - ): - # async with trio.open_nursery() as tn: - # tn.cancel_scope.shield = True - try: - # cs: trio.CancelScope = await tn.start( - cs: trio.CancelScope = await actor._service_n.start( - wait_for_parent_stdin_hijack, - actor.uid, - (task.name, id(task)), - ) - # our locker task should be the one in ctx - # with the root actor - assert DebugStatus.req_cs is cs - - # XXX used by the SIGINT handler to check if - # THIS actor is in REPL interaction - Lock.repl = pdb - - except RuntimeError: - Lock.release() - - if actor._cancel_called: - # service nursery won't be usable and we - # don't want to lock up the root either way since - # we're in (the midst of) cancellation. - return - - raise + try: + # NOTE spawn the stdio locker request task inside the + # current `Context._scope_nursery` to entsure that + # the request never can outlive the task's (parent) + # lifetime. + curr_ctx: Context = current_ipc_ctx() + # TODO: see `_errors_relayed_via_ipc()` where we + # should dynamically open a `debug_tn` for use here, + # BUT it needs to be outside the normal error + # catching and `_maybe_enter_debugger()` call! 
+ # ctx: Context = await curr_ctx._debug_tn.start( + ctx: Context = await actor._service_n.start( + request_root_stdio_lock, + actor.uid, + (task.name, id(task)), # task uuid (effectively) + ) + # our locker task should be the one in ctx + # with the root actor + assert ( + ctx + is + DebugStatus.req_ctx + is not + curr_ctx + ) # enter REPL + _enter_repl_sync(debug_func) - try: - _enter_repl_sync(debug_func) - finally: - DebugStatus.unshield_sigint() + except RuntimeError: + if actor._cancel_called: + # service nursery won't be usable and we + # don't want to lock up the root either way since + # we're in (the midst of) cancellation. + return + + raise + + # TODO: prolly factor this plus the similar block from + # `_enter_repl_sync()` into a common @cm? + except BaseException as repl_err: + if isinstance(repl_err, bdb.BdbQuit): + log.devx( + 'REPL for pdb was quit!\n' + ) + else: + log.exception( + 'Failed to engage debugger via `_pause()` ??\n' + ) + + DebugStatus.release() + # sanity checks for ^ on request/status teardown + assert DebugStatus.repl is None + assert DebugStatus.repl_task is None + req_ctx: Context = DebugStatus.req_ctx + if req_ctx: + assert req_ctx._scope.cancel_called - except BaseException: - log.exception( - 'Failed to engage debugger via `_pause()` ??\n' - ) raise -# XXX: apparently we can't do this without showing this frame -# in the backtrace on first entry to the REPL? Seems like an odd -# behaviour that should have been fixed by now. This is also why -# we scrapped all the @cm approaches that were tried previously. -# finally: -# __tracebackhide__ = True -# # frame = sys._getframe() -# # last_f = frame.f_back -# # last_f.f_globals['__tracebackhide__'] = True -# # signal.signal = pdbp.hideframe(signal.signal) +def _set_trace( + repl: PdbREPL, # passed by `_pause()` + hide_tb: bool, + + # partial-ed in by `.pause()` + api_frame: FrameType, +): + __tracebackhide__: bool = hide_tb + actor: tractor.Actor = current_actor() + + # else: + # TODO: maybe print the actor supervion tree up to the + # root here? Bo + log.pdb( + f'{_pause_msg}\n' + '|\n' + # TODO: make an `Actor.__repr()__` + f'|_ {current_task()} @ {actor.uid}\n' + ) + # presuming the caller passed in the "api frame" + # (the last frame before user code - like `.pause()`) + # then we only step up one frame to where the user + # called our API. + caller_frame: FrameType = api_frame.f_back # type: ignore + + # engage ze REPL + # B~() + repl.set_trace(frame=caller_frame) async def pause( + *, + hide_tb: bool = True, + api_frame: FrameType|None = None, - debug_func: Callable|None = _set_trace, - - # TODO: allow caller to pause despite task cancellation, - # exactly the same as wrapping with: - # with CancelScope(shield=True): - # await pause() - # => the REMAINING ISSUE is that the scope's .__exit__() frame - # is always show in the debugger on entry.. and there seems to - # be no way to override it?.. - # + # TODO: figure out how to still make this work: + # -[ ] pass it direct to `_pause()`? + # -[ ] use it to set the `debug_nursery.cancel_scope.shield` shield: bool = False, - task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED, - **_pause_kwargs, ) -> None: @@ -1522,19 +1691,37 @@ async def pause( ''' __tracebackhide__: bool = True - with trio.CancelScope( - shield=shield, - ) as cs: + # always start 1 level up from THIS in user code since normally + # `tractor.pause()` is called explicitly by use-app code thus + # making it the highest up @api_frame. 
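+    # (when not passed in we capture our own frame here and
+    # `_set_trace()` then steps a single `.f_back` up from it
+    # to land in the user's calling frame.)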
+ api_frame: FrameType = api_frame or inspect.currentframe() + # XXX TODO: this was causing cs-stack corruption in trio due to + # usage within the `Context._scope_nursery` (which won't work + # based on scoping of it versus call to `_maybe_enter_debugger()` + # from `._rpc._invoke()`) + # with trio.CancelScope( + # shield=shield, + # ) as cs: # NOTE: so the caller can always manually cancel even # if shielded! - task_status.started(cs) - return await _pause( - debug_func=debug_func, - shield=shield, - task_status=task_status, - **_pause_kwargs - ) + # task_status.started(cs) + # log.critical( + # '`.pause() cancel-scope is:\n\n' + # f'{pformat_cs(cs, var_name="pause_cs")}\n\n' + # ) + await _pause( + debug_func=partial( + _set_trace, + api_frame=api_frame, + ), + + # task_status=task_status, + **_pause_kwargs + ) + # XXX avoid cs stack corruption when `PdbREPL.interaction()` + # raises `BdbQuit`. + # await DebugStatus.req_finished.wait() _gb_mod: None|ModuleType|False = None @@ -1626,7 +1813,7 @@ def pause_from_sync( # raises on not-found by default greenback: ModuleType = maybe_import_greenback() - mdb: PdbREPL = mk_mpdb() + mdb: PdbREPL = mk_pdb() # run async task which will lock out the root proc's TTY. if not Lock.is_main_trio_thread(): @@ -1664,10 +1851,10 @@ def pause_from_sync( # entering the global ``breakpoint()`` built-in from sync # code? _set_trace( + api_frame=inspect.current_frame(), actor=actor, pdb=mdb, hide_tb=hide_tb, - extra_frames_up_when_async=1, # TODO? will we ever need it? # -> the gb._await() won't be affected by cancellation? @@ -1691,8 +1878,8 @@ async def breakpoint(**kwargs): ) __tracebackhide__: bool = True await pause( - # extra_frames_up_when_async=6, - **kwargs + api_frame=inspect.currentframe(), + **kwargs, ) @@ -1702,12 +1889,15 @@ _crash_msg: str = ( def _post_mortem( - actor: tractor.Actor, - pdb: PdbREPL, - shield: bool = False, + # provided and passed by `_pause()` + repl: PdbREPL, - # only for compat with `._set_trace()`.. - extra_frames_up_when_async=1, + # XXX all `partial`-ed in by `post_mortem()` below! + tb: TracebackType, + api_frame: FrameType, + + shield: bool = False, + hide_tb: bool = False, ) -> None: ''' @@ -1715,6 +1905,9 @@ def _post_mortem( debugger instance. ''' + __tracebackhide__: bool = hide_tb + actor: tractor.Actor = current_actor() + # TODO: print the actor supervion tree up to the root # here! Bo log.pdb( @@ -1728,24 +1921,64 @@ def _post_mortem( # f'|_ {current_task()} @ {actor.name}\n' ) - # TODO: only replacing this to add the + # NOTE only replacing this from `pdbp.xpm()` to add the # `end=''` to the print XD - # pdbp.xpm(Pdb=lambda: pdb) - info = sys.exc_info() print(traceback.format_exc(), end='') - pdbp.post_mortem( - t=info[2], - Pdb=lambda: pdb, + + caller_frame: FrameType = api_frame.f_back + + # NOTE: see the impl details of followings to understand usage: + # - `pdbp.post_mortem()` + # - `pdbp.xps()` + # - `bdb.interaction()` + repl.reset() + repl.interaction( + frame=caller_frame, + # frame=None, + traceback=tb, ) -post_mortem = partial( - pause, - debug_func=_post_mortem, -) +async def post_mortem( + *, + tb: TracebackType|None = None, + api_frame: FrameType|None = None, + hide_tb: bool = False, + + # TODO: support shield here just like in `pause()`? 
+ # shield: bool = False, + + **_pause_kwargs, + +) -> None: + __tracebackhide__: bool = hide_tb + + tb: TracebackType = tb or sys.exc_info()[2] + + # TODO: do upward stack scan for highest @api_frame and + # use its parent frame as the expected user-app code + # interact point. + api_frame: FrameType = api_frame or inspect.currentframe() + + await _pause( + debug_func=partial( + _post_mortem, + api_frame=api_frame, + tb=tb, + ), + hide_tb=hide_tb, + **_pause_kwargs + ) -async def _maybe_enter_pm(err): +async def _maybe_enter_pm( + err: BaseException, + *, + tb: TracebackType|None = None, + api_frame: FrameType|None = None, + hide_tb: bool = False, +): + from tractor._exceptions import is_multi_cancelled if ( debug_mode() @@ -1764,12 +1997,13 @@ async def _maybe_enter_pm(err): # might be a simpler check we can do? and not is_multi_cancelled(err) ): - log.debug("Actor crashed, entering debug mode") - try: - await post_mortem() - finally: - Lock.release() - return True + api_frame: FrameType = api_frame or inspect.currentframe() + tb: TracebackType = tb or sys.exc_info()[2] + await post_mortem( + api_frame=api_frame, + tb=tb, + ) + return True else: return False @@ -1796,12 +2030,12 @@ async def acquire_debug_lock( return async with trio.open_nursery() as n: - cs = await n.start( - wait_for_parent_stdin_hijack, + ctx: Context = await n.start( + request_root_stdio_lock, subactor_uid, ) - yield cs - cs.cancel() + yield ctx + ctx.cancel() async def maybe_wait_for_debugger( @@ -1830,8 +2064,8 @@ async def maybe_wait_for_debugger( # will make the pdb repl unusable. # Instead try to wait for pdb to be released before # tearing down. - in_debug: tuple[str, str]|None = Lock.global_actor_in_debug - + ctx_in_debug: Context|None = Lock.ctx_in_debug + in_debug: tuple[str, str]|None = ctx_in_debug.chan.uid if ctx_in_debug else None if in_debug == current_actor().uid: log.debug( msg @@ -1864,17 +2098,26 @@ async def maybe_wait_for_debugger( and not Lock.no_remote_has_tty.is_set() and in_debug is not None ): - log.pdb( + + # caller_frame_info: str = pformat_caller_frame() + log.debug( msg + - '\nRoot is waiting on tty lock to release..\n' + '\nRoot is waiting on tty lock to release from\n\n' + # f'{caller_frame_info}\n' ) + + if not any_connected_locker_child(): + Lock.get_locking_task_cs().cancel() + with trio.CancelScope(shield=True): await Lock.no_remote_has_tty.wait() + log.pdb( - f'Child subactor released debug lock\n' + f'Subactor released debug lock\n' f'|_{in_debug}\n' ) + break # is no subactor locking debugger currently? 
if ( @@ -1900,7 +2143,7 @@ async def maybe_wait_for_debugger( f'poll step: {istep}\n' f'poll delya: {poll_delay}' ) - with trio.CancelScope(shield=True): + with CancelScope(shield=True): await trio.sleep(poll_delay) continue -- 2.34.1 From 17cf3d45ba911b9cef0dc465810644b0a6b56d22 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 8 May 2024 13:30:15 -0400 Subject: [PATCH 093/305] Move `_debug.pformat_cs()` into `devx.pformat` --- tractor/devx/_debug.py | 19 ++++--------------- tractor/devx/pformat.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 15 deletions(-) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 0567e42a..da322407 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -73,7 +73,10 @@ from tractor._state import ( debug_mode, current_ipc_ctx, ) -# from .pformat import pformat_caller_frame +from .pformat import ( + # pformat_caller_frame, + pformat_cs, +) if TYPE_CHECKING: from tractor._ipc import Channel @@ -868,20 +871,6 @@ def apply_debug_pldec() -> _codec.MsgCodec: f'{orig_pldec}\n' ) -# TODO: add this formatter to `.devx.pformat()`! -def pformat_cs( - cs: CancelScope, - var_name: str = 'cs', -) -> str: - return ( - f'{var_name}: {cs}\n' - f'{var_name}.cancel_called = {cs.cancel_called}\n' - f'{var_name}.cancelled_caught = {cs.cancelled_caught}\n' - f'{var_name}._cancel_status = {cs._cancel_status}\n' - f'{var_name}.shield = {cs.shield}\n' - ) - - async def request_root_stdio_lock( actor_uid: tuple[str, str], task_uid: tuple[str, int], diff --git a/tractor/devx/pformat.py b/tractor/devx/pformat.py index 0b35feee..5fe9bc62 100644 --- a/tractor/devx/pformat.py +++ b/tractor/devx/pformat.py @@ -22,6 +22,8 @@ Mostly handy for logging and exception message content. import textwrap import traceback +from trio import CancelScope + def add_div( message: str, @@ -133,3 +135,34 @@ def pformat_caller_frame( indent='', ) return tb_str + + +def pformat_cs( + cs: CancelScope, + var_name: str = 'cs', + field_prefix: str = ' |_', +) -> str: + ''' + Pretty format info about a `trio.CancelScope` including most + of its public state and `._cancel_status`. + + The output can be modified to show a "var name" for the + instance as a field prefix, just a simple str before each + line more or less. + + ''' + + fields: str = textwrap.indent( + ( + f'cancel_called = {cs.cancel_called}\n' + f'cancelled_caught = {cs.cancelled_caught}\n' + f'_cancel_status = {cs._cancel_status}\n' + f'shield = {cs.shield}\n' + ), + prefix=field_prefix, + ) + return ( + f'{var_name}: {cs}\n' + + + fields + ) -- 2.34.1 From 0c57e1a808e4329efdeef0ae5d98e8fa6f36dc4f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 8 May 2024 13:35:29 -0400 Subject: [PATCH 094/305] Even moar bitty `Context` refinements - set `._state._ctxvar_Context` just after `StartAck` inside `open_context_from_portal()` so that `current_ipc_ctx()` always works on the 'parent' side. - always set `.canceller` to any `MsgTypeError.src_uid` and otherwise to any maybe-detected `.src_uid` (i.e. for RAEs). - always set `.canceller` to us when we rx a ctxc which reports us as its canceller; this is a sanity check on definite "self cancellation". - adjust `._is_self_cancelled()` logic to only be `True` when `._remote_error` is both a ctxc with a `.canceller` set to us AND when `Context.canceller` is also set to us (since the change above) as a little bit of extra rigor. 
- fill-in/fix some `.repr_state` edge cases: - merge self-vs.-peer ctxc cases to one block and distinguish via nested `._is_self_cancelled()` check. - set 'errored' for all exception matched cases despite `.canceller`. - add pre-`Return` phase statuses: |_'pre-started' and 'syncing-to-child' depending on side and when `._stream` has not (yet) been set. |_'streaming' and 'streaming-finished' depending on side when `._stream` is set and whether it was stopped/closed. - tweak drainage log-message to use "outcome" instead of "result". - use new `.devx.pformat.pformat_cs()` inside `_maybe_cancel_and_set_remote_error()` but, IFF the log level is at least 'cancel'. --- tractor/_context.py | 200 ++++++++++++++++++++++++++++---------------- 1 file changed, 126 insertions(+), 74 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 20584979..a2adfa42 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -37,8 +37,9 @@ import inspect from pprint import pformat from typing import ( Any, - Callable, AsyncGenerator, + Callable, + Mapping, Type, TYPE_CHECKING, Union, @@ -59,7 +60,10 @@ from ._exceptions import ( pack_from_raise, unpack_error, ) -from .log import get_logger +from .log import ( + get_logger, + at_least_level, +) from .msg import ( Error, MsgType, @@ -83,6 +87,7 @@ from ._streaming import MsgStream from ._state import ( current_actor, debug_mode, + _ctxvar_Context, ) if TYPE_CHECKING: @@ -204,7 +209,7 @@ class Context: # cancelled that the other side is as well, so maybe we should # instead just have a `.canceller` pulled from the # `ContextCancelled`? - _canceller: tuple[str, str] | None = None + _canceller: tuple[str, str]|None = None # NOTE: we try to ensure assignment of a "cancel msg" since # there's always going to be an "underlying reason" that any @@ -384,8 +389,12 @@ class Context: re: BaseException|None = ( remote_error - or self._remote_error + or + self._remote_error ) + # XXX we only report "this context" as self-cancelled + # once we've received a ctxc from our direct-peer task + # (aka we're `.cancel_acked`). if not re: return False @@ -396,10 +405,10 @@ class Context: our_canceller = self.canceller return bool( - isinstance(re, ContextCancelled) + isinstance((ctxc := re), ContextCancelled) and from_uid == self.chan.uid - and re.canceller == our_uid - and our_canceller == from_uid + and ctxc.canceller == our_uid + and our_canceller == our_uid ) @property @@ -619,15 +628,27 @@ class Context: ) self._remote_error: BaseException = error + msgerr: bool = False + # self-cancel (ack) or, # peer propagated remote cancellation. - msgerr: bool = False if isinstance(error, ContextCancelled): + # NOTE in the case error is a ctxc the canceller will + # either be another peer or us. in the case where it's us + # we mark ourself as the canceller of ourselves (a ctx + # "self cancel" from this side's perspective), if instead + # the far end was cancelled by some other (inter-) peer, + # we want to mark our canceller as the actor that was + # cancelled, NOT their reported canceller. IOW in the + # latter case we're cancelled by someone else getting + # cancelled. 
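+            #
+            # eg. if peer B relays a ctxc which names some other
+            # actor C as its canceller, then from this side it
+            # was B (the msg's `.src_uid`) that cancelled *us*,
+            # so B is what gets recorded below.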
+ if (canc := error.canceller) == self._actor.uid: + whom: str = 'us' + self._canceller = canc + else: + whom = 'a remote peer (not us)' + self._canceller = error.src_uid - whom: str = ( - 'us' if error.canceller == self._actor.uid - else 'a remote peer (not us)' - ) log.cancel( f'IPC context was cancelled by {whom}!\n\n' f'{error}' @@ -635,6 +656,7 @@ class Context: elif isinstance(error, MsgTypeError): msgerr = True + self._canceller = error.src_uid log.error( f'IPC dialog error due to msg-type caused by {self.peer_side!r} side\n\n' f'{error}\n' @@ -642,28 +664,25 @@ class Context: ) else: + # always record the cancelling actor's uid since its + # cancellation state is linked and we want to know + # which process was the cause / requester of the + # cancellation. + maybe_error_src_uid: tuple = getattr( + error, + 'src_uid', + None, + ) + # we mark the source actor as our canceller + self._canceller = maybe_error_src_uid log.error( f'Remote context error:\n\n' # f'{pformat(self)}\n' f'{error}\n' ) - # always record the cancelling actor's uid since its - # cancellation state is linked and we want to know - # which process was the cause / requester of the - # cancellation. - maybe_error_src: tuple = getattr( - error, - 'src_uid', - None, - ) - self._canceller = ( - maybe_error_src - or - # XXX: in the case we get a non-boxed error? - # -> wait but this should never happen right? - self.chan.uid - ) + if self._canceller is None: + log.error('Ctx has no canceller set!?') # Cancel the local `._scope`, catch that # `._scope.cancelled_caught` and re-raise any remote error @@ -707,30 +726,34 @@ class Context: message: str = 'NOT cancelling `Context._scope` !\n\n' fmt_str: str = 'No `self._scope: CancelScope` was set/used ?' - if cs: + if ( + cs + and + at_least_level(log=log, level='cancel') + ): fmt_str: str = self.pformat( extra_fields={ '._is_self_cancelled()': self._is_self_cancelled(), '._cancel_on_msgerr': self._cancel_on_msgerr, - - '._scope': cs, - '._scope.cancel_called': cs.cancel_called, - '._scope.cancelled_caught': cs.cancelled_caught, - '._scope._cancel_status': cs._cancel_status, } ) + from .devx.pformat import pformat_cs + cs_fmt: str = pformat_cs( + cs, + var_name='Context._scope', + ) + fmt_str += ( + '\n' + + + cs_fmt + ) log.cancel( message + fmt_str ) - # TODO: maybe we should also call `._res_scope.cancel()` if it - # exists to support cancelling any drain loop hangs? - # NOTE: this usage actually works here B) - # from .devx._debug import breakpoint - # await breakpoint() - # TODO: add to `Channel`? + # TODO: also add to `Channel`? @property def dst_maddr(self) -> str: chan: Channel = self.chan @@ -1103,7 +1126,8 @@ class Context: f'ctx id: {self.cid}' ) - # TODO: replace all the instances of this!! XD + # TODO: replace all the `._maybe_raise_remote_err()` usage + # with instances of this!! def maybe_raise( self, hide_tb: bool = True, @@ -1114,6 +1138,7 @@ class Context: if re := self._remote_error: return self._maybe_raise_remote_err( re, + hide_tb=hide_tb, **kwargs, ) @@ -1215,7 +1240,6 @@ class Context: # runtime frames from the tb explicitly? # https://docs.python.org/3/reference/simple_stmts.html#the-raise-statement # https://stackoverflow.com/a/24752607 - __tracebackhide__: bool = True raise remote_error # from None # TODO: change to `.wait_for_result()`? @@ -1266,8 +1290,15 @@ class Context: # wait for a final context result/error by "draining" # (by more or less ignoring) any bi-dir-stream "yield" # msgs still in transit from the far end. 
+ # + # XXX NOTE XXX: this call shouldn't really ever raise + # (other then internal error), instead delivering an + # `Error`-msg and that being `.maybe_raise()`-ed below + # since every message should be delivered via the normal + # `._deliver_msg()` route which will appropriately set + # any `.maybe_error`. ( - return_msg, + outcome_msg, drained_msgs, ) = await msgops.drain_to_final_msg( ctx=self, @@ -1285,13 +1316,18 @@ class Context: f'{msg}\n' ) - log.cancel( - 'Ctx drained to final result msgs\n' - f'{return_msg}\n\n' - - f'pre-result drained msgs:\n' - f'{pformat(drained_msgs)}\n' + drained_status: str = ( + 'Ctx drained to final outcome msg\n\n' + f'{outcome_msg}\n' ) + if drained_msgs: + drained_status += ( + '\n' + f'The pre-drained msgs are\n' + f'{pformat(drained_msgs)}\n' + ) + + log.cancel(drained_status) self.maybe_raise( # NOTE: obvi we don't care if we @@ -1322,7 +1358,7 @@ class Context: @property def maybe_error(self) -> BaseException|None: - le: Exception|None = self._local_error + le: BaseException|None = self._local_error re: RemoteActorError|ContextCancelled|None = self._remote_error match (le, re): @@ -1350,7 +1386,7 @@ class Context: # ContextCancelled(canceller=), # ): - error: Exception|None = le or re + error: BaseException|None = le or re if error: return error @@ -1465,52 +1501,63 @@ class Context: ''' merr: Exception|None = self.maybe_error outcome: Unresolved|Exception|Any = self.outcome - + status: str|None = None match ( outcome, merr, ): + # "graceful" ctx cancellation case ( Unresolved, ContextCancelled(), - ) if self.cancel_acked: - status = 'self-cancelled' - - case ( - Unresolved, - ContextCancelled(), - ) if ( - self.canceller - and not self._cancel_called ): - status = 'peer-cancelled' + if self._is_self_cancelled(): + status = 'self-cancelled' + elif ( + self.canceller + and not self._cancel_called + ): + status = 'peer-cancelled' + # (remote) error condition case ( Unresolved, - BaseException(), - ) if self.canceller: + BaseException(), # any error-type + ): status = 'errored' + # result already returned case ( _, # any non-unresolved value None, ) if self._final_result_is_set(): status = 'returned' + # normal operation but still in a pre-`Return`-result + # dialog phase case ( - Unresolved, # noqa (weird.. ruff) - None, + Unresolved, # noqa (ruff, you so weird..) + None, # no (remote) error set ): if stream := self._stream: if stream.closed: status = 'streaming-finished' else: status = 'streaming' + elif self._started_called: status = 'started' - case _: - status = 'unknown!?' + else: + if self.side == 'child': + status = 'pre-started' + else: + status = 'syncing-to-child' + + if status is None: + status = '??unknown??' + # from tractor.devx import mk_pdb + # mk_pdb().set_trace() return status @@ -1741,7 +1788,6 @@ class Context: f'Delivering IPC ctx error from {self.peer_side!r} to {side!r} task\n\n' f'{flow_body}' - f'{pformat(re)}\n' ) self._cancel_msg: dict = msg @@ -2006,6 +2052,7 @@ async def open_context_from_portal( ) assert ctx._remote_func_type == 'context' assert ctx._caller_info + _ctxvar_Context.set(ctx) # XXX NOTE since `._scope` is NOT set BEFORE we retreive the # `Started`-msg any cancellation triggered @@ -2159,7 +2206,7 @@ async def open_context_from_portal( # CASE 2: context was cancelled by local task calling # `.cancel()`, we don't raise and the exit block should - # exit silently. + # finish silently. 
if ( ctx._cancel_called and @@ -2284,6 +2331,11 @@ async def open_context_from_portal( try: result_or_err: Exception|Any = await ctx.result() except BaseException as berr: + # cancelled before (or maybe during?) final result capture + # if isinstance(trio.Cancelled, berr): + # from .devx import mk_pdb + # mk_pdb.set_trace() + # on normal teardown, if we get some error # raised in `Context.result()` we still want to # save that error on the ctx's state to @@ -2479,12 +2531,12 @@ def mk_context( _caller_info=caller_info, **kwargs, ) - # TODO: we can drop the old placeholder yah? - # ctx._result: int | Any = id(ctx) ctx._result = Unresolved return ctx +# TODO: use the new type-parameters to annotate this in 3.13? +# -[ ] https://peps.python.org/pep-0718/#unknown-types def context(func: Callable) -> Callable: ''' Mark an (async) function as an SC-supervised, inter-`Actor`, @@ -2498,8 +2550,8 @@ def context(func: Callable) -> Callable: # https://github.com/python/mypy/issues/2087#issuecomment-769266912 func._tractor_context_function = True # type: ignore - sig = inspect.signature(func) - params = sig.parameters + sig: inspect.Signature = inspect.signature(func) + params: Mapping = sig.parameters if 'ctx' not in params: raise TypeError( "The first argument to the context function " -- 2.34.1 From c1747a290a991c387ee66da2cea0199d015814e1 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 8 May 2024 14:24:25 -0400 Subject: [PATCH 095/305] Rework and first draft of `.devx._frame_stack.py` Proto-ing a little suite of call-stack-frame annotation-for-scanning sub-systems for the purposes of both, - the `.devx._debug`er and its traceback and frame introspection needs when entering the REPL, - detailed trace-style logging such that we can explicitly report on "which and where" `tractor`'s APIs are used in the "app" code. Deats: - change mod name obvi from `._code` and adjust client mod imports. - using `wrapt` (for perf) implement a `@api_frame` annot decorator which both stashes per-call-stack-frame instances of `CallerInfo` in a table and marks the function such that API endpoints can be easily found via runtime stack scanning despite any internal impl changes. - add a global `_frame2callerinfo_cache: dict[FrameType, CallerInfo]` table for providing the per func-frame info caching. - Re-implement `CallerInfo` to require less (types of) inputs: |_ `_api_func: Callable`, a ref to the (singleton) func def. |_ `_api_frame: FrameType` taken from the `@api_frame` marked `tractor`-API func's runtime call-stack, from which we can determine the app code's `.caller_frame`. |_`_caller_frames_up: int|None` allowing the specific `@api_frame` to determine "how many frames up" the application / calling code is. And, a better set of derived attrs: |_`caller_frame: FrameType` which finds and caches the API-eps calling frame. |_`caller_frame: FrameType` which finds and caches the API-eps calling - add a new attempt at "getting a method ref from its runtime frame" with `get_ns_and_func_from_frame()` using a heuristic that the `CodeType.co_qualname: str` should have a "." in it for methods. - main issue is still that the func-ref lookup will require searching for the method's instance type by name, and that name isn't guaranteed to be defined in any particular ns.. |_rn we try to read it from the `FrameType.f_locals` but that is going to obvi fail any time the method is called in a module where it's type is not also defined/imported. - returns both the ns and the func ref FYI. 
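
For reference, the `co_qualname` heuristic at the core of the new
lookup can be sketched standalone; NB this is a toy (the
`func_from_frame()` and `Spam` names are made up for illustration,
not the real impl) and it assumes py3.11+ for
`CodeType.co_qualname`:

```python
import inspect
from types import FrameType


def func_from_frame(frame: FrameType):
    # methods get a dotted `co_qualname` like 'Klass.meth', so
    # look up the defining class in the caller's locals; plain
    # funcs instead resolve straight from the module globals.
    # (nested funcs also get dotted names, so this is inexact!)
    qualname: str = frame.f_code.co_qualname
    if '.' in qualname:
        cls_name, _, _ = qualname.partition('.')
        ns: dict = frame.f_back.f_locals[cls_name].__dict__
    else:
        ns = frame.f_globals

    return ns[frame.f_code.co_name]


class Spam:
    def meth(self):
        return func_from_frame(inspect.currentframe())


# resolves the method ref, but ONLY bc `Spam` is defined in the
# calling frame's namespace, per the lookup limitation noted above!
assert Spam().meth() is Spam.meth
```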
--- tractor/_context.py | 4 +- tractor/devx/{_code.py => _frame_stack.py} | 229 +++++++++++---------- 2 files changed, 121 insertions(+), 112 deletions(-) rename tractor/devx/{_code.py => _frame_stack.py} (53%) diff --git a/tractor/_context.py b/tractor/_context.py index a2adfa42..b58010b6 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -94,7 +94,7 @@ if TYPE_CHECKING: from ._portal import Portal from ._runtime import Actor from ._ipc import MsgTransport - from .devx._code import ( + from .devx._frame_stack import ( CallerInfo, ) @@ -2513,7 +2513,7 @@ def mk_context( send_chan, recv_chan = trio.open_memory_channel(msg_buffer_size) # TODO: only scan caller-info if log level so high! - from .devx._code import find_caller_info + from .devx._frame_stack import find_caller_info caller_info: CallerInfo|None = find_caller_info() # TODO: when/how do we apply `.limit_plds()` from here? diff --git a/tractor/devx/_code.py b/tractor/devx/_frame_stack.py similarity index 53% rename from tractor/devx/_code.py rename to tractor/devx/_frame_stack.py index 8d55212b..89a9e849 100644 --- a/tractor/devx/_code.py +++ b/tractor/devx/_frame_stack.py @@ -20,11 +20,8 @@ as it pertains to improving the grok-ability of our runtime! ''' from __future__ import annotations +from functools import partial import inspect -# import msgspec -# from pprint import pformat -import textwrap -import traceback from types import ( FrameType, FunctionType, @@ -32,9 +29,8 @@ from types import ( # CodeType, ) from typing import ( - # Any, + Any, Callable, - # TYPE_CHECKING, Type, ) @@ -42,6 +38,7 @@ from tractor.msg import ( pretty_struct, NamespacePath, ) +import wrapt # TODO: yeah, i don't love this and we should prolly just @@ -83,6 +80,31 @@ def get_class_from_frame(fr: FrameType) -> ( return None +def get_ns_and_func_from_frame( + frame: FrameType, +) -> Callable: + ''' + Return the corresponding function object reference from + a `FrameType`, and return it and it's parent namespace `dict`. + + ''' + ns: dict[str, Any] + + # for a method, go up a frame and lookup the name in locals() + if '.' in (qualname := frame.f_code.co_qualname): + cls_name, _, func_name = qualname.partition('.') + ns = frame.f_back.f_locals[cls_name].__dict__ + + else: + func_name: str = frame.f_code.co_name + ns = frame.f_globals + + return ( + ns, + ns[func_name], + ) + + def func_ref_from_frame( frame: FrameType, ) -> Callable: @@ -98,34 +120,63 @@ def func_ref_from_frame( ) -# TODO: move all this into new `.devx._code`! -# -[ ] prolly create a `@runtime_api` dec? -# -[ ] ^- make it capture and/or accept buncha optional -# meta-data like a fancier version of `@pdbp.hideframe`. -# class CallerInfo(pretty_struct.Struct): - rt_fi: inspect.FrameInfo - call_frame: FrameType + # https://docs.python.org/dev/reference/datamodel.html#frame-objects + # https://docs.python.org/dev/library/inspect.html#the-interpreter-stack + _api_frame: FrameType @property - def api_func_ref(self) -> Callable|None: - return func_ref_from_frame(self.rt_fi.frame) + def api_frame(self) -> FrameType: + try: + self._api_frame.clear() + except RuntimeError: + # log.warning( + print( + f'Frame {self._api_frame} for {self.api_func} is still active!' 
+ ) + + return self._api_frame + + _api_func: Callable + + @property + def api_func(self) -> Callable: + return self._api_func + + _caller_frames_up: int|None = 1 + _caller_frame: FrameType|None = None # cached after first stack scan @property def api_nsp(self) -> NamespacePath|None: - func: FunctionType = self.api_func_ref + func: FunctionType = self.api_func if func: return NamespacePath.from_ref(func) return '' @property - def caller_func_ref(self) -> Callable|None: - return func_ref_from_frame(self.call_frame) + def caller_frame(self) -> FrameType: + + # if not already cached, scan up stack explicitly by + # configured count. + if not self._caller_frame: + if self._caller_frames_up: + for _ in range(self._caller_frames_up): + caller_frame: FrameType|None = self.api_frame.f_back + + if not caller_frame: + raise ValueError( + 'No frame exists {self._caller_frames_up} up from\n' + f'{self.api_frame} @ {self.api_nsp}\n' + ) + + self._caller_frame = caller_frame + + return self._caller_frame @property def caller_nsp(self) -> NamespacePath|None: - func: FunctionType = self.caller_func_ref + func: FunctionType = self.api_func if func: return NamespacePath.from_ref(func) @@ -172,108 +223,66 @@ def find_caller_info( call_frame = call_frame.f_back return CallerInfo( - rt_fi=fi, - call_frame=call_frame, + _api_frame=rt_frame, + _api_func=func_ref_from_frame(rt_frame), + _caller_frames_up=go_up_iframes, ) return None -def pformat_boxed_tb( - tb_str: str, - fields_str: str|None = None, - field_prefix: str = ' |_', +_frame2callerinfo_cache: dict[FrameType, CallerInfo] = {} - tb_box_indent: int|None = None, - tb_body_indent: int = 1, -) -> str: - ''' - Create a "boxed" looking traceback string. +# TODO: -[x] move all this into new `.devx._code`! +# -[ ] consider rename to _callstack? +# -[ ] prolly create a `@runtime_api` dec? +# |_ @api_frame seems better? +# -[ ] ^- make it capture and/or accept buncha optional +# meta-data like a fancier version of `@pdbp.hideframe`. +# +def api_frame( + wrapped: Callable|None = None, + *, + caller_frames_up: int = 1, - Useful for emphasizing traceback text content as being an - embedded attribute of some other object (like - a `RemoteActorError` or other boxing remote error shuttle - container). +) -> Callable: - Any other parent/container "fields" can be passed in the - `fields_str` input along with other prefix/indent settings. + # handle the decorator called WITHOUT () case, + # i.e. 
just @api_frame, NOT @api_frame(extra=) + if wrapped is None: + return partial( + api_frame, + caller_frames_up=caller_frames_up, + ) - ''' - if ( - fields_str - and - field_prefix + @wrapt.decorator + async def wrapper( + wrapped: Callable, + instance: object, + args: tuple, + kwargs: dict, ): - fields: str = textwrap.indent( - fields_str, - prefix=field_prefix, - ) - else: - fields = fields_str or '' + # maybe cache the API frame for this call + global _frame2callerinfo_cache + this_frame: FrameType = inspect.currentframe() + api_frame: FrameType = this_frame.f_back - tb_body = tb_str - if tb_body_indent: - tb_body: str = textwrap.indent( - tb_str, - prefix=tb_body_indent * ' ', - ) + if not _frame2callerinfo_cache.get(api_frame): + _frame2callerinfo_cache[api_frame] = CallerInfo( + _api_frame=api_frame, + _api_func=wrapped, + _caller_frames_up=caller_frames_up, + ) - tb_box: str = ( + return wrapped(*args, **kwargs) - # orig - # f' |\n' - # f' ------ - ------\n\n' - # f'{tb_str}\n' - # f' ------ - ------\n' - # f' _|\n' - - f'|\n' - f' ------ - ------\n\n' - # f'{tb_str}\n' - f'{tb_body}' - f' ------ - ------\n' - f'_|\n' - ) - tb_box_indent: str = ( - tb_box_indent - or - 1 - - # (len(field_prefix)) - # ? ^-TODO-^ ? if you wanted another indent level - ) - if tb_box_indent > 0: - tb_box: str = textwrap.indent( - tb_box, - prefix=tb_box_indent * ' ', - ) - - return ( - fields - + - tb_box - ) - - -def pformat_caller_frame( - stack_limit: int = 1, - box_tb: bool = True, -) -> str: - ''' - Capture and return the traceback text content from - `stack_limit` call frames up. - - ''' - tb_str: str = ( - '\n'.join( - traceback.format_stack(limit=stack_limit) - ) - ) - if box_tb: - tb_str: str = pformat_boxed_tb( - tb_str=tb_str, - field_prefix=' ', - indent='', - ) - return tb_str + # annotate the function as a "api function", meaning it is + # a function for which the function above it in the call stack should be + # non-`tractor` code aka "user code". + # + # in the global frame cache for easy lookup from a given + # func-instance + wrapped._call_infos: dict[FrameType, CallerInfo] = _frame2callerinfo_cache + wrapped.__api_func__: bool = True + return wrapper(wrapped) -- 2.34.1 From 194bb8f7fbef7d9a625e7cd7841c90eb45862a6b Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 8 May 2024 14:53:45 -0400 Subject: [PATCH 096/305] Adjust `._runtime` to report `DebugStatus.req_ctx` - inside the `Actor.cancel()`'s maybe-wait-on-debugger delay, report the full debug request status and it's affiliated lock request IPC ctx. - use the new `.req_ctx.chan.uid` to do the local nursery lookup during channel teardown handling. - another couple log fmt tweaks. --- tractor/_runtime.py | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 84940222..1d931cd7 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -646,7 +646,7 @@ class Actor: peers_str: str = '' for uid, chans in self._peers.items(): peers_str += ( - f'|_ uid: {uid}\n' + f'uid: {uid}\n' ) for i, chan in enumerate(chans): peers_str += ( @@ -680,10 +680,12 @@ class Actor: # XXX => YES IT DOES, when i was testing ctl-c # from broken debug TTY locking due to # msg-spec races on application using RunVar... 
- pdb_user_uid: tuple = pdb_lock.global_actor_in_debug if ( - pdb_user_uid - and local_nursery + (ctx_in_debug := pdb_lock.ctx_in_debug) + and + (pdb_user_uid := ctx_in_debug.chan.uid) + and + local_nursery ): entry: tuple|None = local_nursery._children.get( tuple(pdb_user_uid) @@ -1171,13 +1173,17 @@ class Actor: # kill any debugger request task to avoid deadlock # with the root actor in this tree - dbcs = _debug.DebugStatus.req_cs - if dbcs is not None: + debug_req = _debug.DebugStatus + lock_req_ctx: Context = debug_req.req_ctx + if lock_req_ctx is not None: msg += ( '-> Cancelling active debugger request..\n' - f'|_{_debug.Lock.pformat()}' + f'|_{_debug.Lock.repr()}\n\n' + f'|_{lock_req_ctx}\n\n' ) - dbcs.cancel() + # lock_req_ctx._scope.cancel() + # TODO: wrap this in a method-API.. + debug_req.req_cs.cancel() # self-cancel **all** ongoing RPC tasks await self.cancel_rpc_tasks( @@ -1377,15 +1383,17 @@ class Actor: "IPC channel's " ) rent_chan_repr: str = ( - f'|_{parent_chan}' + f' |_{parent_chan}\n\n' if parent_chan else '' ) log.cancel( - f'Cancelling {descr} {len(tasks)} rpc tasks\n\n' - f'<= `Actor.cancel_rpc_tasks()`: {req_uid}\n' - f' {rent_chan_repr}\n' - # f'{self}\n' + f'Cancelling {descr} RPC tasks\n\n' + f'<= canceller: {req_uid}\n' + f'{rent_chan_repr}' + f'=> cancellee: {self.uid}\n' + f' |_{self}.cancel_rpc_tasks()\n' + f' |_tasks: {len(tasks)}\n' # f'{tasks_str}' ) for ( @@ -1415,7 +1423,7 @@ class Actor: if tasks: log.cancel( 'Waiting for remaining rpc tasks to complete\n' - f'|_{tasks}' + f'|_{tasks_str}' ) await self._ongoing_rpc_tasks.wait() @@ -1468,7 +1476,10 @@ class Actor: assert self._parent_chan, "No parent channel for this actor?" return Portal(self._parent_chan) - def get_chans(self, uid: tuple[str, str]) -> list[Channel]: + def get_chans( + self, + uid: tuple[str, str], + ) -> list[Channel]: ''' Return all IPC channels to the actor with provided `uid`. @@ -1631,6 +1642,9 @@ async def async_main( entered_debug: bool = await _debug._maybe_enter_pm(oserr) if not entered_debug: log.exception('Failed to init IPC channel server !?\n') + else: + log.runtime('Exited debug REPL..') + raise accept_addrs: list[tuple[str, int]] = actor.accept_addrs -- 2.34.1 From 8d5b40507c73cd09898cfc6a2d820e2ac4a104a7 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 8 May 2024 15:07:34 -0400 Subject: [PATCH 097/305] Rename `.msg.types.Msg` -> `PayloadMsg` --- tractor/msg/types.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 7e10dab0..1b3733cb 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -56,8 +56,7 @@ log = get_logger('tractor.msgspec') PayloadT = TypeVar('PayloadT') -# TODO: PayloadMsg -class Msg( +class PayloadMsg( Struct, Generic[PayloadT], @@ -110,6 +109,10 @@ class Msg( pld: Raw +# TODO: complete rename +Msg = PayloadMsg + + class Aid( Struct, tag=True, -- 2.34.1 From 61183f6a97ff57ee4a52ff1028d11281ccb46214 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 9 May 2024 09:37:47 -0400 Subject: [PATCH 098/305] Use `.recv_msg_w_pld()` for final `Portal.result()` Woops, due to a `None` test against the `._final_result`, any actual final `None` result would be received but not acked as such causing a spawning test to hang. Fix it by instead receiving and assigning both a `._final_result_msg: PayloadMsg` and `._final_result_pld`. 
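To see the gotcha in minimal, runnable form (a sketch only; the
`_PortalLike` and `recv_once` names here are illustrative, not the
real runtime API):

```python
import trio


class _PortalLike:
    def __init__(self, recv):
        self._recv = recv  # async fn delivering a (msg, pld) pair
        self._final_result_msg = None
        self._final_result_pld = None

    async def result(self):
        # test the *msg* slot, NOT the payload: a `Return`-msg is
        # never `None` once received, so a legit `None` payload
        # can't re-trigger (and hang on) a second receive.
        if self._final_result_msg is None:
            (
                self._final_result_msg,
                self._final_result_pld,
            ) = await self._recv()

        return self._final_result_pld


async def main():
    async def recv_once():
        return ('Return', None)  # the final result really IS `None`

    portal = _PortalLike(recv_once)
    assert await portal.result() is None
    assert await portal.result() is None  # no 2nd recv attempted


trio.run(main)
```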
NB: as mentioned in many recent comments surrounding this API layer, really this whole `Portal`-has-final-result interface/semantics should be entirely removed as should the `ActorNursery.run_in_actor()` API(s). Instead it should all be replaced by a wrapping "high level" API (`tractor.hilevel` ?) which combines a task nursery, `Portal.open_context()` and underlying `Context` APIs + an `outcome.Outcome` to accomplish the same "run a single task in a spawned actor and return it's result"; aka a "one-shot-task-actor". --- tractor/_portal.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/tractor/_portal.py b/tractor/_portal.py index 79a9dc5d..700f2fdc 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -47,6 +47,7 @@ from ._ipc import Channel from .log import get_logger from .msg import ( # Error, + PayloadMsg, NamespacePath, Return, ) @@ -98,7 +99,8 @@ class Portal: self.chan = channel # during the portal's lifetime - self._final_result: Any|None = None + self._final_result_pld: Any|None = None + self._final_result_msg: PayloadMsg|None = None # When set to a ``Context`` (when _submit_for_result is called) # it is expected that ``result()`` will be awaited at some @@ -132,7 +134,7 @@ class Portal: 'A pending main result has already been submitted' ) - self._expect_result_ctx = await self.actor.start_remote_task( + self._expect_result_ctx: Context = await self.actor.start_remote_task( self.channel, nsf=NamespacePath(f'{ns}:{func}'), kwargs=kwargs, @@ -163,13 +165,16 @@ class Portal: # expecting a "main" result assert self._expect_result_ctx - if self._final_result is None: - self._final_result: Any = await self._expect_result_ctx._pld_rx.recv_pld( - ctx=self._expect_result_ctx, + if self._final_result_msg is None: + ( + self._final_result_msg, + self._final_result_pld, + ) = await self._expect_result_ctx._pld_rx.recv_msg_w_pld( + ipc=self._expect_result_ctx, expect_msg=Return, ) - return self._final_result + return self._final_result_pld async def _cancel_streams(self): # terminate all locally running async generator -- 2.34.1 From bc660a533cb434af61083ed38839e16a0518da50 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 9 May 2024 15:20:03 -0400 Subject: [PATCH 099/305] Hide some API frames, port to new `._debug` apis - start tossing in `__tracebackhide__`s to various eps which don't need to show in tbs or in the pdb REPL. - port final `._maybe_enter_pm()` to pass a `api_frame`. - start comment-marking up some API eps with `@api_frame` in prep for actually using the new frame-stack tracing. --- tractor/_root.py | 12 +++++++++--- tractor/_spawn.py | 19 ++++++++----------- tractor/_supervise.py | 4 ++-- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/tractor/_root.py b/tractor/_root.py index 377f494e..4c0bb4f6 100644 --- a/tractor/_root.py +++ b/tractor/_root.py @@ -18,7 +18,7 @@ Root actor runtime ignition(s). ''' -from contextlib import asynccontextmanager +from contextlib import asynccontextmanager as acm from functools import partial import importlib import logging @@ -60,7 +60,7 @@ _default_lo_addrs: list[tuple[str, int]] = [( logger = log.get_logger('tractor') -@asynccontextmanager +@acm async def open_root_actor( *, @@ -97,6 +97,7 @@ async def open_root_actor( Runtime init entry point for ``tractor``. ''' + __tracebackhide__ = True # TODO: stick this in a `@cm` defined in `devx._debug`? 
# # Override the global debugger hook to make it play nice with @@ -363,7 +364,12 @@ async def open_root_actor( BaseExceptionGroup, ) as err: - entered: bool = await _debug._maybe_enter_pm(err) + import inspect + entered: bool = await _debug._maybe_enter_pm( + err, + api_frame=inspect.currentframe(), + ) + if ( not entered and diff --git a/tractor/_spawn.py b/tractor/_spawn.py index 3f886c01..09d9aff8 100644 --- a/tractor/_spawn.py +++ b/tractor/_spawn.py @@ -142,7 +142,9 @@ async def exhaust_portal( ''' __tracebackhide__ = True try: - log.debug(f"Waiting on final result from {actor.uid}") + log.debug( + f'Waiting on final result from {actor.uid}' + ) # XXX: streams should never be reaped here since they should # always be established and shutdown using a context manager api @@ -195,7 +197,10 @@ async def cancel_on_completion( # if this call errors we store the exception for later # in ``errors`` which will be reraised inside # an exception group and we still send out a cancel request - result: Any|Exception = await exhaust_portal(portal, actor) + result: Any|Exception = await exhaust_portal( + portal, + actor, + ) if isinstance(result, Exception): errors[actor.uid]: Exception = result log.cancel( @@ -507,14 +512,6 @@ async def trio_proc( ) ) - # await chan.send({ - # '_parent_main_data': subactor._parent_main_data, - # 'enable_modules': subactor.enable_modules, - # 'reg_addrs': subactor.reg_addrs, - # 'bind_addrs': bind_addrs, - # '_runtime_vars': _runtime_vars, - # }) - # track subactor in current nursery curr_actor: Actor = current_actor() curr_actor._actoruid2nursery[subactor.uid] = actor_nursery @@ -558,8 +555,8 @@ async def trio_proc( # killing the process too early. if proc: log.cancel(f'Hard reap sequence starting for {subactor.uid}') - with trio.CancelScope(shield=True): + with trio.CancelScope(shield=True): # don't clobber an ongoing pdb if cancelled_during_spawn: # Try again to avoid TTY clobbering. diff --git a/tractor/_supervise.py b/tractor/_supervise.py index dc65cc65..59ec728b 100644 --- a/tractor/_supervise.py +++ b/tractor/_supervise.py @@ -346,8 +346,6 @@ async def _open_and_supervise_one_cancels_all_nursery( actor: Actor, ) -> typing.AsyncGenerator[ActorNursery, None]: - - # TODO: yay or nay? __tracebackhide__ = True # the collection of errors retreived from spawned sub-actors @@ -519,6 +517,7 @@ async def _open_and_supervise_one_cancels_all_nursery( @acm +# @api_frame async def open_nursery( **kwargs, @@ -538,6 +537,7 @@ async def open_nursery( which cancellation scopes correspond to each spawned subactor set. ''' + __tracebackhide__ = True implicit_runtime: bool = False actor: Actor = current_actor(err_on_no_runtime=False) an: ActorNursery|None = None -- 2.34.1 From 316afdec55f88aee7265e5cb60ebb9b7e1a8d3e0 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 9 May 2024 16:31:23 -0400 Subject: [PATCH 100/305] Update tests for `PldRx` and `Context` changes Mostly adjustments for the new pld-receiver semantics/shim-layer which results more often in the direct delivery of `RemoteActorError`s from IPC API primitives (like `Portal.result()`) instead of being embedded in an `ExceptionGroup` bundled from an embedded nursery. Tossed usage of the `debug_mode: bool` fixture to a couple problematic tests while i was working on them. Also includes detailed assertion updates to the inter-peer cancellation suite in terms of, - `Context.canceller` state correctly matching the true src actor when expecting a ctxc. 
- any rxed `ContextCancelled` should instance match the `Context._local/remote_error` as should the `.msgdata` and `._ipc_msg`. --- tests/test_cancellation.py | 25 +++++-- tests/test_infected_asyncio.py | 22 +++--- tests/test_inter_peer_cancellation.py | 99 +++++++++++++++++++-------- tests/test_spawning.py | 20 +++--- 4 files changed, 113 insertions(+), 53 deletions(-) diff --git a/tests/test_cancellation.py b/tests/test_cancellation.py index 18ad3615..92540ed4 100644 --- a/tests/test_cancellation.py +++ b/tests/test_cancellation.py @@ -89,17 +89,30 @@ def test_remote_error(reg_addr, args_err): assert excinfo.value.boxed_type == errtype else: - # the root task will also error on the `.result()` call - # so we expect an error from there AND the child. - with pytest.raises(BaseExceptionGroup) as excinfo: + # the root task will also error on the `Portal.result()` + # call so we expect an error from there AND the child. + # |_ tho seems like on new `trio` this doesn't always + # happen? + with pytest.raises(( + BaseExceptionGroup, + tractor.RemoteActorError, + )) as excinfo: trio.run(main) - # ensure boxed errors - for exc in excinfo.value.exceptions: + # ensure boxed errors are `errtype` + err: BaseException = excinfo.value + if isinstance(err, BaseExceptionGroup): + suberrs: list[BaseException] = err.exceptions + else: + suberrs: list[BaseException] = [err] + + for exc in suberrs: assert exc.boxed_type == errtype -def test_multierror(reg_addr): +def test_multierror( + reg_addr: tuple[str, int], +): ''' Verify we raise a ``BaseExceptionGroup`` out of a nursery where more then one actor errors. diff --git a/tests/test_infected_asyncio.py b/tests/test_infected_asyncio.py index 8d34bef4..45722a63 100644 --- a/tests/test_infected_asyncio.py +++ b/tests/test_infected_asyncio.py @@ -444,6 +444,7 @@ def test_basic_interloop_channel_stream(reg_addr, fan_out): infect_asyncio=True, fan_out=fan_out, ) + # should raise RAE diectly await portal.result() trio.run(main) @@ -461,12 +462,11 @@ def test_trio_error_cancels_intertask_chan(reg_addr): # should trigger remote actor error await portal.result() - with pytest.raises(BaseExceptionGroup) as excinfo: + with pytest.raises(RemoteActorError) as excinfo: trio.run(main) - # ensure boxed errors - for exc in excinfo.value.exceptions: - assert exc.boxed_type == Exception + # ensure boxed error type + excinfo.value.boxed_type == Exception def test_trio_closes_early_and_channel_exits(reg_addr): @@ -477,7 +477,7 @@ def test_trio_closes_early_and_channel_exits(reg_addr): exit_early=True, infect_asyncio=True, ) - # should trigger remote actor error + # should raise RAE diectly await portal.result() # should be a quiet exit on a simple channel exit @@ -492,15 +492,17 @@ def test_aio_errors_and_channel_propagates_and_closes(reg_addr): aio_raise_err=True, infect_asyncio=True, ) - # should trigger remote actor error + # should trigger RAE directly, not an eg. await portal.result() - with pytest.raises(BaseExceptionGroup) as excinfo: + with pytest.raises( + # NOTE: bc we directly wait on `Portal.result()` instead + # of capturing it inside the `ActorNursery` machinery. 
+        expected_exception=RemoteActorError,
+    ) as excinfo:
         trio.run(main)
 
-    # ensure boxed errors
-    for exc in excinfo.value.exceptions:
-        assert exc.boxed_type == Exception
+    assert excinfo.value.boxed_type == Exception
 
 
 @tractor.context
diff --git a/tests/test_inter_peer_cancellation.py b/tests/test_inter_peer_cancellation.py
index aa05e3c8..7bf9a2bd 100644
--- a/tests/test_inter_peer_cancellation.py
+++ b/tests/test_inter_peer_cancellation.py
@@ -55,9 +55,10 @@ from tractor._testing import (
 
 
 @tractor.context
-async def sleep_forever(
+async def open_stream_then_sleep_forever(
     ctx: Context,
     expect_ctxc: bool = False,
+
 ) -> None:
     '''
     Sync the context, open a stream then just sleep.
@@ -67,6 +68,10 @@ async def sleep_forever(
     '''
     try:
         await ctx.started()
+
+        # NOTE: the below means this child will send a `Stop`
+        # to its parent-side task despite that side never
+        # opening a stream itself.
         async with ctx.open_stream():
             await trio.sleep_forever()
 
@@ -100,7 +105,7 @@ async def error_before_started(
     '''
    async with tractor.wait_for_actor('sleeper') as p2:
         async with (
-            p2.open_context(sleep_forever) as (peer_ctx, first),
+            p2.open_context(open_stream_then_sleep_forever) as (peer_ctx, first),
             peer_ctx.open_stream(),
         ):
             # NOTE: this WAS inside an @acm body but i factored it
@@ -204,9 +209,13 @@ async def stream_ints(
 @tractor.context
 async def stream_from_peer(
     ctx: Context,
+    debug_mode: bool,
     peer_name: str = 'sleeper',
 ) -> None:
 
+    # sanity
+    assert tractor._state.debug_mode() == debug_mode
+
     peer: Portal
     try:
         async with (
@@ -240,26 +249,54 @@ async def stream_from_peer(
                     assert msg is not None
                     print(msg)
 
-    # NOTE: cancellation of the (sleeper) peer should always
-    # cause a `ContextCancelled` raise in this streaming
-    # actor.
-    except ContextCancelled as ctxc:
-        ctxerr = ctxc
+    # NOTE: cancellation of the (sleeper) peer should always cause
+    # a `ContextCancelled` raise in this streaming actor.
+    except ContextCancelled as _ctxc:
+        ctxc = _ctxc
 
-        assert peer_ctx._remote_error is ctxerr
-        assert peer_ctx._remote_error.msgdata == ctxerr.msgdata
+        # print("TRYING TO ENTER PAUSSE!!!")
+        # await tractor.pause(shield=True)
+        re: ContextCancelled = peer_ctx._remote_error
 
-        # XXX YES, bc exact same msg instances
-        assert peer_ctx._remote_error._ipc_msg is ctxerr._ipc_msg
+        # XXX YES XXX, remote error should be unpacked only once!
+        assert (
+            re
+            is
+            peer_ctx.maybe_error
+            is
+            ctxc
+            is
+            peer_ctx._local_error
+        )
+        # NOTE: these errors should all match!
+        # ------ - ------
+        # XXX [2024-05-03] XXX
+        # ------ - ------
+        # broke this due to a re-raise inside `.msg._ops.drain_to_final_msg()`
+        # where the `Error()` msg was directly raising the ctxc
+        # instead of just returning up to the caller inside
+        # `Context.return()` which would result in a diff instance of
+        # the same remote error bubbling out above vs what was
+        # already unpacked and set inside `Context.
+        assert (
+            peer_ctx._remote_error.msgdata
+            ==
+            ctxc.msgdata
+        )
+        # ^-XXX-^ notice the data is of course the exact same.. so
+        # the above larger assert makes sense to also always be true!
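+        # in short (restating the surrounding asserts): unpacked-error
+        # identity but wire-data equality, i.e.
+        #
+        #   re is ctxc                      # same error instance
+        #   re.msgdata == ctxc.msgdata      # same wire-msg data
+        #   re._ipc_msg is ctxc._ipc_msg    # same private msg ref
+        #   re.ipc_msg != ctxc.ipc_msg      # pub prop mints a new copy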
- # XXX NO, bc new one always created for property accesss - assert peer_ctx._remote_error.ipc_msg != ctxerr.ipc_msg + # XXX YES XXX, bc should be exact same msg instances + assert peer_ctx._remote_error._ipc_msg is ctxc._ipc_msg + + # XXX NO XXX, bc new one always created for property accesss + assert peer_ctx._remote_error.ipc_msg != ctxc.ipc_msg # the peer ctx is the canceller even though it's canceller # is the "canceller" XD assert peer_name in peer_ctx.canceller - assert "canceller" in ctxerr.canceller + assert "canceller" in ctxc.canceller # caller peer should not be the cancel requester assert not ctx.cancel_called @@ -283,12 +320,13 @@ async def stream_from_peer( # TODO / NOTE `.canceller` won't have been set yet # here because that machinery is inside - # `.open_context().__aexit__()` BUT, if we had + # `Portal.open_context().__aexit__()` BUT, if we had # a way to know immediately (from the last # checkpoint) that cancellation was due to # a remote, we COULD assert this here..see, # https://github.com/goodboy/tractor/issues/368 # + # await tractor.pause() # assert 'canceller' in ctx.canceller # root/parent actor task should NEVER HAVE cancelled us! @@ -392,12 +430,13 @@ def test_peer_canceller( try: async with ( sleeper.open_context( - sleep_forever, + open_stream_then_sleep_forever, expect_ctxc=True, ) as (sleeper_ctx, sent), just_caller.open_context( stream_from_peer, + debug_mode=debug_mode, ) as (caller_ctx, sent), canceller.open_context( @@ -423,10 +462,11 @@ def test_peer_canceller( # should always raise since this root task does # not request the sleeper cancellation ;) - except ContextCancelled as ctxerr: + except ContextCancelled as _ctxc: + ctxc = _ctxc print( 'CAUGHT REMOTE CONTEXT CANCEL\n\n' - f'{ctxerr}\n' + f'{ctxc}\n' ) # canceller and caller peers should not @@ -437,7 +477,7 @@ def test_peer_canceller( # we were not the actor, our peer was assert not sleeper_ctx.cancel_acked - assert ctxerr.canceller[0] == 'canceller' + assert ctxc.canceller[0] == 'canceller' # XXX NOTE XXX: since THIS `ContextCancelled` # HAS NOT YET bubbled up to the @@ -448,7 +488,7 @@ def test_peer_canceller( # CASE_1: error-during-ctxc-handling, if error_during_ctxerr_handling: - raise RuntimeError('Simulated error during teardown') + raise RuntimeError('Simulated RTE re-raise during ctxc handling') # CASE_2: standard teardown inside in `.open_context()` block raise @@ -513,6 +553,9 @@ def test_peer_canceller( # should be cancelled by US. # if error_during_ctxerr_handling: + print(f'loc_err: {_loc_err}\n') + assert isinstance(loc_err, RuntimeError) + # since we do a rte reraise above, the # `.open_context()` error handling should have # raised a local rte, thus the internal @@ -521,9 +564,6 @@ def test_peer_canceller( # a `trio.Cancelled` due to a local # `._scope.cancel()` call. 
assert not sleeper_ctx._scope.cancelled_caught - - assert isinstance(loc_err, RuntimeError) - print(f'_loc_err: {_loc_err}\n') # assert sleeper_ctx._local_error is _loc_err # assert sleeper_ctx._local_error is _loc_err assert not ( @@ -560,9 +600,12 @@ def test_peer_canceller( else: # the other 2 ctxs assert ( - re.canceller - == - canceller.channel.uid + isinstance(re, ContextCancelled) + and ( + re.canceller + == + canceller.channel.uid + ) ) # since the sleeper errors while handling a @@ -811,8 +854,7 @@ async def serve_subactors( async with open_nursery() as an: # sanity - if debug_mode: - assert tractor._state.debug_mode() + assert tractor._state.debug_mode() == debug_mode await ctx.started(peer_name) async with ctx.open_stream() as ipc: @@ -1091,7 +1133,6 @@ def test_peer_spawns_and_cancels_service_subactor( '-> root checking `client_ctx.result()`,\n' f'-> checking that sub-spawn {peer_name} is down\n' ) - # else: try: res = await client_ctx.result(hide_tb=False) diff --git a/tests/test_spawning.py b/tests/test_spawning.py index 5995ed2d..99ec9abc 100644 --- a/tests/test_spawning.py +++ b/tests/test_spawning.py @@ -2,7 +2,9 @@ Spawning basics """ -from typing import Optional +from typing import ( + Any, +) import pytest import trio @@ -25,13 +27,11 @@ async def spawn( async with tractor.open_root_actor( arbiter_addr=reg_addr, ): - actor = tractor.current_actor() assert actor.is_arbiter == is_arbiter data = data_to_pass_down if actor.is_arbiter: - async with tractor.open_nursery() as nursery: # forks here @@ -95,7 +95,9 @@ async def test_movie_theatre_convo(start_method): await portal.cancel_actor() -async def cellar_door(return_value: Optional[str]): +async def cellar_door( + return_value: str|None, +): return return_value @@ -105,16 +107,18 @@ async def cellar_door(return_value: Optional[str]): ) @tractor_test async def test_most_beautiful_word( - start_method, - return_value + start_method: str, + return_value: Any, + debug_mode: bool, ): ''' The main ``tractor`` routine. ''' with trio.fail_after(1): - async with tractor.open_nursery() as n: - + async with tractor.open_nursery( + debug_mode=debug_mode, + ) as n: portal = await n.run_in_actor( cellar_door, return_value=return_value, -- 2.34.1 From 2ddfe11d713c79b7234fa220f6bc3d625247560a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 9 May 2024 16:51:51 -0400 Subject: [PATCH 101/305] Modernize streaming example script - add typing, - apply multi-line call style, - use 'cancel' log level, - enable debug mode. --- examples/full_fledged_streaming_service.py | 45 +++++++++++++++------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/examples/full_fledged_streaming_service.py b/examples/full_fledged_streaming_service.py index c93df242..be4c372e 100644 --- a/examples/full_fledged_streaming_service.py +++ b/examples/full_fledged_streaming_service.py @@ -1,6 +1,11 @@ import time import trio import tractor +from tractor import ( + ActorNursery, + MsgStream, + Portal, +) # this is the first 2 actors, streamer_1 and streamer_2 @@ -12,14 +17,18 @@ async def stream_data(seed): # this is the third actor; the aggregator async def aggregate(seed): - """Ensure that the two streams we receive match but only stream + ''' + Ensure that the two streams we receive match but only stream a single set of values to the parent. 
- """ - async with tractor.open_nursery() as nursery: - portals = [] + + ''' + an: ActorNursery + async with tractor.open_nursery() as an: + portals: list[Portal] = [] for i in range(1, 3): - # fork point - portal = await nursery.start_actor( + + # fork/spawn call + portal = await an.start_actor( name=f'streamer_{i}', enable_modules=[__name__], ) @@ -43,7 +52,11 @@ async def aggregate(seed): async with trio.open_nursery() as n: for portal in portals: - n.start_soon(push_to_chan, portal, send_chan.clone()) + n.start_soon( + push_to_chan, + portal, + send_chan.clone(), + ) # close this local task's reference to send side await send_chan.aclose() @@ -60,7 +73,7 @@ async def aggregate(seed): print("FINISHED ITERATING in aggregator") - await nursery.cancel() + await an.cancel() print("WAITING on `ActorNursery` to finish") print("AGGREGATOR COMPLETE!") @@ -75,18 +88,21 @@ async def main() -> list[int]: ''' # yes, a nursery which spawns `trio`-"actors" B) - nursery: tractor.ActorNursery - async with tractor.open_nursery() as nursery: + an: ActorNursery + async with tractor.open_nursery( + loglevel='cancel', + debug_mode=True, + ) as an: seed = int(1e3) pre_start = time.time() - portal: tractor.Portal = await nursery.start_actor( + portal: Portal = await an.start_actor( name='aggregator', enable_modules=[__name__], ) - stream: tractor.MsgStream + stream: MsgStream async with portal.open_stream_from( aggregate, seed=seed, @@ -95,11 +111,12 @@ async def main() -> list[int]: start = time.time() # the portal call returns exactly what you'd expect # as if the remote "aggregate" function was called locally - result_stream = [] + result_stream: list[int] = [] async for value in stream: result_stream.append(value) - await portal.cancel_actor() + cancelled: bool = await portal.cancel_actor() + assert cancelled print(f"STREAM TIME = {time.time() - start}") print(f"STREAM + SPAWN TIME = {time.time() - pre_start}") -- 2.34.1 From 29a001c4ef7e7caf439ac56aa9d797e309a18b57 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 10 May 2024 13:15:45 -0400 Subject: [PATCH 102/305] Rename `.msg.types.Msg` -> `PayloadMsg` --- tractor/msg/__init__.py | 2 +- tractor/msg/_codec.py | 2 +- tractor/msg/types.py | 26 +++++++++++++------------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py index 13739cdb..44586f2d 100644 --- a/tractor/msg/__init__.py +++ b/tractor/msg/__init__.py @@ -44,7 +44,7 @@ from ._codec import ( # ) from .types import ( - Msg as Msg, + PayloadMsg as PayloadMsg, Aid as Aid, SpawnSpec as SpawnSpec, diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index 901c0da1..6ba23b78 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -432,7 +432,7 @@ class MsgCodec(Struct): # ) -> Any|Struct: - # msg: Msg = codec.dec.decode(msg) + # msg: PayloadMsg = codec.dec.decode(msg) # payload_tag: str = msg.header.payload_tag # payload_dec: msgpack.Decoder = codec._payload_decs[payload_tag] # return payload_dec.decode(msg.pld) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 1b3733cb..f8205c23 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -302,7 +302,7 @@ class StartAck( class Started( - Msg, + PayloadMsg, Generic[PayloadT], ): ''' @@ -316,12 +316,12 @@ class Started( # TODO: instead of using our existing `Start` # for this (as we did with the original `{'cmd': ..}` style) -# class Cancel(Msg): +# class Cancel: # cid: str class Yield( - Msg, + PayloadMsg, Generic[PayloadT], ): ''' @@ -348,7 +348,7 @@ 
class Stop( # TODO: is `Result` or `Out[come]` a better name? class Return( - Msg, + PayloadMsg, Generic[PayloadT], ): ''' @@ -360,7 +360,7 @@ class Return( class CancelAck( - Msg, + PayloadMsg, Generic[PayloadT], ): ''' @@ -466,14 +466,14 @@ def from_dict_msg( # TODO: should be make a msg version of `ContextCancelled?` # and/or with a scope field or a full `ActorCancelled`? -# class Cancelled(Msg): +# class Cancelled(MsgType): # cid: str # TODO what about overruns? -# class Overrun(Msg): +# class Overrun(MsgType): # cid: str -_runtime_msgs: list[Msg] = [ +_runtime_msgs: list[Struct] = [ # identity handshake on first IPC `Channel` contact. Aid, @@ -499,9 +499,9 @@ _runtime_msgs: list[Msg] = [ ] # the no-outcome-yet IAC (inter-actor-communication) sub-set which -# can be `Msg.pld` payload field type-limited by application code +# can be `PayloadMsg.pld` payload field type-limited by application code # using `apply_codec()` and `limit_msg_spec()`. -_payload_msgs: list[Msg] = [ +_payload_msgs: list[PayloadMsg] = [ # first from `Context.started()` Started, @@ -544,8 +544,8 @@ def mk_msg_spec( ] = 'indexed_generics', ) -> tuple[ - Union[Type[Msg]], - list[Type[Msg]], + Union[MsgType], + list[MsgType], ]: ''' Create a payload-(data-)type-parameterized IPC message specification. @@ -557,7 +557,7 @@ def mk_msg_spec( determined by the input `payload_type_union: Union[Type]`. ''' - submsg_types: list[Type[Msg]] = Msg.__subclasses__() + submsg_types: list[MsgType] = Msg.__subclasses__() bases: tuple = ( # XXX NOTE XXX the below generic-parameterization seems to # be THE ONLY way to get this to work correctly in terms -- 2.34.1 From 6734dbb3cd1fb5718ab8e304f05371dc6e19c7c7 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 14 May 2024 11:39:04 -0400 Subject: [PATCH 103/305] Always release debug request from `._post_mortem()` Since obviously the thread is likely expected to halt and raise after the REPL session exits; this was a regression from the prior impl. The main reason for this is that otherwise the request task will never unblock if the user steps through the crashed task using 'next' since the `.do_next()` handler doesn't by default release the request since in the `.pause()` case this would end the session too early. Other, - toss in draft `Pdb.user_exception()`, though doesn't seem to ever trigger? - only release `Lock._debug_lock` when already locked. --- tractor/devx/_debug.py | 43 +++++++++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index da322407..a789c6ce 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -249,7 +249,10 @@ class Lock: message: str = 'TTY lock not held by any child\n' except RuntimeError as rte: - message: str = 'TTY lock FAILED to release for child??\n' + message: str = ( + 'TTY lock FAILED to release for child??\n' + f'{current_task()}\n' + ) log.exception(message) # uhhh makes no sense but been seeing the non-owner @@ -755,6 +758,16 @@ class PdbREPL(pdbp.Pdb): status = DebugStatus + # NOTE: see details in stdlib's `bdb.py` + def user_exception(self, frame, exc_info): + ''' + Called when we stop on an exception. + ''' + log.warning( + 'Exception during REPL sesh\n\n' + f'{frame}\n\n' + f'{exc_info}\n\n' + ) # def preloop(self): # print('IN PRELOOP') @@ -780,7 +793,11 @@ class PdbREPL(pdbp.Pdb): # NOTE: for subactors the stdio lock is released via the # allocated RPC locker task, so for root we have to do it # manually. 
- if is_root_process(): + if ( + is_root_process() + and + Lock._debug_lock.locked() + ): Lock.release() def set_quit(self): @@ -791,7 +808,11 @@ class PdbREPL(pdbp.Pdb): cancel_req_task=False, ) - if is_root_process(): + if ( + is_root_process() + and + Lock._debug_lock.locked() + ): Lock.release() # TODO: special handling where we just want the next LOC and @@ -803,7 +824,7 @@ class PdbREPL(pdbp.Pdb): # try: # super().set_next(frame) # finally: - # Lock.release() + # pdbp.set_trace() # XXX NOTE: we only override this because apparently the stdlib pdb # bois likes to touch the SIGINT handler as much as i like to touch @@ -1251,7 +1272,7 @@ def shield_sigint_handler( # child actor that has locked the debugger elif not is_root_process(): - log.warning( + log.debug( f'Subactor {actor.uid} handling SIGINT\n\n' f'{Lock.repr()}\n' ) @@ -1484,8 +1505,11 @@ async def _pause( ): # re-entrant root process already has it: noop. log.warning( - f'{task.name}@{actor.uid} already has TTY lock\n' - f'ignoring..' + f'This root actor task is already within an active REPL session\n' + f'Ignoring this re-entered `tractor.pause()`\n' + f'task: {task.name}\n' + f'REPL: {Lock.repl}\n' + # TODO: use `._frame_stack` scanner to find the @api_frame ) await trio.lowlevel.checkpoint() return @@ -1609,6 +1633,7 @@ async def _pause( log.exception( 'Failed to engage debugger via `_pause()` ??\n' ) + mk_pdb().set_trace() DebugStatus.release() # sanity checks for ^ on request/status teardown @@ -1926,6 +1951,10 @@ def _post_mortem( # frame=None, traceback=tb, ) + # Since we presume the post-mortem was enaged to a task-ending + # error, we MUST release the local REPL request so that not other + # local task nor the root remains blocked! + DebugStatus.release() async def post_mortem( -- 2.34.1 From 26d3ba7cc70e300a5db3cb870138d4d6374bcbea Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 14 May 2024 15:22:13 -0400 Subject: [PATCH 104/305] Make `request_root_stdio_lock()` post-mortem-able Finally got this working so that if/when an internal bug is introduced to this request task-func, we can actually REPL-debug the lock request task itself B) As in, if the subactor's lock request task internally errors we, - ensure the task always terminates (by calling `DebugStatus.release()`) and explicitly reports (via a `log.exception()`) the internal error. - capture the error instance and set as a new `DebugStatus.req_err` and always check for it on final teardown - in which case we also, - ensure it's reraised from a new `DebugRequestError`. - unhide the stack frames for `_pause()`, `_enter_repl_sync()` so that the dev can upward inspect the `_pause()` call stack sanely. Supporting internal impl changes, - add `DebugStatus.cancel()` and `.req_err`. - don't ever cancel the request task from `PdbREPL.set_[continue/quit]()` only when there's some internal error that would likely result in a hang and stale lock state with the root. - only release the root's lock when the current ask is also the owner (avoids bad release errors). - also show internal `._pause()`-related frames on any `repl_err`. Other temp-dev-tweaks, - make pld-dec change log msgs info level again while solving this final context-vars race stuff.. - drop the debug pld-dec instance match asserts for now since the problem is already caught (and now debug-able B) by an attr-error on the decoded-as-`dict` started msg, and instead add in a `log.exception()` trace to see which task is triggering the case where the debug `MsgDec` isn't set correctly vs. 
when we think it's being applied. --- tractor/devx/_debug.py | 341 +++++++++++++++++++++++++---------------- 1 file changed, 207 insertions(+), 134 deletions(-) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index a789c6ce..1e82122c 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -234,49 +234,26 @@ class Lock: cls, force: bool = False, ): - lock: trio.StrictFIFOLock = cls._debug_lock try: - if lock.locked(): + lock: trio.StrictFIFOLock = cls._debug_lock + owner: Task = lock.statistics().owner + if ( + lock.locked() + and + owner is current_task() + # ^-NOTE-^ if not will raise a RTE.. + ): if not DebugStatus.is_main_trio_thread(): trio.from_thread.run_sync( cls._debug_lock.release ) else: cls._debug_lock.release() + message: str = 'TTY lock released for child\n' - message: str = 'TTY lock released for child\n' else: message: str = 'TTY lock not held by any child\n' - except RuntimeError as rte: - message: str = ( - 'TTY lock FAILED to release for child??\n' - f'{current_task()}\n' - ) - log.exception(message) - - # uhhh makes no sense but been seeing the non-owner - # release error even though this is definitely the task - # that locked? - owner = cls._debug_lock.statistics().owner - # if ( - # owner - # and - # cls.remote_task_in_debug is None - # ): - # raise RuntimeError( - # 'Stale `Lock` detected, no remote task active!?\n' - # f'|_{owner}\n' - # # f'{cls}' - # ) from rte - - if owner: - raise rte - - # OW suppress, can't member why tho .. XD - # something somethin corrupts a cancel-scope - # somewhere.. - finally: # IFF there are no more requesting tasks queued up fire, the # "tty-unlocked" event thereby alerting any monitors of the lock that @@ -518,11 +495,23 @@ async def lock_tty_for_child( locked=False, ) - except BaseException: - log.exception( - 'Errored during root TTY-lock dialog?\n' - 'Forcing release since an internal error caused this!\n' + except BaseException as req_err: + message: str = ( + 'Forcing `Lock.release()` since likely an internal error!\n' ) + if isinstance(req_err, trio.Cancelled): + log.cancel( + 'Cancelled during root TTY-lock dialog?\n' + + + message + ) + else: + log.exception( + 'Errored during root TTY-lock dialog?\n' + + + message + ) + Lock.release(force=True) raise @@ -555,6 +544,7 @@ class DebugStatus: repl_release: trio.Event|None = None req_finished: trio.Event|None = None lock_status: LockStatus|None = None + req_err: BaseException|None = None _orig_sigint_handler: Callable|None = None _trio_handler: ( @@ -693,28 +683,37 @@ class DebugStatus: # is not threading.main_thread() # ) + @classmethod + def cancel(cls) -> bool: + if (req_cs := cls.req_cs): + req_cs.cancel() + return True + + return False + @classmethod @pdbp.hideframe def release( cls, - cancel_req_task: bool = True, + cancel_req_task: bool = False, ): + repl_release: trio.Event = cls.repl_release try: # sometimes the task might already be terminated in # which case this call will raise an RTE? 
- if cls.repl_release is not None: - cls.repl_release.set() + if repl_release is not None: + repl_release.set() finally: # if req_ctx := cls.req_ctx: # req_ctx._scope.cancel() - - if ( - cancel_req_task - and - (req_cs := cls.req_cs) - ): - req_cs.cancel() + if cancel_req_task: + cancelled: bool = cls.cancel() + if not cancelled: + log.warning( + 'Failed to cancel request task!?\n' + f'{cls.repl_task}\n' + ) # restore original sigint handler cls.unshield_sigint() @@ -759,16 +758,19 @@ class PdbREPL(pdbp.Pdb): status = DebugStatus # NOTE: see details in stdlib's `bdb.py` - def user_exception(self, frame, exc_info): - ''' - Called when we stop on an exception. - ''' - log.warning( - 'Exception during REPL sesh\n\n' - f'{frame}\n\n' - f'{exc_info}\n\n' - ) + # def user_exception(self, frame, exc_info): + # ''' + # Called when we stop on an exception. + # ''' + # log.warning( + # 'Exception during REPL sesh\n\n' + # f'{frame}\n\n' + # f'{exc_info}\n\n' + # ) + # NOTE: this actually hooks but i don't see anyway to detect + # if an error was caught.. this is why currently we just always + # call `DebugStatus.release` inside `_post_mortem()`. # def preloop(self): # print('IN PRELOOP') # super().preloop() @@ -804,10 +806,7 @@ class PdbREPL(pdbp.Pdb): try: super().set_quit() finally: - DebugStatus.release( - cancel_req_task=False, - ) - + DebugStatus.release() if ( is_root_process() and @@ -863,7 +862,6 @@ def apply_debug_pldec() -> _codec.MsgCodec: (only in the current task). ''' - from tractor.msg import ( _ops as msgops, ) @@ -874,8 +872,12 @@ def apply_debug_pldec() -> _codec.MsgCodec: with msgops.limit_plds( spec=__pld_spec__, ) as debug_dec: - assert debug_dec is msgops.current_pldrx().pld_dec - log.runtime( + assert ( + debug_dec + is + msgops.current_pldrx().pld_dec + ) + log.info( 'Applied `.devx._debug` pld-spec\n\n' f'{debug_dec}\n' ) @@ -887,11 +889,12 @@ def apply_debug_pldec() -> _codec.MsgCodec: and plrx.pld_dec is orig_pldec ) - log.runtime( + log.info( 'Reverted to previous pld-spec\n\n' f'{orig_pldec}\n' ) + async def request_root_stdio_lock( actor_uid: tuple[str, str], task_uid: tuple[str, int], @@ -911,6 +914,10 @@ async def request_root_stdio_lock( entering the REPL at the same time. ''' + + log.pdb( + 'Initing stdio-lock request task with root actor' + ) # TODO: likely we can implement this mutex more generally as # a `._sync.Lock`? # -[ ] simply add the wrapping needed for the debugger specifics? @@ -923,6 +930,8 @@ async def request_root_stdio_lock( DebugStatus.req_finished = trio.Event() try: from tractor._discovery import get_root + from tractor.msg import _ops as msgops + debug_dec: msgops.MsgDec with ( # NOTE: we need this to ensure that this task exits # BEFORE the REPl instance raises an error like @@ -953,12 +962,13 @@ async def request_root_stdio_lock( # apply_debug_pldec() as debug_dec, ): - log.critical( - 'Request cancel-scope is:\n\n' - f'{pformat_cs(req_cs, var_name="req_cs")}\n\n' - - ) + # XXX: was orig for debugging cs stack corruption.. + # log.info( + # 'Request cancel-scope is:\n\n' + # f'{pformat_cs(req_cs, var_name="req_cs")}\n\n' + # ) DebugStatus.req_cs = req_cs + req_ctx: Context|None = None try: # TODO: merge into single async with ? 
async with get_root() as portal: @@ -966,31 +976,37 @@ async def request_root_stdio_lock( async with portal.open_context( lock_tty_for_child, subactor_task_uid=task_uid, - ) as (ctx, status): + ) as (req_ctx, status): - DebugStatus.req_ctx = ctx + DebugStatus.req_ctx = req_ctx + + # sanity checks on pld-spec limit state + assert debug_dec + # curr_pldrx: msgops.PldRx = msgops.current_pldrx() + # assert ( + # curr_pldrx.pld_dec is debug_dec + # ) - from tractor.msg import ( - _ops as msgops, - ) - assert ( - msgops.current_pldrx().pld_dec is debug_dec - ) log.debug( 'Subactor locked TTY with msg\n\n' f'{status}\n' ) # mk_pdb().set_trace() - assert status.subactor_uid == actor_uid - assert status.cid + try: + assert status.subactor_uid == actor_uid + assert status.cid + except AttributeError: + log.exception('failed pldspec asserts!') + raise # set last rxed lock dialog status. DebugStatus.lock_status = status - async with ctx.open_stream() as stream: + async with req_ctx.open_stream() as stream: + assert DebugStatus.repl_release - task_status.started(ctx) + task_status.started(req_ctx) # wait for local task to exit its # `PdbREPL.interaction()`, call @@ -1006,25 +1022,25 @@ async def request_root_stdio_lock( # sync with child-side root locker task # completion - status: LockStatus = await ctx.result() + status: LockStatus = await req_ctx.result() assert not status.locked DebugStatus.lock_status = status log.pdb( 'TTY lock was released for subactor with msg\n\n' f'{status}\n\n' - f'Exitting {ctx.side!r}-side of locking ctx' + f'Exitting {req_ctx.side!r}-side of locking req_ctx' ) except ( tractor.ContextCancelled, trio.Cancelled, ): - log.exception( - 'Debug lock request CANCELLED?\n\n' - f'{pformat_cs(req_cs, var_name="req_cs")}\n\n' - f'{pformat_cs(ctx._scope, var_name="ctx._scope")}\n\n' - f'{ctx}' + log.cancel( + 'Debug lock request was CANCELLED?\n\n' + f'{req_ctx}\n' + # f'{pformat_cs(req_cs, var_name="req_cs")}\n\n' + # f'{pformat_cs(req_ctx._scope, var_name="req_ctx._scope")}\n\n' ) raise @@ -1033,11 +1049,11 @@ async def request_root_stdio_lock( ): log.exception( 'Failed during root TTY-lock dialog?\n' - f'{ctx}\n' + f'{req_ctx}\n' f'Cancelling IPC ctx!\n' ) - await ctx.cancel() + await req_ctx.cancel() raise @@ -1047,13 +1063,26 @@ async def request_root_stdio_lock( ): log.cancel( 'Debug lock request CANCELLED?\n' - f'{ctx}\n' + f'{req_ctx}\n' ) raise - except BaseException: - log.exception('Errored during root TTY-lock dialog?') - raise + except BaseException as req_err: + # log.error('Failed to request root stdio-lock?') + DebugStatus.req_err = req_err + DebugStatus.release() + + # TODO: how to dev a test that ensures we actually drop + # into THIS internal frame on any internal error in the above + # code? + # -[ ] eg. on failed pld_dec assert above we should be able + # to REPL pm it. + # -[ ]FURTHER, after we 'continue', we should be able to + # ctl-c out of the currently hanging task! + raise DebugRequestError( + 'Failed to lock stdio from subactor IPC ctx!\n\n' + f'req_ctx: {req_ctx}\n' + ) from req_err finally: log.debug('Exiting debugger TTY lock request func from child') @@ -1369,6 +1398,13 @@ def shield_sigint_handler( _pause_msg: str = 'Attaching to pdb REPL in actor' +class DebugRequestError(RuntimeError): + ''' + Failed to request stdio lock from root actor! 
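+
+    Normally raised (and chained via `from`) out of an internal
+    failure of the lock request task, e.g. roughly as in the
+    `request_root_stdio_lock()` handler above:
+
+        raise DebugRequestError(
+            'Failed to lock stdio from subactor IPC ctx!\n\n'
+            f'req_ctx: {req_ctx}\n'
+        ) from req_err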
+ + ''' + + async def _pause( debug_func: Callable|None, @@ -1480,15 +1516,18 @@ async def _pause( raise except BaseException: + __tracebackhide__: bool = False log.exception( 'Failed to invoke internal `debug_func = ' f'{debug_func.func.__name__}`\n' ) # NOTE: OW this is ONLY called from the # `.set_continue/next` hooks! - DebugStatus.release() + DebugStatus.release(cancel_req_task=True) + raise + repl_err: BaseException|None = None try: if is_root_process(): @@ -1584,43 +1623,45 @@ async def _pause( # actor._service_n.cancel_scope.shield = shield # ``` # but not entirely sure if that's a sane way to implement it? - try: - # NOTE spawn the stdio locker request task inside the - # current `Context._scope_nursery` to entsure that - # the request never can outlive the task's (parent) - # lifetime. - curr_ctx: Context = current_ipc_ctx() - # TODO: see `_errors_relayed_via_ipc()` where we - # should dynamically open a `debug_tn` for use here, - # BUT it needs to be outside the normal error - # catching and `_maybe_enter_debugger()` call! - # ctx: Context = await curr_ctx._debug_tn.start( - ctx: Context = await actor._service_n.start( - request_root_stdio_lock, - actor.uid, - (task.name, id(task)), # task uuid (effectively) - ) - # our locker task should be the one in ctx - # with the root actor - assert ( - ctx - is - DebugStatus.req_ctx - is not - curr_ctx - ) - # enter REPL - _enter_repl_sync(debug_func) + # NOTE currently we spawn the lock request task inside this + # subactor's global `Actor._service_n` so that the + # lifetime of the lock-request can outlive the current + # `._pause()` scope while the user steps through their + # application code and when they finally exit the + # session, via 'continue' or 'quit' cmds, the `PdbREPL` + # will manually call `DebugStatus.release()` to release + # the lock session with the root actor. + # + # TODO: ideally we can add a tighter scope for this + # request task likely by conditionally opening a "debug + # nursery" inside `_errors_relayed_via_ipc()`, see the + # todo in tht module, but + # -[ ] it needs to be outside the normal crash handling + # `_maybe_enter_debugger()` block-call. + # -[ ] we probably only need to allocate the nursery when + # we detect the runtime is already in debug mode. + # + # ctx: Context = await curr_ctx._debug_tn.start( + req_ctx: Context = await actor._service_n.start( + request_root_stdio_lock, + actor.uid, + (task.name, id(task)), # task uuid (effectively) + ) + # XXX sanity, our locker task should be the one which + # entered a new IPC ctx with the root actor, NOT the one + # that exists around the task calling into `._pause()`. + curr_ctx: Context = current_ipc_ctx() + assert ( + req_ctx + is + DebugStatus.req_ctx + is not + curr_ctx + ) - except RuntimeError: - if actor._cancel_called: - # service nursery won't be usable and we - # don't want to lock up the root either way since - # we're in (the midst of) cancellation. - return - - raise + # enter REPL + _enter_repl_sync(debug_func) # TODO: prolly factor this plus the similar block from # `_enter_repl_sync()` into a common @cm? @@ -1629,13 +1670,31 @@ async def _pause( log.devx( 'REPL for pdb was quit!\n' ) + + # when the actor is mid-runtime cancellation the + # `Actor._service_n` might get closed before we can spawn + # the request task, so just ignore expected RTE. 
+ elif ( + isinstance(repl_err, RuntimeError) + and + actor._cancel_called + ): + # service nursery won't be usable and we + # don't want to lock up the root either way since + # we're in (the midst of) cancellation. + log.warning( + 'Service nursery likely closed due to actor-runtime cancellation..\n' + 'Ignoring failed debugger lock request task spawn..\n' + ) + return + else: log.exception( 'Failed to engage debugger via `_pause()` ??\n' ) - mk_pdb().set_trace() - DebugStatus.release() + DebugStatus.release(cancel_req_task=True) + # sanity checks for ^ on request/status teardown assert DebugStatus.repl is None assert DebugStatus.repl_task is None @@ -1645,6 +1704,16 @@ async def _pause( raise + finally: + # always show frame when request fails due to internal + # failure in the above code (including an `BdbQuit`). + if ( + DebugStatus.req_err + or + repl_err + ): + __tracebackhide__: bool = False + def _set_trace( repl: PdbREPL, # passed by `_pause()` @@ -1703,7 +1772,7 @@ async def pause( https://en.wikipedia.org/wiki/Breakpoint ''' - __tracebackhide__: bool = True + __tracebackhide__: bool = hide_tb # always start 1 level up from THIS in user code since normally # `tractor.pause()` is called explicitly by use-app code thus @@ -1885,12 +1954,15 @@ def pause_from_sync( # NOTE prefer a new "pause" semantic since it better describes # "pausing the actor's runtime" for this particular # paralell task to do debugging in a REPL. -async def breakpoint(**kwargs): +async def breakpoint( + hide_tb: bool = True, + **kwargs, +): log.warning( '`tractor.breakpoint()` is deprecated!\n' 'Please use `tractor.pause()` instead!\n' ) - __tracebackhide__: bool = True + __tracebackhide__: bool = hide_tb await pause( api_frame=inspect.currentframe(), **kwargs, @@ -1951,6 +2023,7 @@ def _post_mortem( # frame=None, traceback=tb, ) + # XXX NOTE XXX: absolutely required to avoid hangs! # Since we presume the post-mortem was enaged to a task-ending # error, we MUST release the local REPL request so that not other # local task nor the root remains blocked! -- 2.34.1 From 8881219eae2af4beba22907b2bcaf0e93f0b67c9 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 20 May 2024 14:34:50 -0400 Subject: [PATCH 105/305] Allocate a `PldRx` per `Context`, new pld-spec API Since the state mgmt becomes quite messy with multiple sub-tasks inside an IPC ctx, AND bc generally speaking the payload-type-spec should map 1-to-1 with the `Context`, it doesn't make a lot of sense to be using `ContextVar`s to modify the `Context.pld_rx: PldRx` instance. Instead, always allocate a full instance inside `mk_context()` with the default `.pld_rx: PldRx` set to use the `msg._ops._def_any_pldec: MsgDec` In support, simplify the `.msg._ops` impl and APIs: - drop `_ctxvar_PldRx`, `_def_pld_rx` and `current_pldrx()`. - rename `PldRx._pldec` -> `._pld_dec`. - rename the unused `PldRx.apply_to_ipc()` -> `.wraps_ipc()`. - add a required `PldRx._ctx: Context` attr since it is needed internally in some meths and each pld-rx now maps to a specific ctx. - modify all recv methods to accept a `ipc: Context|MsgStream` (instead of a `ctx` arg) since both have a ref to the same `._rx_chan` and there are only a couple spots (in `.dec_msg()`) where we need the `ctx` explicitly (which can now be easily accessed via a new `MsgStream.ctx` property, see below). - always show the `.dec_msg()` frame in tbs if there's a reference error when calling `_raise_from_unexpected_msg()` in the fallthrough case. 
- implement `limit_plds()` as a light wrapper around getting the `current_ipc_ctx()` and mutating its `MsgDec` via `Context.pld_rx.limit_plds()`.
- add a `maybe_limit_plds()` which just provides an `@acm` equivalent of `limit_plds()`, handy for composing in an `async with ():` style block (avoiding additional indent levels in the body of async funcs).

Obvi extend the `Context` and `MsgStream` interfaces as needed to match the above:
- add a `Context.pld_rx` pub prop.
- new private refs to `Context._started_msg: Started` and a `._started_pld` (mostly for internal debugging / testing / logging), both set inside `.open_context()` immediately after the syncing phase.
- a `Context.has_outcome() -> bool:` predicate which can be used to more easily determine if the ctx errored or has a final result.
- pub props for `MsgStream.ctx: Context` and `.chan: Channel` providing full `ipc`-arg compat with the `PldRx` method signatures.
---
 tractor/_context.py   | 145 ++++++++++++++++++++----------
 tractor/_streaming.py |  25 +++++-
 tractor/msg/_ops.py   | 205 +++++++++++++++++++-----------------------
 3 files changed, 212 insertions(+), 163 deletions(-)

diff --git a/tractor/_context.py b/tractor/_context.py
index b58010b6..152efdee 100644
--- a/tractor/_context.py
+++ b/tractor/_context.py
@@ -41,6 +41,7 @@ from typing import (
     Callable,
     Mapping,
     Type,
+    TypeAlias,
     TYPE_CHECKING,
     Union,
 )
@@ -155,6 +156,41 @@ class Context:
     # payload receiver
     _pld_rx: msgops.PldRx
 
+    @property
+    def pld_rx(self) -> msgops.PldRx:
+        '''
+        The current `tractor.Context`'s msg-payload-receiver.
+
+        A payload receiver is the IPC-msg processing sub-sys which
+        filters inter-actor-task communicated payload data, i.e. the
+        `PayloadMsg.pld: PayloadT` field value, AFTER its container
+        shuttle msg (eg. `Started`/`Yield`/`Return`) has been
+        delivered up from `tractor`'s transport layer but BEFORE the
+        data is yielded to `tractor` application code.
+
+        The "IPC-primitive API" is normally one of a `Context` (this) or a `MsgStream`
+        or some higher level API using one of them.
+
+        For ex. `pld_data: PayloadT = MsgStream.receive()` implicitly
+        calls into the stream's parent `Context.pld_rx.recv_pld()` to
+        receive the latest `PayloadMsg.pld` value.
+
+        Modification of the current payload spec via `limit_plds()`
+        allows a `tractor` application to contextually filter IPC
+        payload content with a type specification as supported by the
+        interchange backend.
+
+        - for `msgspec` see .
+
+        Note that the `PldRx` itself is a per-`Context` instance that
+        normally only changes when some (sub-)task, on a given "side"
+        of the IPC ctx (either a "child"-side RPC or inside
+        a "parent"-side `Portal.open_context()` block), modifies it
+        using the `.msg._ops.limit_plds()` API.
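+
+        A rough child-side usage sketch (the endpoint and `MyStruct`
+        payload type are hypothetical, for illustration only):
+
+            import tractor
+            from tractor.msg import _ops as msgops
+
+            @tractor.context
+            async def ep(ctx: tractor.Context) -> None:
+                # from here on, only `MyStruct` payloads will decode
+                with msgops.limit_plds(spec=MyStruct):
+                    await ctx.started()
+                    ...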
+ + ''' + return self._pld_rx + # full "namespace-path" to target RPC function _nsf: NamespacePath @@ -231,6 +267,8 @@ class Context: # init and streaming state _started_called: bool = False + _started_msg: MsgType|None = None + _started_pld: Any = None _stream_opened: bool = False _stream: MsgStream|None = None @@ -623,7 +661,7 @@ class Context: log.runtime( 'Setting remote error for ctx\n\n' f'<= {self.peer_side!r}: {self.chan.uid}\n' - f'=> {self.side!r}\n\n' + f'=> {self.side!r}: {self._actor.uid}\n\n' f'{error}' ) self._remote_error: BaseException = error @@ -678,7 +716,7 @@ class Context: log.error( f'Remote context error:\n\n' # f'{pformat(self)}\n' - f'{error}\n' + f'{error}' ) if self._canceller is None: @@ -724,8 +762,10 @@ class Context: ) else: message: str = 'NOT cancelling `Context._scope` !\n\n' + # from .devx import mk_pdb + # mk_pdb().set_trace() - fmt_str: str = 'No `self._scope: CancelScope` was set/used ?' + fmt_str: str = 'No `self._scope: CancelScope` was set/used ?\n' if ( cs and @@ -805,6 +845,7 @@ class Context: # f'{ci.api_nsp}()\n' # ) + # TODO: use `.dev._frame_stack` scanning to find caller! return 'Portal.open_context()' async def cancel( @@ -1304,17 +1345,6 @@ class Context: ctx=self, hide_tb=hide_tb, ) - for msg in drained_msgs: - - # TODO: mask this by default.. - if isinstance(msg, Return): - # from .devx import pause - # await pause() - # raise InternalError( - log.warning( - 'Final `return` msg should never be drained !?!?\n\n' - f'{msg}\n' - ) drained_status: str = ( 'Ctx drained to final outcome msg\n\n' @@ -1435,6 +1465,10 @@ class Context: self._result ) + @property + def has_outcome(self) -> bool: + return bool(self.maybe_error) or self._final_result_is_set() + # @property def repr_outcome( self, @@ -1637,8 +1671,6 @@ class Context: ) if rt_started != started_msg: - # TODO: break these methods out from the struct subtype? - # TODO: make that one a mod func too.. diff = pretty_struct.Struct.__sub__( rt_started, @@ -1674,6 +1706,8 @@ class Context: ) from verr self._started_called = True + self._started_msg = started_msg + self._started_pld = value async def _drain_overflows( self, @@ -1961,6 +1995,7 @@ async def open_context_from_portal( portal: Portal, func: Callable, + pld_spec: TypeAlias|None = None, allow_overruns: bool = False, # TODO: if we set this the wrapping `@acm` body will @@ -2026,7 +2061,7 @@ async def open_context_from_portal( # XXX NOTE XXX: currenly we do NOT allow opening a contex # with "self" since the local feeder mem-chan processing # is not built for it. - if portal.channel.uid == portal.actor.uid: + if (uid := portal.channel.uid) == portal.actor.uid: raise RuntimeError( '** !! Invalid Operation !! **\n' 'Can not open an IPC ctx with the local actor!\n' @@ -2054,32 +2089,45 @@ async def open_context_from_portal( assert ctx._caller_info _ctxvar_Context.set(ctx) - # XXX NOTE since `._scope` is NOT set BEFORE we retreive the - # `Started`-msg any cancellation triggered - # in `._maybe_cancel_and_set_remote_error()` will - # NOT actually cancel the below line! - # -> it's expected that if there is an error in this phase of - # the dialog, the `Error` msg should be raised from the `msg` - # handling block below. - first: Any = await ctx._pld_rx.recv_pld( - ctx=ctx, - expect_msg=Started, - ) - ctx._started_called: bool = True - - uid: tuple = portal.channel.uid - cid: str = ctx.cid - # placeholder for any exception raised in the runtime # or by user tasks which cause this context's closure. 
scope_err: BaseException|None = None ctxc_from_callee: ContextCancelled|None = None try: - async with trio.open_nursery() as nurse: + async with ( + trio.open_nursery() as tn, + msgops.maybe_limit_plds( + ctx=ctx, + spec=pld_spec, + ) as maybe_msgdec, + ): + if maybe_msgdec: + assert maybe_msgdec.pld_spec == pld_spec - # NOTE: used to start overrun queuing tasks - ctx._scope_nursery: trio.Nursery = nurse - ctx._scope: trio.CancelScope = nurse.cancel_scope + # XXX NOTE since `._scope` is NOT set BEFORE we retreive the + # `Started`-msg any cancellation triggered + # in `._maybe_cancel_and_set_remote_error()` will + # NOT actually cancel the below line! + # -> it's expected that if there is an error in this phase of + # the dialog, the `Error` msg should be raised from the `msg` + # handling block below. + started_msg, first = await ctx._pld_rx.recv_msg_w_pld( + ipc=ctx, + expect_msg=Started, + passthrough_non_pld_msgs=False, + ) + + # from .devx import pause + # await pause() + ctx._started_called: bool = True + ctx._started_msg: bool = started_msg + ctx._started_pld: bool = first + + # NOTE: this in an implicit runtime nursery used to, + # - start overrun queuing tasks when as well as + # for cancellation of the scope opened by the user. + ctx._scope_nursery: trio.Nursery = tn + ctx._scope: trio.CancelScope = tn.cancel_scope # deliver context instance and .started() msg value # in enter tuple. @@ -2126,13 +2174,13 @@ async def open_context_from_portal( # when in allow_overruns mode there may be # lingering overflow sender tasks remaining? - if nurse.child_tasks: + if tn.child_tasks: # XXX: ensure we are in overrun state # with ``._allow_overruns=True`` bc otherwise # there should be no tasks in this nursery! if ( not ctx._allow_overruns - or len(nurse.child_tasks) > 1 + or len(tn.child_tasks) > 1 ): raise InternalError( 'Context has sub-tasks but is ' @@ -2304,8 +2352,8 @@ async def open_context_from_portal( ): log.warning( 'IPC connection for context is broken?\n' - f'task:{cid}\n' - f'actor:{uid}' + f'task: {ctx.cid}\n' + f'actor: {uid}' ) raise # duh @@ -2455,9 +2503,8 @@ async def open_context_from_portal( and ctx.cancel_acked ): log.cancel( - 'Context cancelled by {ctx.side!r}-side task\n' + f'Context cancelled by {ctx.side!r}-side task\n' f'|_{ctx._task}\n\n' - f'{repr(scope_err)}\n' ) @@ -2485,7 +2532,7 @@ async def open_context_from_portal( f'cid: {ctx.cid}\n' ) portal.actor._contexts.pop( - (uid, cid), + (uid, ctx.cid), None, ) @@ -2516,8 +2563,9 @@ def mk_context( from .devx._frame_stack import find_caller_info caller_info: CallerInfo|None = find_caller_info() - # TODO: when/how do we apply `.limit_plds()` from here? - pld_rx: msgops.PldRx = msgops.current_pldrx() + pld_rx = msgops.PldRx( + _pld_dec=msgops._def_any_pldec, + ) ctx = Context( chan=chan, @@ -2531,13 +2579,16 @@ def mk_context( _caller_info=caller_info, **kwargs, ) + pld_rx._ctx = ctx ctx._result = Unresolved return ctx # TODO: use the new type-parameters to annotate this in 3.13? 
# -[ ] https://peps.python.org/pep-0718/#unknown-types -def context(func: Callable) -> Callable: +def context( + func: Callable, +) -> Callable: ''' Mark an (async) function as an SC-supervised, inter-`Actor`, child-`trio.Task`, IPC endpoint otherwise known more diff --git a/tractor/_streaming.py b/tractor/_streaming.py index dd4cd0e1..a008eaf5 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -52,6 +52,7 @@ from tractor.msg import ( if TYPE_CHECKING: from ._context import Context + from ._ipc import Channel log = get_logger(__name__) @@ -65,10 +66,10 @@ log = get_logger(__name__) class MsgStream(trio.abc.Channel): ''' A bidirectional message stream for receiving logically sequenced - values over an inter-actor IPC ``Channel``. + values over an inter-actor IPC `Channel`. This is the type returned to a local task which entered either - ``Portal.open_stream_from()`` or ``Context.open_stream()``. + `Portal.open_stream_from()` or `Context.open_stream()`. Termination rules: @@ -95,6 +96,22 @@ class MsgStream(trio.abc.Channel): self._eoc: bool|trio.EndOfChannel = False self._closed: bool|trio.ClosedResourceError = False + @property + def ctx(self) -> Context: + ''' + This stream's IPC `Context` ref. + + ''' + return self._ctx + + @property + def chan(self) -> Channel: + ''' + Ref to the containing `Context`'s transport `Channel`. + + ''' + return self._ctx.chan + # TODO: could we make this a direct method bind to `PldRx`? # -> receive_nowait = PldRx.recv_pld # |_ means latter would have to accept `MsgStream`-as-`self`? @@ -109,7 +126,7 @@ class MsgStream(trio.abc.Channel): ): ctx: Context = self._ctx return ctx._pld_rx.recv_pld_nowait( - ctx=ctx, + ipc=self, expect_msg=expect_msg, ) @@ -148,7 +165,7 @@ class MsgStream(trio.abc.Channel): try: ctx: Context = self._ctx - return await ctx._pld_rx.recv_pld(ctx=ctx) + return await ctx._pld_rx.recv_pld(ipc=self) # XXX: the stream terminates on either of: # - via `self._rx_chan.receive()` raising after manual closure diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index 3b0b8339..3014c15b 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -22,10 +22,9 @@ operational helpers for processing transaction flows. ''' from __future__ import annotations from contextlib import ( - # asynccontextmanager as acm, + asynccontextmanager as acm, contextmanager as cm, ) -from contextvars import ContextVar from typing import ( Any, Type, @@ -50,6 +49,7 @@ from tractor._exceptions import ( _mk_msg_type_err, pack_from_raise, ) +from tractor._state import current_ipc_ctx from ._codec import ( mk_dec, MsgDec, @@ -75,7 +75,7 @@ if TYPE_CHECKING: log = get_logger(__name__) -_def_any_pldec: MsgDec = mk_dec() +_def_any_pldec: MsgDec[Any] = mk_dec() class PldRx(Struct): @@ -104,15 +104,19 @@ class PldRx(Struct): ''' # TODO: better to bind it here? # _rx_mc: trio.MemoryReceiveChannel - _pldec: MsgDec + _pld_dec: MsgDec + _ctx: Context|None = None _ipc: Context|MsgStream|None = None @property def pld_dec(self) -> MsgDec: - return self._pldec + return self._pld_dec + # TODO: a better name? + # -[ ] when would this be used as it avoids needingn to pass the + # ipc prim to every method @cm - def apply_to_ipc( + def wraps_ipc( self, ipc_prim: Context|MsgStream, @@ -140,49 +144,50 @@ class PldRx(Struct): exit. 
''' - orig_dec: MsgDec = self._pldec + orig_dec: MsgDec = self._pld_dec limit_dec: MsgDec = mk_dec(spec=spec) try: - self._pldec = limit_dec + self._pld_dec = limit_dec yield limit_dec finally: - self._pldec = orig_dec + self._pld_dec = orig_dec @property def dec(self) -> msgpack.Decoder: - return self._pldec.dec + return self._pld_dec.dec def recv_pld_nowait( self, # TODO: make this `MsgStream` compat as well, see above^ # ipc_prim: Context|MsgStream, - ctx: Context, + ipc: Context|MsgStream, ipc_msg: MsgType|None = None, expect_msg: Type[MsgType]|None = None, - + hide_tb: bool = False, **dec_msg_kwargs, ) -> Any|Raw: - __tracebackhide__: bool = True + __tracebackhide__: bool = hide_tb msg: MsgType = ( ipc_msg or # sync-rx msg from underlying IPC feeder (mem-)chan - ctx._rx_chan.receive_nowait() + ipc._rx_chan.receive_nowait() ) return self.dec_msg( msg, - ctx=ctx, + ipc=ipc, expect_msg=expect_msg, + hide_tb=hide_tb, **dec_msg_kwargs, ) async def recv_pld( self, - ctx: Context, + ipc: Context|MsgStream, ipc_msg: MsgType|None = None, expect_msg: Type[MsgType]|None = None, hide_tb: bool = True, @@ -200,11 +205,11 @@ class PldRx(Struct): or # async-rx msg from underlying IPC feeder (mem-)chan - await ctx._rx_chan.receive() + await ipc._rx_chan.receive() ) return self.dec_msg( msg=msg, - ctx=ctx, + ipc=ipc, expect_msg=expect_msg, **dec_msg_kwargs, ) @@ -212,7 +217,7 @@ class PldRx(Struct): def dec_msg( self, msg: MsgType, - ctx: Context, + ipc: Context|MsgStream, expect_msg: Type[MsgType]|None, raise_error: bool = True, @@ -225,6 +230,9 @@ class PldRx(Struct): ''' __tracebackhide__: bool = hide_tb + + _src_err = None + src_err: BaseException|None = None match msg: # payload-data shuttle msg; deliver the `.pld` value # directly to IPC (primitive) client-consumer code. @@ -234,7 +242,7 @@ class PldRx(Struct): |Return(pld=pld) # termination phase ): try: - pld: PayloadT = self._pldec.decode(pld) + pld: PayloadT = self._pld_dec.decode(pld) log.runtime( 'Decoded msg payload\n\n' f'{msg}\n\n' @@ -243,25 +251,30 @@ class PldRx(Struct): ) return pld - # XXX pld-type failure - except ValidationError as src_err: + # XXX pld-value type failure + except ValidationError as valerr: + # pack mgterr into error-msg for + # reraise below; ensure remote-actor-err + # info is displayed nicely? msgterr: MsgTypeError = _mk_msg_type_err( msg=msg, codec=self.pld_dec, - src_validation_error=src_err, + src_validation_error=valerr, is_invalid_payload=True, ) msg: Error = pack_from_raise( local_err=msgterr, cid=msg.cid, - src_uid=ctx.chan.uid, + src_uid=ipc.chan.uid, ) + src_err = valerr # XXX some other decoder specific failure? # except TypeError as src_error: # from .devx import mk_pdb # mk_pdb().set_trace() # raise src_error + # ^-TODO-^ can remove? # a runtime-internal RPC endpoint response. 
# always passthrough since (internal) runtime @@ -299,6 +312,7 @@ class PldRx(Struct): return src_err case Stop(cid=cid): + ctx: Context = getattr(ipc, 'ctx', ipc) message: str = ( f'{ctx.side!r}-side of ctx received stream-`Stop` from ' f'{ctx.peer_side!r} peer ?\n' @@ -341,14 +355,21 @@ class PldRx(Struct): # |_https://docs.python.org/3.11/library/exceptions.html#BaseException.add_note # # fallthrough and raise from `src_err` - _raise_from_unexpected_msg( - ctx=ctx, - msg=msg, - src_err=src_err, - log=log, - expect_msg=expect_msg, - hide_tb=hide_tb, - ) + try: + _raise_from_unexpected_msg( + ctx=getattr(ipc, 'ctx', ipc), + msg=msg, + src_err=src_err, + log=log, + expect_msg=expect_msg, + hide_tb=hide_tb, + ) + except UnboundLocalError: + # XXX if there's an internal lookup error in the above + # code (prolly on `src_err`) we want to show this frame + # in the tb! + __tracebackhide__: bool = False + raise async def recv_msg_w_pld( self, @@ -378,52 +399,13 @@ class PldRx(Struct): # msg instance? pld: PayloadT = self.dec_msg( msg, - ctx=ipc, + ipc=ipc, expect_msg=expect_msg, **kwargs, ) return msg, pld -# Always maintain a task-context-global `PldRx` -_def_pld_rx: PldRx = PldRx( - _pldec=_def_any_pldec, -) -_ctxvar_PldRx: ContextVar[PldRx] = ContextVar( - 'pld_rx', - default=_def_pld_rx, -) - - -def current_pldrx() -> PldRx: - ''' - Return the current `trio.Task.context`'s msg-payload-receiver. - - A payload receiver is the IPC-msg processing sub-sys which - filters inter-actor-task communicated payload data, i.e. the - `PayloadMsg.pld: PayloadT` field value, AFTER it's container - shuttlle msg (eg. `Started`/`Yield`/`Return) has been delivered - up from `tractor`'s transport layer but BEFORE the data is - yielded to application code, normally via an IPC primitive API - like, for ex., `pld_data: PayloadT = MsgStream.receive()`. - - Modification of the current payload spec via `limit_plds()` - allows a `tractor` application to contextually filter IPC - payload content with a type specification as supported by - the interchange backend. - - - for `msgspec` see . - - NOTE that the `PldRx` itself is a per-`Context` global sub-system - that normally does not change other then the applied pld-spec - for the current `trio.Task`. - - ''' - # ctx: context = current_ipc_ctx() - # return ctx._pld_rx - return _ctxvar_PldRx.get() - - @cm def limit_plds( spec: Union[Type[Struct]], @@ -439,29 +421,55 @@ def limit_plds( ''' __tracebackhide__: bool = True try: - # sanity on orig settings - orig_pldrx: PldRx = current_pldrx() - orig_pldec: MsgDec = orig_pldrx.pld_dec + curr_ctx: Context = current_ipc_ctx() + rx: PldRx = curr_ctx._pld_rx + orig_pldec: MsgDec = rx.pld_dec - with orig_pldrx.limit_plds( + with rx.limit_plds( spec=spec, **kwargs, ) as pldec: - log.info( + log.runtime( 'Applying payload-decoder\n\n' f'{pldec}\n' ) yield pldec finally: - log.info( + log.runtime( 'Reverted to previous payload-decoder\n\n' f'{orig_pldec}\n' ) - assert ( - (pldrx := current_pldrx()) is orig_pldrx - and - pldrx.pld_dec is orig_pldec - ) + # sanity on orig settings + assert rx.pld_dec is orig_pldec + + +@acm +async def maybe_limit_plds( + ctx: Context, + spec: Union[Type[Struct]]|None = None, + **kwargs, +) -> MsgDec|None: + ''' + Async compat maybe-payload type limiter. + + Mostly for use inside other internal `@acm`s such that a separate + indent block isn't needed when an async one is already being + used. 
+ + ''' + if spec is None: + yield None + return + + # sanity on scoping + curr_ctx: Context = current_ipc_ctx() + assert ctx is curr_ctx + + with ctx._pld_rx.limit_plds(spec=spec) as msgdec: + yield msgdec + + curr_ctx: Context = current_ipc_ctx() + assert ctx is curr_ctx async def drain_to_final_msg( @@ -543,21 +551,12 @@ async def drain_to_final_msg( match msg: # final result arrived! - case Return( - # cid=cid, - # pld=res, - ): - # ctx._result: Any = res - ctx._result: Any = pld + case Return(): log.runtime( 'Context delivered final draining msg:\n' f'{pretty_struct.pformat(msg)}' ) - # XXX: only close the rx mem chan AFTER - # a final result is retreived. - # if ctx._rx_chan: - # await ctx._rx_chan.aclose() - # TODO: ^ we don't need it right? + ctx._result: Any = pld result_msg = msg break @@ -664,24 +663,6 @@ async def drain_to_final_msg( result_msg = msg break # OOOOOF, yeah obvi we need this.. - # XXX we should never really get here - # right! since `._deliver_msg()` should - # always have detected an {'error': ..} - # msg and already called this right!?! - # elif error := unpack_error( - # msg=msg, - # chan=ctx._portal.channel, - # hide_tb=False, - # ): - # log.critical('SHOULD NEVER GET HERE!?') - # assert msg is ctx._cancel_msg - # assert error.msgdata == ctx._remote_error.msgdata - # assert error.ipc_msg == ctx._remote_error.ipc_msg - # from .devx._debug import pause - # await pause() - # ctx._maybe_cancel_and_set_remote_error(error) - # ctx._maybe_raise_remote_err(error) - else: # bubble the original src key error raise -- 2.34.1 From e0dc1d73b2953047d9f2466e93c0b972b62f7c38 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 20 May 2024 15:47:01 -0400 Subject: [PATCH 106/305] Expose `tractor.current_ipc_ctx()` at pkg level --- tractor/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tractor/__init__.py b/tractor/__init__.py index bd9b8610..c15a391b 100644 --- a/tractor/__init__.py +++ b/tractor/__init__.py @@ -43,6 +43,7 @@ from ._supervise import ( from ._state import ( current_actor as current_actor, is_root_process as is_root_process, + current_ipc_ctx as current_ipc_ctx, ) from ._exceptions import ( ContextCancelled as ContextCancelled, -- 2.34.1 From 452094df278282b1bb931ff3b378c2827a98bcee Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 20 May 2024 16:07:57 -0400 Subject: [PATCH 107/305] Adjust `Portal` usage of `Context.pld_rx` Pass the new `ipc` arg and try to show api frames when an unexpected internal error is detected. --- tractor/_portal.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/tractor/_portal.py b/tractor/_portal.py index 700f2fdc..2c676e12 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -166,13 +166,19 @@ class Portal: assert self._expect_result_ctx if self._final_result_msg is None: - ( - self._final_result_msg, - self._final_result_pld, - ) = await self._expect_result_ctx._pld_rx.recv_msg_w_pld( - ipc=self._expect_result_ctx, - expect_msg=Return, - ) + try: + ( + self._final_result_msg, + self._final_result_pld, + ) = await self._expect_result_ctx._pld_rx.recv_msg_w_pld( + ipc=self._expect_result_ctx, + expect_msg=Return, + ) + except BaseException as err: + # TODO: wrap this into `@api_frame` optionally with + # some kinda filtering mechanism like log levels? 
+ __tracebackhide__: bool = False + raise err return self._final_result_pld @@ -306,7 +312,7 @@ class Portal: portal=self, ) return await ctx._pld_rx.recv_pld( - ctx=ctx, + ipc=ctx, expect_msg=Return, ) @@ -325,6 +331,8 @@ class Portal: remote rpc task or a local async generator instance. ''' + __runtimeframe__: int = 1 # noqa + if isinstance(func, str): warnings.warn( "`Portal.run(namespace: str, funcname: str)` is now" @@ -358,7 +366,7 @@ class Portal: portal=self, ) return await ctx._pld_rx.recv_pld( - ctx=ctx, + ipc=ctx, expect_msg=Return, ) -- 2.34.1 From 09948d71c6973dc32b786374a35467f68645351d Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 20 May 2024 16:11:59 -0400 Subject: [PATCH 108/305] Shield channel closing in `_connect_chan()` --- tractor/_ipc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tractor/_ipc.py b/tractor/_ipc.py index 70774bed..511a053c 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -716,4 +716,5 @@ async def _connect_chan( chan = Channel((host, port)) await chan.connect() yield chan - await chan.aclose() + with trio.CancelScope(shield=True): + await chan.aclose() -- 2.34.1 From e133911a447820ea6a1afbe11615ada0f587e1cc Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 20 May 2024 16:12:51 -0400 Subject: [PATCH 109/305] Add error suppress flag to `current_ipc_ctx()` --- tractor/_state.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tractor/_state.py b/tractor/_state.py index a3729833..8c5cca14 100644 --- a/tractor/_state.py +++ b/tractor/_state.py @@ -124,9 +124,15 @@ _ctxvar_Context: ContextVar[Context] = ContextVar( ) -def current_ipc_ctx() -> Context: +def current_ipc_ctx( + error_on_not_set: bool = False, +) -> Context|None: ctx: Context = _ctxvar_Context.get() - if not ctx: + + if ( + not ctx + and error_on_not_set + ): from ._exceptions import InternalError raise InternalError( 'No IPC context has been allocated for this task yet?\n' -- 2.34.1 From 5668328c8f2da47c84fc63a81841c23f97c21336 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 20 May 2024 16:18:42 -0400 Subject: [PATCH 110/305] Set `_ctxvar_Context` for child-side RPC tasks Just inside `._invoke()` after the `ctx: Context` is retrieved. Also try our best to *not hide* internal frames when a non-user-code crash happens, normally either due to a runtime RPC EP bug or a transport failure. --- tractor/_rpc.py | 55 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index b8dc42b6..0f200d0e 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -69,7 +69,6 @@ from .msg import ( from tractor.msg.types import ( CancelAck, Error, - Msg, MsgType, Return, Start, @@ -248,10 +247,17 @@ async def _errors_relayed_via_ipc( ] = trio.TASK_STATUS_IGNORED, ) -> None: + # NOTE: we normally always hide this frame in call-stack tracebacks + # if the crash originated from an RPC task (since normally the + # user is only going to care about their own code not this + # internal runtime frame) and we DID NOT + # fail due to an IPC transport error! __tracebackhide__: bool = hide_tb + # TODO: a debug nursery when in debug mode! 
# async with maybe_open_debugger_nursery() as debug_tn: # => see matching comment in side `._debug._pause()` + rpc_err: BaseException|None = None try: yield # run RPC invoke body @@ -262,16 +268,7 @@ async def _errors_relayed_via_ipc( BaseExceptionGroup, KeyboardInterrupt, ) as err: - - # NOTE: always hide this frame from debug REPL call stack - # if the crash originated from an RPC task and we DID NOT - # fail due to an IPC transport error! - if ( - is_rpc - and - chan.connected() - ): - __tracebackhide__: bool = hide_tb + rpc_err = err # TODO: maybe we'll want different "levels" of debugging # eventualy such as ('app', 'supervisory', 'runtime') ? @@ -316,11 +313,19 @@ async def _errors_relayed_via_ipc( api_frame=inspect.currentframe(), ) if not entered_debug: + # if we prolly should have entered the REPL but + # didn't, maybe there was an internal error in + # the above code and we do want to show this + # frame! + if _state.debug_mode(): + __tracebackhide__: bool = False + log.exception( 'RPC task crashed\n' f'|_{ctx}' ) + # ALWAYS try to ship RPC errors back to parent/caller task if is_rpc: @@ -353,6 +358,20 @@ async def _errors_relayed_via_ipc( # `Actor._service_n`, we add "handles" to each such that # they can be individually ccancelled. finally: + + # if the error is not from user code and instead a failure + # of a runtime RPC or transport failure we do prolly want to + # show this frame + if ( + rpc_err + and ( + not is_rpc + or + not chan.connected() + ) + ): + __tracebackhide__: bool = False + try: ctx: Context func: Callable @@ -442,9 +461,10 @@ async def _invoke( # open the stream with this option. # allow_overruns=True, ) - context: bool = False + context_ep_func: bool = False - assert not _state._ctxvar_Context.get() + # set the current IPC ctx var for this RPC task + _state._ctxvar_Context.set(ctx) # TODO: deprecate this style.. if getattr(func, '_tractor_stream_function', False): @@ -473,7 +493,7 @@ async def _invoke( # handle decorated ``@tractor.context`` async function elif getattr(func, '_tractor_context_function', False): kwargs['ctx'] = ctx - context = True + context_ep_func = True # errors raised inside this block are propgated back to caller async with _errors_relayed_via_ipc( @@ -499,7 +519,7 @@ async def _invoke( raise # TODO: impl all these cases in terms of the `Context` one! - if not context: + if not context_ep_func: await _invoke_non_context( actor, cancel_scope, @@ -569,7 +589,6 @@ async def _invoke( async with trio.open_nursery() as tn: ctx._scope_nursery = tn ctx._scope = tn.cancel_scope - _state._ctxvar_Context.set(ctx) task_status.started(ctx) # TODO: should would be nice to have our @@ -829,7 +848,7 @@ async def process_messages( (as utilized inside `Portal.cancel_actor()` ). ''' - assert actor._service_n # state sanity + assert actor._service_n # runtime state sanity # TODO: once `trio` get's an "obvious way" for req/resp we # should use it? 
@@ -842,7 +861,7 @@ async def process_messages( # - https://github.com/aiortc/aioquic/blob/main/src/aioquic/quic/connection.py#L1175 # - https://github.com/aiortc/aioquic/blob/main/src/aioquic/quic/connection.py#L659 nursery_cancelled_before_task: bool = False - msg: Msg|None = None + msg: MsgType|None = None try: # NOTE: this internal scope allows for keeping this # message loop running despite the current task having -- 2.34.1 From 3b5970f12b8c5cd448a3d9887e3761e59e582022 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 20 May 2024 17:04:30 -0400 Subject: [PATCH 111/305] Show runtime nursery frames on internal errors Much like other recent changes attempt to detect runtime-bug-causing crashes and only show the runtime-endpoint frame when present. Adds a `ActorNursery._scope_error: BaseException|None` attr to aid with detection. Also toss in some todo notes for removing and replacing the `.run_in_actor()` method API. --- tractor/_supervise.py | 50 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/tractor/_supervise.py b/tractor/_supervise.py index 59ec728b..8f3574bb 100644 --- a/tractor/_supervise.py +++ b/tractor/_supervise.py @@ -84,6 +84,7 @@ class ActorNursery: ria_nursery: trio.Nursery, da_nursery: trio.Nursery, errors: dict[tuple[str, str], BaseException], + ) -> None: # self.supervisor = supervisor # TODO self._actor: Actor = actor @@ -105,6 +106,7 @@ class ActorNursery: self._at_least_one_child_in_debug: bool = False self.errors = errors self.exited = trio.Event() + self._scope_error: BaseException|None = None # NOTE: when no explicit call is made to # `.open_root_actor()` by application code, @@ -117,7 +119,9 @@ class ActorNursery: async def start_actor( self, name: str, + *, + bind_addrs: list[tuple[str, int]] = [_default_bind_addr], rpc_module_paths: list[str]|None = None, enable_modules: list[str]|None = None, @@ -125,6 +129,7 @@ class ActorNursery: nursery: trio.Nursery|None = None, debug_mode: bool|None = None, infect_asyncio: bool = False, + ) -> Portal: ''' Start a (daemon) actor: an process that has no designated @@ -189,6 +194,13 @@ class ActorNursery: ) ) + # TODO: DEPRECATE THIS: + # -[ ] impl instead as a hilevel wrapper on + # top of a `@context` style invocation. + # |_ dynamic @context decoration on child side + # |_ implicit `Portal.open_context() as (ctx, first):` + # and `return first` on parent side. 
+ # -[ ] use @api_frame on the wrapper async def run_in_actor( self, @@ -221,7 +233,7 @@ class ActorNursery: # use the explicit function name if not provided name = fn.__name__ - portal = await self.start_actor( + portal: Portal = await self.start_actor( name, enable_modules=[mod_path] + ( enable_modules or rpc_module_paths or [] @@ -250,6 +262,7 @@ class ActorNursery: ) return portal + # @api_frame async def cancel( self, hard_kill: bool = False, @@ -346,7 +359,12 @@ async def _open_and_supervise_one_cancels_all_nursery( actor: Actor, ) -> typing.AsyncGenerator[ActorNursery, None]: - __tracebackhide__ = True + + # normally don't need to show user by default + __tracebackhide__: bool = True + + outer_err: BaseException|None = None + inner_err: BaseException|None = None # the collection of errors retreived from spawned sub-actors errors: dict[tuple[str, str], BaseException] = {} @@ -356,7 +374,7 @@ async def _open_and_supervise_one_cancels_all_nursery( # handling errors that are generated by the inner nursery in # a supervisor strategy **before** blocking indefinitely to wait for # actors spawned in "daemon mode" (aka started using - # ``ActorNursery.start_actor()``). + # `ActorNursery.start_actor()`). # errors from this daemon actor nursery bubble up to caller async with trio.open_nursery() as da_nursery: @@ -391,7 +409,8 @@ async def _open_and_supervise_one_cancels_all_nursery( ) an._join_procs.set() - except BaseException as inner_err: + except BaseException as _inner_err: + inner_err = _inner_err errors[actor.uid] = inner_err # If we error in the root but the debugger is @@ -469,8 +488,10 @@ async def _open_and_supervise_one_cancels_all_nursery( Exception, BaseExceptionGroup, trio.Cancelled + ) as _outer_err: + outer_err = _outer_err - ) as err: + an._scope_error = outer_err or inner_err # XXX: yet another guard before allowing the cancel # sequence in case a (single) child is in debug. @@ -485,7 +506,7 @@ async def _open_and_supervise_one_cancels_all_nursery( if an._children: log.cancel( 'Actor-nursery cancelling due error type:\n' - f'{err}\n' + f'{outer_err}\n' ) with trio.CancelScope(shield=True): await an.cancel() @@ -512,6 +533,13 @@ async def _open_and_supervise_one_cancels_all_nursery( else: raise list(errors.values())[0] + # show frame on any (likely) internal error + if ( + not an.cancelled + and an._scope_error + ): + __tracebackhide__: bool = False + # da_nursery scope end - nursery checkpoint # final exit @@ -537,7 +565,7 @@ async def open_nursery( which cancellation scopes correspond to each spawned subactor set. 
''' - __tracebackhide__ = True + __tracebackhide__: bool = True implicit_runtime: bool = False actor: Actor = current_actor(err_on_no_runtime=False) an: ActorNursery|None = None @@ -588,6 +616,14 @@ async def open_nursery( an.exited.set() finally: + # show frame on any internal runtime-scope error + if ( + an + and not an.cancelled + and an._scope_error + ): + __tracebackhide__: bool = False + msg: str = ( 'Actor-nursery exited\n' f'|_{an}\n' -- 2.34.1 From f0912c98596fc823badb055c31f38b064d519737 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 21 May 2024 09:19:56 -0400 Subject: [PATCH 112/305] Resolve remaining debug-request race causing hangs More or less by pedantically separating and managing root and subactor request syncing events to always be managed by the locking IPC context task-funcs: - for the root's "child"-side, `lock_tty_for_child()` directly creates and sets a new `Lock.req_handler_finished` inside a `finally:` - for the sub's "parent"-side, `request_root_stdio_lock()` does the same with a new `DebugStatus.req_finished` event and separates it from the `.repl_release` event (which indicates a "c" or "q" from user and thus exit of the REPL session) as well as sets a new `.req_task: trio.Task` to explicitly distinguish from the app-user-task that enters the REPL vs. the paired bg task used to request the global root's stdio mutex alongside it. - apply the `__pld_spec__` on "child"-side of the ctx using the new `Portal.open_context(pld_spec)` parameter support; drops use of any `ContextVar` malarky used prior for `PldRx` mgmt. - removing `Lock.no_remote_has_tty` since it was a nebulous name and from the prior "everything is in a `Lock`" design.. ------ - ------ More rigorous impl to handle various edge cases in `._pause()`: - rejig `_enter_repl_sync()` to wrap the `debug_func == None` case inside maybe-internal-error handler blocks. - better logic for recurrent vs. multi-task contention for REPL entry in subactors, by guarding using `DebugStatus.req_task` and by now waiting on the new `DebugStatus.req_finished` for the multi-task contention case. - even better internal error handling and reporting for when this code is hacked on and possibly broken ;p ------ - ------ Updates to `.pause_from_sync()` support: - add optional `actor`, `task` kwargs to `_set_trace()` to allow compat with the new explicit `debug_func` calling in `._pause()` and pass a `threading.Thread` for `task` in the `.to_thread()` usage case. - add an `except` block that tries to show the frame on any internal error. ------ - ------ Relatedly includes a buncha cleanups/simplifications somewhat in prep for some coming refinements (around `DebugStatus`): - use all the new attrs mentioned above as needed in the SIGINT shielder. - wait on `Lock.req_handler_finished` in `maybe_wait_for_debugger()`. - dropping a ton of masked legacy code left in during the recent reworks. - better comments, like on the use of `Context._scope` for shielding on the "child"-side to avoid the need to manage yet another cs. - add/change-to lotsa `log.devx()` level emissions for those infos which are handy while hacking on the debugger but not ideal/necessary to be user visible. - obvi add lotsa follow up todo notes! 
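
As a rough sketch, the resulting two-event sync flow on the sub's
"parent"-side looks something like the following (the attr and func
names are from this patch, but the body is illustrative pseudo-code,
not a lift of the real impl):

    DebugStatus.req_task = current_task()
    DebugStatus.req_finished = trio.Event()
    try:
        async with get_root() as portal:
            async with portal.open_context(
                lock_tty_for_child,
                pld_spec=__pld_spec__,
            ) as (req_ctx, status):
                # wait for the REPL-using app task to signal
                # exit ("c"/"q") before releasing root's stdio.
                await DebugStatus.repl_release.wait()
    finally:
        # ALWAYS signal req-task completion so any waiter
        # (eg. a new `.pause()` entry) can proceed.
        DebugStatus.req_finished.set()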
--- tractor/devx/_debug.py | 824 ++++++++++++++++++++++------------------- 1 file changed, 446 insertions(+), 378 deletions(-) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 1e82122c..877d2de6 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -73,10 +73,10 @@ from tractor._state import ( debug_mode, current_ipc_ctx, ) -from .pformat import ( - # pformat_caller_frame, - pformat_cs, -) +# from .pformat import ( +# pformat_caller_frame, +# pformat_cs, +# ) if TYPE_CHECKING: from tractor._ipc import Channel @@ -190,8 +190,8 @@ class Lock: # a stale lock condition (eg. IPC failure with the locking # child ctx_in_debug: Context|None = None + req_handler_finished: trio.Event|None = None - no_remote_has_tty: trio.Event|None = None _debug_lock: trio.StrictFIFOLock = trio.StrictFIFOLock() _blocked: set[ tuple[str, str] # `Actor.uid` for per actor @@ -209,13 +209,12 @@ class Lock: if is_root_process(): lock_stats: trio.LockStatistics = cls._debug_lock.statistics() fields += ( - f'no_remote_has_tty: {cls.no_remote_has_tty}\n' + f'req_handler_finished: {cls.req_handler_finished}\n' + f'_blocked: {cls._blocked}\n\n' - - f'ctx_in_debug: {cls.ctx_in_debug}\n\n' - f'_debug_lock: {cls._debug_lock}\n' f'lock_stats: {lock_stats}\n' + ) body: str = textwrap.indent( @@ -225,7 +224,9 @@ class Lock: return ( f'<{cls.__name__}(\n' f'{body}' - ')>' + ')>\n\n' + + f'{cls.ctx_in_debug}\n' ) @classmethod @@ -234,16 +235,23 @@ class Lock: cls, force: bool = False, ): + message: str = 'TTY lock not held by any child\n' + + if not (is_trio_main := DebugStatus.is_main_trio_thread()): + task: threading.Thread = threading.current_thread() + else: + task: trio.Task = current_task() + try: lock: trio.StrictFIFOLock = cls._debug_lock owner: Task = lock.statistics().owner if ( lock.locked() and - owner is current_task() + owner is task # ^-NOTE-^ if not will raise a RTE.. ): - if not DebugStatus.is_main_trio_thread(): + if not is_trio_main: trio.from_thread.run_sync( cls._debug_lock.release ) @@ -251,45 +259,27 @@ class Lock: cls._debug_lock.release() message: str = 'TTY lock released for child\n' - else: - message: str = 'TTY lock not held by any child\n' - finally: # IFF there are no more requesting tasks queued up fire, the # "tty-unlocked" event thereby alerting any monitors of the lock that # we are now back in the "tty unlocked" state. This is basically # and edge triggered signal around an empty queue of sub-actor # tasks that may have tried to acquire the lock. - stats = cls._debug_lock.statistics() + lock_stats = cls._debug_lock.statistics() + req_handler_finished: trio.Event|None = Lock.req_handler_finished if ( - not stats.owner + not lock_stats.owner or force - # and cls.no_remote_has_tty is not None + and req_handler_finished is None ): message += '-> No more child ctx tasks hold the TTY lock!\n' - # set and release - if cls.no_remote_has_tty is not None: - cls.no_remote_has_tty.set() - cls.no_remote_has_tty = None - - # cls.remote_task_in_debug = None - - else: - message += ( - f'-> Not signalling `Lock.no_remote_has_tty` since it has value:{cls.no_remote_has_tty}\n' - ) - - else: - # wakeup any waiters since the lock was released - # (presumably) temporarily. 
- if no_remote_has_tty := cls.no_remote_has_tty: - no_remote_has_tty.set() - no_remote_has_tty = trio.Event() - + elif req_handler_finished: + req_stats = req_handler_finished.statistics() message += ( f'-> A child ctx task still owns the `Lock` ??\n' - f' |_owner task: {stats.owner}\n' + f' |_lock_stats: {lock_stats}\n' + f' |_req_stats: {req_stats}\n' ) cls.ctx_in_debug = None @@ -299,8 +289,6 @@ class Lock: async def acquire( cls, ctx: Context, - # subactor_uid: tuple[str, str], - # remote_task_uid: str, ) -> AsyncIterator[trio.StrictFIFOLock]: ''' @@ -328,7 +316,6 @@ class Lock: ) stats = cls._debug_lock.statistics() if owner := stats.owner: - # and cls.no_remote_has_tty is not None pre_msg += ( f'\n' f'`Lock` already held by local task?\n' @@ -347,12 +334,6 @@ class Lock: await cls._debug_lock.acquire() cls.ctx_in_debug = ctx we_acquired = True - if cls.no_remote_has_tty is None: - # mark the tty lock as being in use so that the runtime - # can try to avoid clobbering any connection from a child - # that's currently relying on it. - cls.no_remote_has_tty = trio.Event() - # cls.remote_task_in_debug = remote_task_uid log.runtime( f'TTY lock acquired for sub-actor\n' @@ -373,11 +354,7 @@ class Lock: finally: message :str = 'Exiting `Lock.acquire()` on behalf of sub-actor\n' - if ( - we_acquired - # and - # cls._debug_lock.locked() - ): + if we_acquired: message += '-> TTY lock released by child\n' cls.release() @@ -392,7 +369,6 @@ class Lock: @tractor.context async def lock_tty_for_child( - ctx: Context, subactor_task_uid: tuple[str, int], @@ -409,13 +385,11 @@ async def lock_tty_for_child( ''' subactor_uid: tuple[str, str] = ctx.chan.uid - # NOTE: we use the IPC ctx's cancel scope directly in order to - # ensure that on any transport failure, or cancellation request - # from the child we expect - # `Context._maybe_cancel_and_set_remote_error()` to cancel this - # scope despite the shielding we apply below. - debug_lock_cs: CancelScope = ctx._scope + # mark the tty lock as being in use so that the runtime + # can try to avoid clobbering any connection from a child + # that's currently relying on it. + we_finished = Lock.req_handler_finished = trio.Event() try: if ctx.cid in Lock._blocked: raise RuntimeError( @@ -437,18 +411,15 @@ async def lock_tty_for_child( f'remote task: {subactor_task_uid}\n' ) ctx._enter_debugger_on_cancel: bool = False - await ctx.cancel(f'Debug lock blocked for {subactor_uid}') - # TODO: remove right? - # return LockStatus( - # subactor_uid=subactor_uid, - # cid=ctx.cid, - # locked=False, - # ) + message: str = ( + f'Debug lock blocked for {subactor_uid}\n' + 'Cancelling debug request!\n' + ) + log.cancel(message) + await ctx.cancel() + raise DebugRequestError(message) - # TODO: when we get to true remote debugging - # this will deliver stdin data? - - log.debug( + log.devx( 'Subactor attempting to acquire TTY lock\n' f'root task: {root_task_name}\n' f'subactor_uid: {subactor_uid}\n' @@ -456,13 +427,33 @@ async def lock_tty_for_child( ) DebugStatus.shield_sigint() Lock._blocked.add(ctx.cid) - with ( - # enable the locking msgspec - apply_debug_pldec(), - ): + + # NOTE: we use the IPC ctx's cancel scope directly in order to + # ensure that on any transport failure, or cancellation request + # from the child we expect + # `Context._maybe_cancel_and_set_remote_error()` to cancel this + # scope despite the shielding we apply below. 
+ debug_lock_cs: CancelScope = ctx._scope + + # TODO: use `.msg._ops.maybe_limit_plds()` here instead so we + # can merge into a single async with, with the + # `Lock.acquire()` enter below? + # + # enable the locking msgspec + with apply_debug_pldec(): async with Lock.acquire(ctx=ctx): debug_lock_cs.shield = True + log.devx( + 'Subactor acquired debugger request lock!\n' + f'root task: {root_task_name}\n' + f'subactor_uid: {subactor_uid}\n' + f'remote task: {subactor_task_uid}\n\n' + + 'Sending `ctx.started(LockStatus)`..\n' + + ) + # indicate to child that we've locked stdio await ctx.started( LockStatus( @@ -472,7 +463,9 @@ async def lock_tty_for_child( ) ) - log.debug( f'Actor {subactor_uid} acquired TTY lock') + log.devx( + f'Actor {subactor_uid} acquired `Lock` via debugger request' + ) # wait for unlock pdb by child async with ctx.open_stream() as stream: @@ -480,14 +473,16 @@ async def lock_tty_for_child( # TODO: security around only releasing if # these match? - log.pdb( + log.devx( f'TTY lock released requested\n\n' f'{release_msg}\n' ) assert release_msg.cid == ctx.cid assert release_msg.subactor_uid == tuple(subactor_uid) - log.debug(f'Actor {subactor_uid} released TTY lock') + log.devx( + f'Actor {subactor_uid} released TTY lock' + ) return LockStatus( subactor_uid=subactor_uid, @@ -497,29 +492,33 @@ async def lock_tty_for_child( except BaseException as req_err: message: str = ( - 'Forcing `Lock.release()` since likely an internal error!\n' + 'Forcing `Lock.release()` for req-ctx since likely an ' + 'internal error!\n\n' + f'{ctx}' ) if isinstance(req_err, trio.Cancelled): - log.cancel( + message = ( 'Cancelled during root TTY-lock dialog?\n' + message ) else: - log.exception( + message = ( 'Errored during root TTY-lock dialog?\n' + message ) + log.exception(message) Lock.release(force=True) raise finally: Lock._blocked.remove(ctx.cid) - if (no_locker := Lock.no_remote_has_tty): - no_locker.set() + # wakeup any waiters since the lock was (presumably) + # released, possibly only temporarily. + we_finished.set() DebugStatus.unshield_sigint() @@ -538,14 +537,23 @@ class DebugStatus: ''' repl: PdbREPL|None = None + + # TODO: yet again this looks like a task outcome where we need + # to sync to the completion of one task (and get its result) + # being used everywhere for syncing.. + # -[ ] see if we can get our proto oco task-mngr to work for + # this? repl_task: Task|None = None + repl_release: trio.Event|None = None + + req_task: Task|None = None req_ctx: Context|None = None req_cs: CancelScope|None = None - repl_release: trio.Event|None = None req_finished: trio.Event|None = None - lock_status: LockStatus|None = None req_err: BaseException|None = None + lock_status: LockStatus|None = None + _orig_sigint_handler: Callable|None = None _trio_handler: ( Callable[[int, FrameType|None], Any] @@ -715,13 +723,13 @@ class DebugStatus: f'{cls.repl_task}\n' ) - # restore original sigint handler - cls.unshield_sigint() - # actor-local state, irrelevant for non-root. cls.repl_task = None cls.repl = None + # restore original sigint handler + cls.unshield_sigint() + class TractorConfig(pdbp.DefaultConfig): ''' @@ -814,17 +822,6 @@ class PdbREPL(pdbp.Pdb): ): Lock.release() - # TODO: special handling where we just want the next LOC and - # not to resume to the next pause/crash point? 
- # def set_next( - # self, - # frame: FrameType - # ) -> None: - # try: - # super().set_next(frame) - # finally: - # pdbp.set_trace() - # XXX NOTE: we only override this because apparently the stdlib pdb # bois likes to touch the SIGINT handler as much as i like to touch # my d$%&. @@ -855,6 +852,9 @@ class PdbREPL(pdbp.Pdb): return None +# TODO: prolly remove this and instead finally get our @context API +# supporting a msg/pld-spec via type annots as per, +# https://github.com/goodboy/tractor/issues/365 @cm def apply_debug_pldec() -> _codec.MsgCodec: ''' @@ -865,8 +865,9 @@ def apply_debug_pldec() -> _codec.MsgCodec: from tractor.msg import ( _ops as msgops, ) - orig_plrx: msgops.PldRx = msgops.current_pldrx() - orig_pldec: msgops.MsgDec = orig_plrx.pld_dec + cctx: Context = current_ipc_ctx() + rx: msgops.PldRx = cctx.pld_rx + orig_pldec: msgops.MsgDec = rx.pld_dec try: with msgops.limit_plds( @@ -875,9 +876,9 @@ def apply_debug_pldec() -> _codec.MsgCodec: assert ( debug_dec is - msgops.current_pldrx().pld_dec + rx.pld_dec ) - log.info( + log.runtime( 'Applied `.devx._debug` pld-spec\n\n' f'{debug_dec}\n' ) @@ -885,11 +886,9 @@ def apply_debug_pldec() -> _codec.MsgCodec: finally: assert ( - (plrx := msgops.current_pldrx()) is orig_plrx - and - plrx.pld_dec is orig_pldec + rx.pld_dec is orig_pldec ) - log.info( + log.runtime( 'Reverted to previous pld-spec\n\n' f'{orig_pldec}\n' ) @@ -898,7 +897,9 @@ def apply_debug_pldec() -> _codec.MsgCodec: async def request_root_stdio_lock( actor_uid: tuple[str, str], task_uid: tuple[str, int], - task_status: TaskStatus[CancelScope] = trio.TASK_STATUS_IGNORED + + shield: bool = False, + task_status: TaskStatus[CancelScope] = trio.TASK_STATUS_IGNORED, ): ''' Connect to the root actor of this process tree and RPC-invoke @@ -915,7 +916,7 @@ async def request_root_stdio_lock( ''' - log.pdb( + log.devx( 'Initing stdio-lock request task with root actor' ) # TODO: likely we can implement this mutex more generally as @@ -928,40 +929,22 @@ async def request_root_stdio_lock( # -[ ] technically we need a `RLock` since re-acquire should be a noop # - https://docs.python.org/3.8/library/multiprocessing.html#multiprocessing.RLock DebugStatus.req_finished = trio.Event() + DebugStatus.req_task = current_task() try: from tractor._discovery import get_root - from tractor.msg import _ops as msgops - debug_dec: msgops.MsgDec - with ( - # NOTE: we need this to ensure that this task exits - # BEFORE the REPl instance raises an error like - # `bdb.BdbQuit` directly, OW you get a trio cs stack - # corruption! - # Further, the since this task is spawned inside the - # `Context._scope_nursery: trio.Nursery`, once an RPC - # task errors that cs is cancel_called and so if we want - # to debug the TPC task that failed we need to shield - # against that expected `.cancel()` call and instead - # expect all of the `PdbREPL`.set_[continue/quit/]()` - # methods to unblock this task by setting the - # `.repl_release: # trio.Event`. - trio.CancelScope(shield=True) as req_cs, - - # NOTE: set it here in the locker request task bc it's - # possible for multiple such requests for the lock in any - # single sub-actor AND there will be a race between when the - # root locking task delivers the `Started(pld=LockStatus)` - # and when the REPL is actually entered by the requesting - # application task who called - # `.pause()`/`.post_mortem()`. 
- # - # SO, applying the pld-spec here means it is only applied to - # this IPC-ctx request task, NOT any other task(s) - # including the one that actually enters the REPL. This - # is oc desired bc ow the debugged task will msg-type-error. - # - apply_debug_pldec() as debug_dec, - ): + # NOTE: we need this to ensure that this task exits + # BEFORE the REPl instance raises an error like + # `bdb.BdbQuit` directly, OW you get a trio cs stack + # corruption! + # Further, the since this task is spawned inside the + # `Context._scope_nursery: trio.Nursery`, once an RPC + # task errors that cs is cancel_called and so if we want + # to debug the TPC task that failed we need to shield + # against that expected `.cancel()` call and instead + # expect all of the `PdbREPL`.set_[continue/quit/]()` + # methods to unblock this task by setting the + # `.repl_release: # trio.Event`. + with trio.CancelScope(shield=shield) as req_cs: # XXX: was orig for debugging cs stack corruption.. # log.info( # 'Request cancel-scope is:\n\n' @@ -972,46 +955,49 @@ async def request_root_stdio_lock( try: # TODO: merge into single async with ? async with get_root() as portal: - async with portal.open_context( lock_tty_for_child, subactor_task_uid=task_uid, + # NOTE: set it here in the locker request task bc it's + # possible for multiple such requests for the lock in any + # single sub-actor AND there will be a race between when the + # root locking task delivers the `Started(pld=LockStatus)` + # and when the REPL is actually entered by the requesting + # application task who called + # `.pause()`/`.post_mortem()`. + # + # SO, applying the pld-spec here means it is only applied to + # this IPC-ctx request task, NOT any other task(s) + # including the one that actually enters the REPL. This + # is oc desired bc ow the debugged task will msg-type-error. + pld_spec=__pld_spec__, + ) as (req_ctx, status): DebugStatus.req_ctx = req_ctx - - # sanity checks on pld-spec limit state - assert debug_dec - # curr_pldrx: msgops.PldRx = msgops.current_pldrx() - # assert ( - # curr_pldrx.pld_dec is debug_dec - # ) - - log.debug( + log.devx( 'Subactor locked TTY with msg\n\n' f'{status}\n' ) - # mk_pdb().set_trace() - try: - assert status.subactor_uid == actor_uid - assert status.cid - except AttributeError: - log.exception('failed pldspec asserts!') - raise + # try: + assert status.subactor_uid == actor_uid + assert status.cid + # except AttributeError: + # log.exception('failed pldspec asserts!') + # mk_pdb().set_trace() + # raise # set last rxed lock dialog status. DebugStatus.lock_status = status async with req_ctx.open_stream() as stream: - - assert DebugStatus.repl_release task_status.started(req_ctx) - # wait for local task to exit its - # `PdbREPL.interaction()`, call - # `DebugStatus.release()` and then - # unblock here. + # wait for local task to exit + # `PdbREPL.interaction()`, normally via + # a `DebugStatus.release()`call, and + # then unblock us here. await DebugStatus.repl_release.wait() await stream.send( LockRelease( @@ -1026,10 +1012,10 @@ async def request_root_stdio_lock( assert not status.locked DebugStatus.lock_status = status - log.pdb( + log.devx( 'TTY lock was released for subactor with msg\n\n' f'{status}\n\n' - f'Exitting {req_ctx.side!r}-side of locking req_ctx' + f'Exitting {req_ctx.side!r}-side of locking req_ctx\n' ) except ( @@ -1081,13 +1067,14 @@ async def request_root_stdio_lock( # ctl-c out of the currently hanging task! 
raise DebugRequestError( 'Failed to lock stdio from subactor IPC ctx!\n\n' - f'req_ctx: {req_ctx}\n' + f'req_ctx: {DebugStatus.req_ctx}\n' ) from req_err finally: - log.debug('Exiting debugger TTY lock request func from child') + log.devx('Exiting debugger TTY lock request func from child') # signal request task exit DebugStatus.req_finished.set() + DebugStatus.req_task = None def mk_pdb() -> PdbREPL: @@ -1321,31 +1308,40 @@ def shield_sigint_handler( DebugStatus.unshield_sigint() # do_cancel() - task: str|None = DebugStatus.repl_task + repl_task: str|None = DebugStatus.repl_task + req_task: str|None = DebugStatus.req_task if ( - task + repl_task and repl ): log.pdb( f'Ignoring SIGINT while local task using debug REPL\n' - f'|_{task}\n' + f'|_{repl_task}\n' f' |_{repl}\n' ) + elif req_task: + log.pdb( + f'Ignoring SIGINT while debug request task is open\n' + f'|_{req_task}\n' + ) else: msg: str = ( 'SIGINT shield handler still active BUT, \n\n' ) - if task is None: + if repl_task is None: msg += ( - f'- No local task claims to be in debug?\n' - f' |_{task}\n\n' + '- No local task claims to be in debug?\n' ) if repl is None: msg += ( - f'- No local REPL is currently active?\n' - f' |_{repl}\n\n' + '- No local REPL is currently active?\n' + ) + + if req_task is None: + msg += ( + '- No debug request task is active?\n' ) log.warning( @@ -1358,7 +1354,6 @@ def shield_sigint_handler( # XXX ensure that the reverted-to-handler actually is # able to rx what should have been **this** KBI ;) do_cancel() - # raise KeyboardInterrupt # TODO: how to handle the case of an intermediary-child actor # that **is not** marked in debug mode? See oustanding issue: @@ -1392,7 +1387,7 @@ def shield_sigint_handler( # https://github.com/prompt-toolkit/python-prompt-toolkit/blob/c2c6af8a0308f9e5d7c0e28cb8a02963fe0ce07a/prompt_toolkit/patch_stdout.py # XXX only for tracing this handler - # log.warning('exiting SIGINT') + log.devx('exiting SIGINT') _pause_msg: str = 'Attaching to pdb REPL in actor' @@ -1420,14 +1415,9 @@ async def _pause( # is always show in the debugger on entry.. and there seems to # be no way to override it?.. # - # shield: bool = False, - hide_tb: bool = True, - - # bc, `debug_func()`, `_enter_repl_sync()` and `_pause()` - # extra_frames_up_when_async: int = 3, - + shield: bool = False, + hide_tb: bool = False, task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED, - **debug_func_kwargs, ) -> None: @@ -1452,6 +1442,87 @@ async def _pause( 'for infected `asyncio` mode!' ) from rte + if debug_func is not None: + debug_func = partial(debug_func) + + repl: PdbREPL = repl or mk_pdb() + + # XXX NOTE XXX set it here to avoid ctl-c from cancelling a debug + # request from a subactor BEFORE the REPL is entered by that + # process. + DebugStatus.shield_sigint() + + # TODO: move this into a `open_debug_request()` @acm? + # -[ ] prolly makes the most sense to do the request + # task spawn as part of an `@acm` api which delivers the + # `DebugRequest` instance and ensures encapsing all the + # pld-spec and debug-nursery? + # -[ ] maybe make this a `PdbREPL` method or mod func? + # -[ ] factor out better, main reason for it is common logic for + # both root and sub repl entry + def _enter_repl_sync( + debug_func: Callable, + ) -> None: + __tracebackhide__: bool = hide_tb + + try: + # set local actor task to avoid recurrent + # entries/requests from the same local task (to the root + # process). 
+ DebugStatus.repl_task = task + DebugStatus.repl = repl + + # TODO: do we want to support using this **just** for the + # locking / common code (prolly to help address #320)? + if debug_func is None: + task_status.started(DebugStatus) + + else: + log.warning( + 'Entering REPL for task fuck you!\n' + f'{task}\n' + ) + # block here one (at the appropriate frame *up*) where + # ``breakpoint()`` was awaited and begin handling stdio. + log.devx( + 'Entering sync world of the `pdb` REPL for task..\n' + f'{repl}\n' + f' |_{task}\n' + ) + + # invoke the low-level REPL activation routine which itself + # should call into a `Pdb.set_trace()` of some sort. + debug_func( + repl=repl, + hide_tb=hide_tb, + **debug_func_kwargs, + ) + + except trio.Cancelled: + log.exception( + 'Cancelled during invoke of internal `debug_func = ' + f'{debug_func.func.__name__}`\n' + ) + # XXX NOTE: DON'T release lock yet + raise + + except BaseException: + __tracebackhide__: bool = False + log.exception( + 'Failed to invoke internal `debug_func = ' + f'{debug_func.func.__name__}`\n' + ) + # NOTE: OW this is ONLY called from the + # `.set_continue/next` hooks! + DebugStatus.release(cancel_req_task=True) + + raise + + log.devx( + 'Entering `._pause()` for requesting task\n' + f'|_{task}\n' + ) + # TODO: this should be created as part of `DebugRequest()` init # which should instead be a one-shot-use singleton much like # the `PdbREPL`. @@ -1461,71 +1532,9 @@ async def _pause( DebugStatus.repl_release.is_set() ): DebugStatus.repl_release = trio.Event() - - if debug_func is not None: - debug_func = partial(debug_func) - - repl: PdbREPL = repl or mk_pdb() - - # TODO: maybe make this a `PdbREPL` method or mod func? - # -[ ] factor out better, main reason for it is common logic for - # both root and sub repl entry - def _enter_repl_sync( - debug_func: Callable, - ) -> None: - __tracebackhide__: bool = hide_tb - - # TODO: do we want to support using this **just** for the - # locking / common code (prolly to help address #320)? - # - if debug_func is None: - task_status.started(DebugStatus) - else: - # block here one (at the appropriate frame *up*) where - # ``breakpoint()`` was awaited and begin handling stdio. - log.debug('Entering sync world of the `pdb` REPL..') - - # XXX used by the SIGINT handler to check if - # THIS actor is in REPL interaction - try: - # TODO: move this into a `open_debug_request()` @acm? - # -[ ] prolly makes the most send to do the request - # task spawn as part of an `@acm` api which - # delivers the `DebugRequest` instance and ensures - # encapsing all the pld-spec and debug-nursery? - # - # set local actor task to avoid recurrent - # entries/requests from the same local task - # (to the root process). - DebugStatus.repl_task = task - DebugStatus.repl = repl - DebugStatus.shield_sigint() - - # enter `PdbREPL` specific method - debug_func( - repl=repl, - hide_tb=hide_tb, - **debug_func_kwargs, - ) - except trio.Cancelled: - log.exception( - 'Cancelled during invoke of internal `debug_func = ' - f'{debug_func.func.__name__}`\n' - ) - # NOTE: DON'T release lock yet - raise - - except BaseException: - __tracebackhide__: bool = False - log.exception( - 'Failed to invoke internal `debug_func = ' - f'{debug_func.func.__name__}`\n' - ) - # NOTE: OW this is ONLY called from the - # `.set_continue/next` hooks! 
- DebugStatus.release(cancel_req_task=True) - - raise + # ^-NOTE-^ this must be created BEFORE scheduling any subactor + # debug-req task since it needs to wait on it just after + # `.started()`-ing back its wrapping `.req_cs: CancelScope`. repl_err: BaseException|None = None try: @@ -1579,38 +1588,61 @@ async def _pause( not is_root_process() and actor._parent_chan # a connected child ): - if DebugStatus.repl_task: + repl_task: Task|None = DebugStatus.repl_task + req_task: Task|None = DebugStatus.req_task + if req_task: + log.warning( + f'Already an ongoing repl request?\n' + f'|_{req_task}\n\n' - # Recurrence entry case: this task already has the lock and - # is likely recurrently entering a breakpoint + f'REPL task is\n' + f'|_{repl_task}\n\n' + + ) + # Recurrent entry case. + # this task already has the lock and is likely + # recurrently entering a `.pause()`-point either bc, + # - someone is hacking on runtime internals and put + # one inside code that get's called on the way to + # this code, + # - a legit app task uses the 'next' command while in + # a REPL sesh, and actually enters another + # `.pause()` (in a loop or something). # - # NOTE: noop on recurrent entry case but we want to trigger - # a checkpoint to allow other actors error-propagate and - # potetially avoid infinite re-entries in some - # subactor that would otherwise not bubble until the - # next checkpoint was hit. + # XXX Any other cose is likely a bug. if ( - (repl_task := DebugStatus.repl_task) - and - repl_task is task + repl_task + ): + if repl_task is task: + log.warning( + f'{task.name}@{actor.uid} already has TTY lock\n' + f'ignoring..' + ) + await trio.lowlevel.checkpoint() + return + + else: + # if **this** actor is already in debug REPL we want + # to maintain actor-local-task mutex access, so block + # here waiting for the control to be released - this + # -> allows for recursive entries to `tractor.pause()` + log.warning( + f'{task}@{actor.uid} already has TTY lock\n' + f'waiting for release..' + ) + await DebugStatus.repl_release.wait() + await trio.sleep(0.1) + + elif ( + req_task ): log.warning( - f'{task.name}@{actor.uid} already has TTY lock\n' - f'ignoring..' - ) - await trio.lowlevel.checkpoint() - return + 'Local task already has active debug request\n' + f'|_{task}\n\n' - # if **this** actor is already in debug REPL we want - # to maintain actor-local-task mutex access, so block - # here waiting for the control to be released - this - # -> allows for recursive entries to `tractor.pause()` - log.warning( - f'{task.name}@{actor.uid} already has TTY lock\n' - f'waiting for release..' - ) - await DebugStatus.repl_release.wait() - await trio.sleep(0.1) + 'Waiting for previous request to complete..\n' + ) + await DebugStatus.req_finished.wait() # this **must** be awaited by the caller and is done using the # root nursery so that the debugger can continue to run without @@ -1642,16 +1674,23 @@ async def _pause( # -[ ] we probably only need to allocate the nursery when # we detect the runtime is already in debug mode. 
# - # ctx: Context = await curr_ctx._debug_tn.start( + curr_ctx: Context = current_ipc_ctx() + # req_ctx: Context = await curr_ctx._debug_tn.start( + log.devx( + 'Starting request task\n' + f'|_{task}\n' + ) req_ctx: Context = await actor._service_n.start( - request_root_stdio_lock, - actor.uid, - (task.name, id(task)), # task uuid (effectively) + partial( + request_root_stdio_lock, + actor_uid=actor.uid, + task_uid=(task.name, id(task)), # task uuid (effectively) + shield=shield, + ) ) # XXX sanity, our locker task should be the one which # entered a new IPC ctx with the root actor, NOT the one # that exists around the task calling into `._pause()`. - curr_ctx: Context = current_ipc_ctx() assert ( req_ctx is @@ -1665,8 +1704,8 @@ async def _pause( # TODO: prolly factor this plus the similar block from # `_enter_repl_sync()` into a common @cm? - except BaseException as repl_err: - if isinstance(repl_err, bdb.BdbQuit): + except BaseException as pause_err: + if isinstance(pause_err, bdb.BdbQuit): log.devx( 'REPL for pdb was quit!\n' ) @@ -1675,7 +1714,7 @@ async def _pause( # `Actor._service_n` might get closed before we can spawn # the request task, so just ignore expected RTE. elif ( - isinstance(repl_err, RuntimeError) + isinstance(pause_err, RuntimeError) and actor._cancel_called ): @@ -1698,13 +1737,22 @@ async def _pause( # sanity checks for ^ on request/status teardown assert DebugStatus.repl is None assert DebugStatus.repl_task is None - req_ctx: Context = DebugStatus.req_ctx - if req_ctx: - assert req_ctx._scope.cancel_called + + # sanity, for when hackin on all this? + if not isinstance(pause_err, trio.Cancelled): + req_ctx: Context = DebugStatus.req_ctx + if req_ctx: + # XXX, bc the child-task in root might cancel it? + # assert req_ctx._scope.cancel_called + assert req_ctx.maybe_error raise finally: + # set in finally block of func.. this can be synced-to + # eventually with a debug_nursery somehow? + # assert DebugStatus.req_task is None + # always show frame when request fails due to internal # failure in the above code (including an `BdbQuit`). if ( @@ -1721,9 +1769,15 @@ def _set_trace( # partial-ed in by `.pause()` api_frame: FrameType, + + # optionally passed in to provide support for + # `pause_from_sync()` where + actor: tractor.Actor|None = None, + task: trio.Task|None = None, ): __tracebackhide__: bool = hide_tb - actor: tractor.Actor = current_actor() + actor: tractor.Actor = actor or current_actor() + task: task or current_task() # else: # TODO: maybe print the actor supervion tree up to the @@ -1731,8 +1785,10 @@ def _set_trace( log.pdb( f'{_pause_msg}\n' '|\n' - # TODO: make an `Actor.__repr()__` - f'|_ {current_task()} @ {actor.uid}\n' + # TODO: more compact pformating? + # -[ ] make an `Actor.__repr()__` + # -[ ] should we use `log.pformat_task_uid()`? + f'|_ {task} @ {actor.uid}\n' ) # presuming the caller passed in the "api frame" # (the last frame before user code - like `.pause()`) @@ -1747,7 +1803,7 @@ def _set_trace( async def pause( *, - hide_tb: bool = True, + hide_tb: bool = False, api_frame: FrameType|None = None, # TODO: figure out how to still make this work: @@ -1798,8 +1854,7 @@ async def pause( _set_trace, api_frame=api_frame, ), - - # task_status=task_status, + shield=shield, **_pause_kwargs ) # XXX avoid cs stack corruption when `PdbREPL.interaction()` @@ -1867,88 +1922,97 @@ async def maybe_init_greenback( # normally by remapping python's builtin breakpoint() hook to this # runtime aware version which takes care of all . 
def pause_from_sync( + hide_tb: bool = False, + # proxied to `_pause()` + + **_pause_kwargs, + # for eg. + # shield: bool = False, + # api_frame: FrameType|None = None, + ) -> None: __tracebackhide__: bool = hide_tb - actor: tractor.Actor = current_actor( - err_on_no_runtime=False, - ) - log.debug( - f'{actor.uid}: JUST ENTERED `tractor.pause_from_sync()`' - f'|_{actor}\n' - ) - if not actor: - raise RuntimeError( - 'Not inside the `tractor`-runtime?\n' - '`tractor.pause_from_sync()` is not functional without a wrapping\n' - '- `async with tractor.open_nursery()` or,\n' - '- `async with tractor.open_root_actor()`\n' + try: + actor: tractor.Actor = current_actor( + err_on_no_runtime=False, ) - - # NOTE: once supported, remove this AND the one - # inside `._pause()`! - if actor.is_infected_aio(): - raise RuntimeError( - '`tractor.pause[_from_sync]()` not yet supported ' - 'for infected `asyncio` mode!' + log.debug( + f'{actor.uid}: JUST ENTERED `tractor.pause_from_sync()`' + f'|_{actor}\n' ) - - # raises on not-found by default - greenback: ModuleType = maybe_import_greenback() - mdb: PdbREPL = mk_pdb() - - # run async task which will lock out the root proc's TTY. - if not Lock.is_main_trio_thread(): - - # TODO: we could also check for a non-`.to_thread` context - # using `trio.from_thread.check_cancelled()` (says - # oremanj) wherein we get the following outputs: - # - # `RuntimeError`: non-`.to_thread` spawned thread - # noop: non-cancelled `.to_thread` - # `trio.Cancelled`: cancelled `.to_thread` - # - trio.from_thread.run( - partial( - pause, - debug_func=None, - pdb=mdb, - hide_tb=hide_tb, + if not actor: + raise RuntimeError( + 'Not inside the `tractor`-runtime?\n' + '`tractor.pause_from_sync()` is not functional without a wrapping\n' + '- `async with tractor.open_nursery()` or,\n' + '- `async with tractor.open_root_actor()`\n' ) - ) - # TODO: maybe the `trio.current_task()` id/name if avail? - DebugStatus.repl_task: str = str(threading.current_thread()) - else: # we are presumably the `trio.run()` + main thread - greenback.await_( - pause( - debug_func=None, - pdb=mdb, - hide_tb=hide_tb, + # NOTE: once supported, remove this AND the one + # inside `._pause()`! + if actor.is_infected_aio(): + raise RuntimeError( + '`tractor.pause[_from_sync]()` not yet supported ' + 'for infected `asyncio` mode!' ) + + # raises on not-found by default + greenback: ModuleType = maybe_import_greenback() + mdb: PdbREPL = mk_pdb() + + # run async task which will lock out the root proc's TTY. + if not DebugStatus.is_main_trio_thread(): + + # TODO: we could also check for a non-`.to_thread` context + # using `trio.from_thread.check_cancelled()` (says + # oremanj) wherein we get the following outputs: + # + # `RuntimeError`: non-`.to_thread` spawned thread + # noop: non-cancelled `.to_thread` + # `trio.Cancelled`: cancelled `.to_thread` + # + trio.from_thread.run( + partial( + _pause, + debug_func=None, + repl=mdb, + **_pause_kwargs + ), + ) + task: threading.Thread = threading.current_thread() + + else: # we are presumably the `trio.run()` + main thread + task: trio.Task = current_task() + greenback.await_( + _pause( + debug_func=None, + repl=mdb, + **_pause_kwargs, + ) + ) + DebugStatus.repl_task: str = current_task() + + # TODO: ensure we aggressively make the user aware about + # entering the global ``breakpoint()`` built-in from sync + # code? 
+ _set_trace( + api_frame=inspect.currentframe(), + repl=mdb, + hide_tb=hide_tb, + actor=actor, + task=task, ) - DebugStatus.repl_task: str = current_task() - - # TODO: ensure we aggressively make the user aware about - # entering the global ``breakpoint()`` built-in from sync - # code? - _set_trace( - api_frame=inspect.current_frame(), - actor=actor, - pdb=mdb, - hide_tb=hide_tb, - - # TODO? will we ever need it? - # -> the gb._await() won't be affected by cancellation? - # shield=shield, - ) - # LEGACY NOTE on next LOC's frame showing weirdness.. - # - # XXX NOTE XXX no other LOC can be here without it - # showing up in the REPL's last stack frame !?! - # -[ ] tried to use `@pdbp.hideframe` decoration but - # still doesn't work + # LEGACY NOTE on next LOC's frame showing weirdness.. + # + # XXX NOTE XXX no other LOC can be here without it + # showing up in the REPL's last stack frame !?! + # -[ ] tried to use `@pdbp.hideframe` decoration but + # still doesn't work + except BaseException as err: + __tracebackhide__: bool = False + raise err # NOTE prefer a new "pause" semantic since it better describes @@ -2135,6 +2199,7 @@ async def maybe_wait_for_debugger( child_in_debug: bool = False, header_msg: str = '', + _ll: str = 'devx', ) -> bool: # was locked and we polled? @@ -2144,6 +2209,7 @@ async def maybe_wait_for_debugger( ): return False + logmeth: Callable = getattr(log, _ll) msg: str = header_msg if ( @@ -2156,7 +2222,11 @@ async def maybe_wait_for_debugger( # Instead try to wait for pdb to be released before # tearing down. ctx_in_debug: Context|None = Lock.ctx_in_debug - in_debug: tuple[str, str]|None = ctx_in_debug.chan.uid if ctx_in_debug else None + in_debug: tuple[str, str]|None = ( + ctx_in_debug.chan.uid + if ctx_in_debug + else None + ) if in_debug == current_actor().uid: log.debug( msg @@ -2176,7 +2246,7 @@ async def maybe_wait_for_debugger( # XXX => but it doesn't seem to work.. # await trio.testing.wait_all_tasks_blocked(cushion=0) else: - log.debug( + logmeth( msg + 'Root immediately acquired debug TTY LOCK' @@ -2185,13 +2255,13 @@ async def maybe_wait_for_debugger( for istep in range(poll_steps): if ( - Lock.no_remote_has_tty is not None - and not Lock.no_remote_has_tty.is_set() + Lock.req_handler_finished is not None + and not Lock.req_handler_finished.is_set() and in_debug is not None ): # caller_frame_info: str = pformat_caller_frame() - log.debug( + logmeth( msg + '\nRoot is waiting on tty lock to release from\n\n' @@ -2202,7 +2272,7 @@ async def maybe_wait_for_debugger( Lock.get_locking_task_cs().cancel() with trio.CancelScope(shield=True): - await Lock.no_remote_has_tty.wait() + await Lock.req_handler_finished.wait() log.pdb( f'Subactor released debug lock\n' @@ -2214,11 +2284,11 @@ async def maybe_wait_for_debugger( if ( in_debug is None and ( - Lock.no_remote_has_tty is None - or Lock.no_remote_has_tty.is_set() + Lock.req_handler_finished is None + or Lock.req_handler_finished.is_set() ) ): - log.pdb( + logmeth( msg + 'Root acquired tty lock!' @@ -2226,13 +2296,11 @@ async def maybe_wait_for_debugger( break else: - # TODO: don't need this right? 
-                # await trio.lowlevel.checkpoint()
-
-                log.debug(
+                logmeth(
                     'Root polling for debug:\n'
                     f'poll step: {istep}\n'
-                    f'poll delya: {poll_delay}'
+                    f'poll delay: {poll_delay}\n\n'
+                    f'{Lock.repr()}\n'
                 )
                 with CancelScope(shield=True):
                     await trio.sleep(poll_delay)
-- 
2.34.1


From 904c6895f7da96b5c276731ff882dccfb7b28b7d Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 22 May 2024 10:22:51 -0400
Subject: [PATCH 113/305] Better context aware `RemoteActorError.pformat()`

Such that when displaying with `.__str__()` we do not show the type
header (style) since normally python's raising machinery already prints
the type path like `'tractor._exceptions.RemoteActorError:'`, so doing
it 2x is a bit ugly ;p

In support,
- include `.relay_uid` in `RemoteActorError.extra_body_fields`.
- offer a `with_type_header: bool` to `.pformat()` and only put the
  opening type path and closing `')>'` tail line when `True`.
- add `.is_inception() -> bool:` for an easy way to determine if the
  error is multi-hop relayed.
- only repr the `'|_relay_uid='` field when an error is an inception.
- tweak the invalid-payload case in `_mk_msg_type_err()` to explicitly
  state in the `message` how the `any_pld` value does not match the
  `MsgDec.pld_spec` by decoding the invalid `.pld` with an any-dec.
- allow `_mk_msg_type_err(**mte_kwargs)` passthrough.
- pass `boxed_type=cls` inside `MsgTypeError.from_decode()`.
---
 tractor/_exceptions.py | 101 +++++++++++++++++++++++++++++++++++------
 1 file changed, 87 insertions(+), 14 deletions(-)

diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py
index 83675069..179b49a1 100644
--- a/tractor/_exceptions.py
+++ b/tractor/_exceptions.py
@@ -187,6 +187,9 @@ class RemoteActorError(Exception):
     ]
     extra_body_fields: list[str] = [
         'cid',
+        # NOTE: we only show this on relayed errors (aka
+        # "inceptions").
+        'relay_uid',
         'boxed_type',
     ]

@@ -273,7 +276,7 @@ class RemoteActorError(Exception):
     @property
     def ipc_msg(self) -> Struct:
         '''
-        Re-render the underlying `._ipc_msg: Msg` as
+        Re-render the underlying `._ipc_msg: MsgType` as
         a `pretty_struct.Struct` for introspection such that the
         returned value is a read-only copy of the original.

@@ -344,7 +347,7 @@ class RemoteActorError(Exception):
         return str(bt.__name__)

     @property
-    def boxed_type(self) -> str:
+    def boxed_type(self) -> Type[BaseException]:
         '''
         Error type boxed by last actor IPC hop.

@@ -409,7 +412,14 @@ class RemoteActorError(Exception):
         end_char: str = '\n',
     ) -> str:
         _repr: str = ''
+
         for key in fields:
+            if (
+                key == 'relay_uid'
+                and not self.is_inception()
+            ):
+                continue
+
             val: Any|None = (
                 getattr(self, key, None)
                 or
@@ -427,6 +437,7 @@ class RemoteActorError(Exception):

             if val:
                 _repr += f'{key}={val_str}{end_char}'
+
         return _repr

     def reprol(self) -> str:
@@ -455,15 +466,45 @@ class RemoteActorError(Exception):
             _repr
         )

-    def pformat(self) -> str:
+    def is_inception(self) -> bool:
+        '''
+        Predicate which determines if the shuttled error type
+        is the same as the container error type; IOW is this
+        an "error within an error" which points to some original
+        source error that was relayed through multiple
+        actor hops.
+
+        Ex. a relayed remote error will generally be some form of
+        `RemoteActorError[RemoteActorError]` with a `.src_type` which
+        is not of that same type.
+
+        '''
+        # if a single hop boxed error it was not relayed
+        # more than one hop directly from the src actor.
+ if ( + self.boxed_type + is + self.src_type + ): + return False + + return True + + def pformat( + self, + with_type_header: bool = True, + ) -> str: ''' Nicely formatted boxed error meta data + traceback, OR just the normal message from `.args` (for eg. as you'd want shown by a locally raised `ContextCancelled`). ''' - tb_str: str = self.tb_str - if tb_str: + header: str = '' + if with_type_header: + header: str = f'<{type(self).__name__}(\n' + + if tb_str := self.tb_str: fields: str = self._mk_fields_str( _body_fields + @@ -481,19 +522,35 @@ class RemoteActorError(Exception): # |___ .. tb_body_indent=1, ) + if not with_type_header: + body = '\n' + body else: body: str = textwrap.indent( self._message, prefix=' ', ) + '\n' + + if with_type_header: + tail: str = ')>' + else: + tail = '' + return ( - f'<{type(self).__name__}(\n' + header + + f'{body}' - ')>' + + + tail ) __repr__ = pformat - __str__ = pformat + + # NOTE: apparently we need this so that + # the full fields show in debugger tests? + # |_ i guess `pexepect` relies on `str`-casing + # of output? + def __str__(self) -> str: + return self.pformat(with_type_header=False) def unwrap( self, @@ -682,6 +739,7 @@ class MsgTypeError( ) -> MsgTypeError: return cls( message=message, + boxed_type=cls, # NOTE: original "vanilla decode" of the msg-bytes # is placed inside a value readable from @@ -949,10 +1007,11 @@ def _raise_from_unexpected_msg( if isinstance(msg, Error): # match msg: # case Error(): - raise unpack_error( + exc: RemoteActorError = unpack_error( msg, ctx.chan, - ) from src_err + ) + raise exc from src_err # `MsgStream` termination msg. # TODO: does it make more sense to pack @@ -966,10 +1025,11 @@ def _raise_from_unexpected_msg( or isinstance(msg, Stop) ): - log.debug( + message: str = ( f'Context[{cid}] stream was stopped by remote side\n' f'cid: {cid}\n' ) + log.debug(message) # TODO: if the a local task is already blocking on # a `Context.result()` and thus a `.receive()` on the @@ -983,6 +1043,8 @@ def _raise_from_unexpected_msg( f'Context stream ended due to msg:\n\n' f'{pformat(msg)}\n' ) + eoc.add_note(message) + # XXX: important to set so that a new `.receive()` # call (likely by another task using a broadcast receiver) # doesn't accidentally pull the `return` message @@ -1007,6 +1069,7 @@ def _raise_from_unexpected_msg( ' BUT received a non-error msg:\n\n' f'{struct_format(msg)}' ) from src_err + # ^-TODO-^ maybe `MsgDialogError` is better? _raise_from_no_key_in_msg = _raise_from_unexpected_msg @@ -1023,6 +1086,8 @@ def _mk_msg_type_err( src_type_error: TypeError|None = None, is_invalid_payload: bool = False, + **mte_kwargs, + ) -> MsgTypeError: ''' Compose a `MsgTypeError` from an input runtime context. @@ -1081,12 +1146,20 @@ def _mk_msg_type_err( else: if is_invalid_payload: msg_type: str = type(msg) + any_pld: Any = msgpack.decode(msg.pld) message: str = ( f'invalid `{msg_type.__qualname__}` payload\n\n' - f'<{type(msg).__qualname__}(\n' - f' |_pld: {codec.pld_spec_str} = {msg.pld!r}' - f')>\n' + f'value: `{any_pld!r}` does not match type-spec: ' #\n' + f'`{type(msg).__qualname__}.pld: {codec.pld_spec_str}`' + # f'<{type(msg).__qualname__}(\n' + # f' |_pld: {codec.pld_spec_str}\n'# != {any_pld!r}\n' + # f')>\n\n' ) + # TODO: should we just decode the msg to a dict despite + # only the payload being wrong? + # -[ ] maybe the better design is to break this construct + # logic into a separate explicit helper raiser-func? 
+            msg_dict: dict = {}
 
     else:
         # decode the msg-bytes using the std msgpack
-- 
2.34.1


From d530002d664d5622aa95f552f7ecce7e784eb4a0 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 22 May 2024 14:56:54 -0400
Subject: [PATCH 114/305] Move runtime frame hiding into helper func

Call it `hide_runtime_frames()` and stick all the lines from the top of
the `._debug` mod in there along with a little `log.devx()` emission on
what gets hidden by default ;)

Other,
- fix ref-error where internal-error handler might trigger despite the
  debug `req_ctx` not yet having init-ed, such that we don't try to
  cancel or log about it when it never was fully created/initialized..
- fix assignment typo inside `_set_trace()` for `task`.. lel
---
 tractor/devx/_debug.py | 131 +++++++++++++++++++++++++++--------------
 1 file changed, 86 insertions(+), 45 deletions(-)

diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py
index 877d2de6..753c1985 100644
--- a/tractor/devx/_debug.py
+++ b/tractor/devx/_debug.py
@@ -48,9 +48,11 @@ from typing import (
     TYPE_CHECKING,
 )
 from types import (
+    FunctionType,
     FrameType,
     ModuleType,
     TracebackType,
+    CodeType,
 )
 
 from msgspec import Struct
@@ -90,43 +92,72 @@ if TYPE_CHECKING:
 
 log = get_logger(__name__)
 
-# XXX HACKZONE XXX
-# hide exit stack frames on nurseries and cancel-scopes!
-# |_ so avoid seeing it when the `pdbp` REPL is first engaged from
-#    inside a `trio.open_nursery()` scope (with no line after it
-#    in before the block end??).
-#
-# TODO: FINALLY got this workin originally with
-#  `@pdbp.hideframe` around the `wrapper()` def embedded inside
-#  `_ki_protection_decoratior()`.. which is in the module:
-#  /home/goodboy/.virtualenvs/tractor311/lib/python3.11/site-packages/trio/_core/_ki.py
-#
-# -[ ] make an issue and patch for `trio` core? maybe linked
-#    to the long outstanding `pdb` one below?
-#   |_ it's funny that there's frame hiding throughout `._run.py`
-#      but not where it matters on the below exit funcs..
-#
-# -[ ] provide a patchset for the lonstanding
-#   |_ https://github.com/python-trio/trio/issues/1155
-#
-# -[ ] make a linked issue to ^ and propose allowing all the
-#     `._core._run` code to have their `__tracebackhide__` value
-#     configurable by a `RunVar` to allow getting scheduler frames
-#     if desired through configuration?
-#
-# -[ ] maybe dig into the core `pdb` issue why the extra frame is shown
-#      at all?
-#
-pdbp.hideframe(trio._core._run.NurseryManager.__aexit__)
-pdbp.hideframe(trio._core._run.CancelScope.__exit__)
-pdbp.hideframe(_GeneratorContextManager.__exit__)
-pdbp.hideframe(_AsyncGeneratorContextManager.__aexit__)
-pdbp.hideframe(trio.Event.wait)
 
-__all__ = [
-    'breakpoint',
-    'post_mortem',
-]
+def hide_runtime_frames() -> dict[FunctionType, CodeType]:
+    '''
+    Hide call-stack frames for various std-lib and `trio`-API primitives
+    such that the tracebacks presented from our runtime are as minimized
+    as possible, particularly from inside a `PdbREPL`.
+
+    '''
+    # XXX HACKZONE XXX
+    # hide exit stack frames on nurseries and cancel-scopes!
+    # |_ so avoid seeing it when the `pdbp` REPL is first engaged from
+    #    inside a `trio.open_nursery()` scope (with no line after it
+    #    in before the block end??).
+    #
+    # TODO: FINALLY got this workin originally with
+    #  `@pdbp.hideframe` around the `wrapper()` def embedded inside
+    #  `_ki_protection_decoratior()`.. which is in the module:
+    #  /home/goodboy/.virtualenvs/tractor311/lib/python3.11/site-packages/trio/_core/_ki.py
+    #
+    # -[ ] make an issue and patch for `trio` core? maybe linked
+    #    to the long outstanding `pdb` one below?
+    #   |_ it's funny that there's frame hiding throughout `._run.py`
+    #      but not where it matters on the below exit funcs..
+    #
+    # -[ ] provide a patchset for the longstanding
+    #   |_ https://github.com/python-trio/trio/issues/1155
+    #
+    # -[ ] make a linked issue to ^ and propose allowing all the
+    #     `._core._run` code to have their `__tracebackhide__` value
+    #     configurable by a `RunVar` to allow getting scheduler frames
+    #     if desired through configuration?
+    #
+    # -[ ] maybe dig into the core `pdb` issue why the extra frame is shown
+    #      at all?
+    #
+    funcs: list[FunctionType] = [
+        trio._core._run.NurseryManager.__aexit__,
+        trio._core._run.CancelScope.__exit__,
+        _GeneratorContextManager.__exit__,
+        _AsyncGeneratorContextManager.__aexit__,
+        _AsyncGeneratorContextManager.__aenter__,
+        trio.Event.wait,
+    ]
+    func_list_str: str = textwrap.indent(
+        "\n".join(f.__qualname__ for f in funcs),
+        prefix=' |_ ',
+    )
+    log.devx(
+        'Hiding the following runtime frames by default:\n'
+        f'{func_list_str}\n'
+    )
+
+    codes: dict[FunctionType, CodeType] = {}
+    for ref in funcs:
+        # stash a pre-modified version of each ref's code-obj
+        # so it can be reverted later if needed.
+        codes[ref] = ref.__code__
+        pdbp.hideframe(ref)
+    #
+    # pdbp.hideframe(trio._core._run.NurseryManager.__aexit__)
+    # pdbp.hideframe(trio._core._run.CancelScope.__exit__)
+    # pdbp.hideframe(_GeneratorContextManager.__exit__)
+    # pdbp.hideframe(_AsyncGeneratorContextManager.__aexit__)
+    # pdbp.hideframe(_AsyncGeneratorContextManager.__aenter__)
+    # pdbp.hideframe(trio.Event.wait)
+    return codes
 
 
 class LockStatus(
@@ -1032,15 +1063,24 @@ async def request_root_stdio_lock(
 
         except (
             BaseException,
-        ):
-            log.exception(
-                'Failed during root TTY-lock dialog?\n'
-                f'{req_ctx}\n'
-
-                f'Cancelling IPC ctx!\n'
+        ) as ctx_err:
+            message: str = (
+                'Failed during debug request dialog with root actor?\n\n'
             )
-            await req_ctx.cancel()
-            raise
+
+            if req_ctx:
+                message += (
+                    f'{req_ctx}\n'
+                    f'Cancelling IPC ctx!\n'
+                )
+                await req_ctx.cancel()
+
+            else:
+                message += 'Failed during `Portal.open_context()` ?\n'
+
+            log.exception(message)
+            ctx_err.add_note(message)
+            raise ctx_err
 
     except (
@@ -1067,6 +1107,7 @@ async def request_root_stdio_lock(
         # ctl-c out of the currently hanging task!
         raise DebugRequestError(
             'Failed to lock stdio from subactor IPC ctx!\n\n'
+            f'req_ctx: {DebugStatus.req_ctx}\n'
         ) from req_err
 
@@ -1777,7 +1818,7 @@ def _set_trace(
 ):
     __tracebackhide__: bool = hide_tb
     actor: tractor.Actor = actor or current_actor()
-    task: task or current_task()
+    task: trio.Task = task or current_task()
 
     # else:
     # TODO: maybe print the actor supervion tree up to the
-- 
2.34.1

From 6c992a2feac662ddb499a229a888a59b765f8ca9 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 22 May 2024 15:01:31 -0400
Subject: [PATCH 115/305] Update debugger tests to expect new pformatting

Mostly the result of the `RemoteActorError.pformat()` and our new
`_pause/crash_msg: str`s which include the `trio.Task.__repr__()` in
the `log.pdb()` message.

Obvi use `in_prompt_msg()` to accomplish this where it wasn't used
prior.

ToDo later:
-[ ] still some outstanding questions on how detailed inceptions
  should look, eg. in `test_multi_nested_subactors_error_through_nurseries()`
 |_maybe we should be more pedantic at checking `.src_uid` vs.
   `.relay_uid` fields?
-[ ] staged a placeholder test for verifying correct call-stack frame
  on crash handler REPL entry.
-[ ] also need a test to verify that you can't pause from an already paused actor task such as can happen if you try to step through runtime code that has a recurrent entry to `._debug.pause()`. --- tests/test_debugger.py | 108 +++++++++++++++++++++++++++++------------ 1 file changed, 78 insertions(+), 30 deletions(-) diff --git a/tests/test_debugger.py b/tests/test_debugger.py index 0de2020d..6aa3bd53 100644 --- a/tests/test_debugger.py +++ b/tests/test_debugger.py @@ -144,9 +144,10 @@ def in_prompt_msg( log/REPL output for a given `pdb` interact point. ''' + __tracebackhide__: bool = False + for part in parts: if part not in prompt: - if pause_on_false: import pdbp pdbp.set_trace() @@ -165,6 +166,7 @@ def assert_before( **kwargs, ) -> None: + __tracebackhide__: bool = False # as in before the prompt end before: str = str(child.before.decode()) @@ -217,7 +219,10 @@ def ctlc( ], ids=lambda item: f'{item[0]} -> {item[1]}', ) -def test_root_actor_error(spawn, user_in_out): +def test_root_actor_error( + spawn, + user_in_out, +): ''' Demonstrate crash handler entering pdb from basic error in root actor. @@ -463,8 +468,12 @@ def test_subactor_breakpoint( child.expect(PROMPT) before = str(child.before.decode()) - assert "RemoteActorError: ('breakpoint_forever'" in before - assert 'bdb.BdbQuit' in before + assert in_prompt_msg( + before, + ['RemoteActorError:', + "('breakpoint_forever'", + 'bdb.BdbQuit',] + ) if ctlc: do_ctlc(child) @@ -476,8 +485,12 @@ def test_subactor_breakpoint( child.expect(pexpect.EOF) before = str(child.before.decode()) - assert "RemoteActorError: ('breakpoint_forever'" in before - assert 'bdb.BdbQuit' in before + assert in_prompt_msg( + before, + ['RemoteActorError:', + "('breakpoint_forever'", + 'bdb.BdbQuit',] + ) @has_nested_actors @@ -745,8 +758,9 @@ def test_multi_daemon_subactors( # boxed error raised in root task # "Attaching to pdb in crashed actor: ('root'", _crash_msg, - "('root'", - "_exceptions.RemoteActorError: ('name_error'", + "('root'", # should attach in root + "_exceptions.RemoteActorError:", # with an embedded RAE for.. + "('name_error'", # the src subactor which raised ] ) @@ -847,10 +861,11 @@ def test_multi_nested_subactors_error_through_nurseries( # https://github.com/goodboy/tractor/issues/320 # ctlc: bool, ): - """Verify deeply nested actors that error trigger debugger entries + ''' + Verify deeply nested actors that error trigger debugger entries at each actor nurserly (level) all the way up the tree. - """ + ''' # NOTE: previously, inside this script was a bug where if the # parent errors before a 2-levels-lower actor has released the lock, # the parent tries to cancel it but it's stuck in the debugger? 
@@ -870,22 +885,31 @@ def test_multi_nested_subactors_error_through_nurseries( except EOF: break - assert_before(child, [ + assert_before( + child, + [ # boxed source errors + "NameError: name 'doggypants' is not defined", + "tractor._exceptions.RemoteActorError:", + "('name_error'", + "bdb.BdbQuit", - # boxed source errors - "NameError: name 'doggypants' is not defined", - "tractor._exceptions.RemoteActorError: ('name_error'", - "bdb.BdbQuit", + # first level subtrees + # "tractor._exceptions.RemoteActorError: ('spawner0'", + "src_uid=('spawner0'", - # first level subtrees - "tractor._exceptions.RemoteActorError: ('spawner0'", - # "tractor._exceptions.RemoteActorError: ('spawner1'", + # "tractor._exceptions.RemoteActorError: ('spawner1'", - # propagation of errors up through nested subtrees - "tractor._exceptions.RemoteActorError: ('spawn_until_0'", - "tractor._exceptions.RemoteActorError: ('spawn_until_1'", - "tractor._exceptions.RemoteActorError: ('spawn_until_2'", - ]) + # propagation of errors up through nested subtrees + # "tractor._exceptions.RemoteActorError: ('spawn_until_0'", + # "tractor._exceptions.RemoteActorError: ('spawn_until_1'", + # "tractor._exceptions.RemoteActorError: ('spawn_until_2'", + # ^-NOTE-^ old RAE repr, new one is below with a field + # showing the src actor's uid. + "src_uid=('spawn_until_0'", + "relay_uid=('spawn_until_1'", + "src_uid=('spawn_until_2'", + ] + ) @pytest.mark.timeout(15) @@ -1019,13 +1043,16 @@ def test_different_debug_mode_per_actor( # msg reported back from the debug mode actor is processed. # assert "tractor._exceptions.RemoteActorError: ('debugged_boi'" in before - assert "tractor._exceptions.RemoteActorError: ('crash_boi'" in before - # the crash boi should not have made a debugger request but # instead crashed completely - assert "tractor._exceptions.RemoteActorError: ('crash_boi'" in before - assert "RuntimeError" in before - + assert_before( + child, + [ + "tractor._exceptions.RemoteActorError:", + "src_uid=('crash_boi'", + "RuntimeError", + ] + ) def test_pause_from_sync( @@ -1044,13 +1071,15 @@ def test_pause_from_sync( assert_before( child, [ - '`greenback` portal opened!', # pre-prompt line - _pause_msg, "('root'", + _pause_msg, + " Date: Wed, 22 May 2024 15:10:39 -0400 Subject: [PATCH 116/305] Port `Actor._stream_handler()` to use `.has_outcome`, fix indent bug.. 
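A toy repro of the indent bug fixed below (hypothetical stand-in types,
not the runtime's actual peer-table objects): the walrus-guarded
liveness check must sit inside the block that unpacks `proc` from the
peer `entry`, otherwise it evaluates against a stale/unbound ref:

    class Proc:
        def poll(self) -> int|None:
            # `None` means "still running", mirroring
            # `subprocess.Popen.poll()` semantics.
            return None

    entry: tuple = (None, Proc(), None)
    if entry:
        _, proc, _ = entry
        # correct placement: guarded eval inside the unpacking block
        if (
            (poll := getattr(proc, 'poll', None))
            and poll() is None
        ):
            print('child proc still alive!')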
--- tractor/_runtime.py | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 1d931cd7..f267ff67 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -694,21 +694,21 @@ class Actor: proc: trio.Process _, proc, _ = entry - if ( - (poll := getattr(proc, 'poll', None)) - and poll() is None - ): - log.cancel( - 'Root actor reports no-more-peers, BUT\n' - 'a DISCONNECTED child still has the debug ' - 'lock!\n\n' - # f'root uid: {self.uid}\n' - f'last disconnected child uid: {uid}\n' - f'locking child uid: {pdb_user_uid}\n' - ) - await _debug.maybe_wait_for_debugger( - child_in_debug=True - ) + if ( + (poll := getattr(proc, 'poll', None)) + and poll() is None + ): + log.cancel( + 'Root actor reports no-more-peers, BUT\n' + 'a DISCONNECTED child still has the debug ' + 'lock!\n\n' + # f'root uid: {self.uid}\n' + f'last disconnected child uid: {uid}\n' + f'locking child uid: {pdb_user_uid}\n' + ) + await _debug.maybe_wait_for_debugger( + child_in_debug=True + ) # TODO: just bc a child's transport dropped # doesn't mean it's not still using the pdb @@ -1142,7 +1142,6 @@ class Actor: requester_type, req_chan, log_meth, - ) = ( req_chan.uid, 'peer', @@ -1175,7 +1174,11 @@ class Actor: # with the root actor in this tree debug_req = _debug.DebugStatus lock_req_ctx: Context = debug_req.req_ctx - if lock_req_ctx is not None: + if ( + lock_req_ctx + and + lock_req_ctx.has_outcome + ): msg += ( '-> Cancelling active debugger request..\n' f'|_{_debug.Lock.repr()}\n\n' -- 2.34.1 From eb88511a8ccdd8c36edf366374cdaaf70bd9ae4c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 22 May 2024 15:11:21 -0400 Subject: [PATCH 117/305] Call `.devx._debug.hide_runtime_frames()` by default From both `open_root_actor()` and `._entry._trio_main()`. Other `breakpoint()`-from-sync-func fixes: - properly disable the default hook using `"0"` XD - offer a `hide_tb: bool` from `open_root_actor()`. - disable hiding the `._trio_main()` frame, bc pretty sure it doesn't help anyone (either way) when REPL-ing/tb-ing from a subactor..? --- tractor/_entry.py | 7 ++++--- tractor/_root.py | 13 ++++++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/tractor/_entry.py b/tractor/_entry.py index 750dc59f..e22a4f1f 100644 --- a/tractor/_entry.py +++ b/tractor/_entry.py @@ -33,6 +33,7 @@ from .log import ( get_logger, ) from . import _state +from .devx import _debug from .to_asyncio import run_as_asyncio_guest from ._runtime import ( async_main, @@ -96,7 +97,6 @@ def _mp_main( def _trio_main( - actor: Actor, *, parent_addr: tuple[str, int] | None = None, @@ -107,7 +107,9 @@ def _trio_main( Entry point for a `trio_run_in_process` subactor. ''' - __tracebackhide__: bool = True + # __tracebackhide__: bool = True + _debug.hide_runtime_frames() + _state._current_actor = actor trio_main = partial( async_main, @@ -146,7 +148,6 @@ def _trio_main( + actor_info ) - finally: log.info( 'Subactor terminated\n' diff --git a/tractor/_root.py b/tractor/_root.py index 4c0bb4f6..4ddfde51 100644 --- a/tractor/_root.py +++ b/tractor/_root.py @@ -92,12 +92,16 @@ async def open_root_actor( # and that this call creates it. ensure_registry: bool = False, + hide_tb: bool = True, + ) -> Actor: ''' Runtime init entry point for ``tractor``. ''' - __tracebackhide__ = True + __tracebackhide__: bool = hide_tb + _debug.hide_runtime_frames() + # TODO: stick this in a `@cm` defined in `devx._debug`? 
# # Override the global debugger hook to make it play nice with @@ -126,7 +130,7 @@ async def open_root_actor( # usage by a clobbered TTY's stdstreams! def block_bps(*args, **kwargs): raise RuntimeError( - 'Trying to use `breakpoint()` eh?\n' + 'Trying to use `breakpoint()` eh?\n\n' 'Welp, `tractor` blocks `breakpoint()` built-in calls by default!\n' 'If you need to use it please install `greenback` and set ' '`debug_mode=True` when opening the runtime ' @@ -134,7 +138,9 @@ async def open_root_actor( ) sys.breakpointhook = block_bps - # os.environ['PYTHONBREAKPOINT'] = None + # lol ok, + # https://docs.python.org/3/library/sys.html#sys.breakpointhook + os.environ['PYTHONBREAKPOINT'] = "0" # attempt to retreive ``trio``'s sigint handler and stash it # on our debugger lock state. @@ -204,6 +210,7 @@ async def open_root_actor( ): loglevel = 'PDB' + elif debug_mode: raise RuntimeError( "Debug mode is only supported for the `trio` backend!" -- 2.34.1 From 18de9c1693ca1346f7f2d3bf5776b012bb40c405 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 22 May 2024 15:18:45 -0400 Subject: [PATCH 118/305] Mk `MsgDec.spec_str` have a more compact ` --- tractor/msg/_codec.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index 6ba23b78..cd86552f 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -162,7 +162,10 @@ class MsgDec(Struct): # TODO: would get moved into `FieldSpec.__str__()` right? @property def spec_str(self) -> str: - return pformat_msgspec(codec=self) + return pformat_msgspec( + codec=self, + join_char='|', + ) pld_spec_str = spec_str @@ -211,7 +214,7 @@ def mk_msgspec_table( msgtypes = [msgspec] msgt_table: dict[str, MsgType] = { - msgt: str(msgt) + msgt: str(msgt.__name__) for msgt in msgtypes } if msg: -- 2.34.1 From 5b14baaf582b4387309c6451edadb28e5fa91302 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 22 May 2024 15:21:01 -0400 Subject: [PATCH 119/305] Add debug check-n-wait inside `._spawn.soft_kill()` And IFF the `await wait_func(proc)` is cancelled such that we avoid clobbering some subactor that might be REPL-ing even though its parent actor is in the midst of (gracefully) cancelling it. --- tractor/_spawn.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tractor/_spawn.py b/tractor/_spawn.py index 09d9aff8..481e2981 100644 --- a/tractor/_spawn.py +++ b/tractor/_spawn.py @@ -43,6 +43,7 @@ from tractor._state import ( is_main_process, is_root_process, debug_mode, + _runtime_vars, ) from tractor.log import get_logger from tractor._portal import Portal @@ -303,7 +304,6 @@ async def hard_kill( async def soft_kill( - proc: ProcessType, wait_func: Callable[ [ProcessType], @@ -333,6 +333,18 @@ async def soft_kill( await wait_func(proc) except trio.Cancelled: + with trio.CancelScope(shield=True): + await maybe_wait_for_debugger( + child_in_debug=_runtime_vars.get( + '_debug_mode', False + ), + header_msg=( + 'Delaying `soft_kill()` subproc reaper while debugger locked..\n' + ), + # TODO: need a diff value then default? + # poll_steps=9999999, + ) + # if cancelled during a soft wait, cancel the child # actor before entering the hard reap sequence # below. 
This means we try to do a graceful teardown
-- 
2.34.1

From 4520183cdc93f46efebb98a051944266f6718ab1 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 22 May 2024 15:26:48 -0400
Subject: [PATCH 120/305] Even smarter `RemoteActorError.pformat()`-ing

Related to the prior patch, re the new `with_type_header: bool`:
- in the `with_type_header == True` use case make sure we keep the
  first `._message: str` line non-indented since it'll show just after
  the header-line's type path with ':'.
- when `False` drop the `)>` `repr()`-instance style as well so that we
  just get the ascii boxed traceback as though it's the error
  message-`str` not the `repr()` of the error obj.

Other,
- hide `pack_from_raise()` call frame since it'll show in debug mode
  crash handling..
- mk `MsgTypeError.from_decode()` explicitly accept and proxy an
  optional `ipc_msg` and change `msgdict` to also be optional, only
  reading out the `**extra_msgdata` when provided.
- expose a `_mk_msg_type_err(src_err_msg: Error|None = None,)` for
  callers who wish to inject a `._ipc_msg: MsgType` to the MTE.
  |_ add a note how we can't use it due to a causality-dilemma when pld
     validating `Started` on the send side..
---
 tractor/_exceptions.py | 84 +++++++++++++++++++++++++++++++-----------
 1 file changed, 62 insertions(+), 22 deletions(-)

diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py
index 179b49a1..9a94bbdb 100644
--- a/tractor/_exceptions.py
+++ b/tractor/_exceptions.py
@@ -35,7 +35,6 @@ import trio
 from msgspec import (
     defstruct,
     msgpack,
-    Raw,
     structs,
     ValidationError,
 )
@@ -44,11 +43,12 @@ from tractor._state import current_actor
 from tractor.log import get_logger
 from tractor.msg import (
     Error,
+    PayloadMsg,
     MsgType,
-    Stop,
-    types as msgtypes,
     MsgCodec,
     MsgDec,
+    Stop,
+    types as msgtypes,
 )
 from tractor.msg.pretty_struct import (
     iter_fields,
@@ -156,6 +156,7 @@ def pack_from_raise(
     `Error`-msg using `pack_error()` to extract the tb info.
 
     '''
+    __tracebackhide__: bool = True
     try:
         raise local_err
     except type(local_err) as local_err:
@@ -525,10 +526,26 @@ class RemoteActorError(Exception):
         if not with_type_header:
             body = '\n' + body
         else:
-            body: str = textwrap.indent(
-                self._message,
-                prefix='  ',
-            ) + '\n'
+            first: str = ''
+            message: str = self._message
+
+            # split off the first line so it isn't indented
+            # the same like the "boxed content".
+            if not with_type_header:
+                lines: list[str] = message.splitlines()
+                first = lines[0]
+                message = ''.join(lines[1:])
+
+            body: str = (
+                first
+                +
+                textwrap.indent(
+                    message,
+                    prefix='  ',
+                )
+                +
+                '\n'
+            )
 
         if with_type_header:
             tail: str = ')>'
@@ -734,25 +751,38 @@ class MsgTypeError(
     def from_decode(
         cls,
         message: str,
-        msgdict: dict,
+
+        ipc_msg: PayloadMsg|None = None,
+        msgdict: dict|None = None,
 
     ) -> MsgTypeError:
-        return cls(
-            message=message,
-            boxed_type=cls,
+        '''
+        Constructor for easy creation from (presumably) catching
+        the backend interchange lib's underlying validation error
+        and passing context-specific meta-data to `_mk_msg_type_err()`
+        (which is normally the caller of this).
 
-            # NOTE: original "vanilla decode" of the msg-bytes
-            # is placed inside a value readable from
-            # `.msgdata['_msg_dict']`
-            _msg_dict=msgdict,
-
-            # expand and pack all RAE compat fields
-            # into the `._extra_msgdata` aux `dict`.
-            **{
+        '''
+        # if provided, expand and pack all RAE compat fields into the
+        # `._extra_msgdata` auxiliary data `dict` internal to
+        # `RemoteActorError`.
+ extra_msgdata: dict = {} + if msgdict: + extra_msgdata: dict = { k: v for k, v in msgdict.items() if k in _ipcmsg_keys - }, + } + # NOTE: original "vanilla decode" of the msg-bytes + # is placed inside a value readable from + # `.msgdata['_msg_dict']` + extra_msgdata['_msg_dict'] = msgdict + + return cls( + message=message, + boxed_type=cls, + ipc_msg=ipc_msg, + **extra_msgdata, ) @@ -1076,7 +1106,7 @@ _raise_from_no_key_in_msg = _raise_from_unexpected_msg def _mk_msg_type_err( - msg: Any|bytes|Raw, + msg: Any|bytes|MsgType, codec: MsgCodec|MsgDec, message: str|None = None, @@ -1085,6 +1115,7 @@ def _mk_msg_type_err( src_validation_error: ValidationError|None = None, src_type_error: TypeError|None = None, is_invalid_payload: bool = False, + src_err_msg: Error|None = None, **mte_kwargs, @@ -1159,9 +1190,10 @@ def _mk_msg_type_err( # only the payload being wrong? # -[ ] maybe the better design is to break this construct # logic into a separate explicit helper raiser-func? - msg_dict: dict = {} + msg_dict = None else: + msg: bytes # decode the msg-bytes using the std msgpack # interchange-prot (i.e. without any # `msgspec.Struct` handling) so that we can @@ -1206,6 +1238,14 @@ def _mk_msg_type_err( msgtyperr = MsgTypeError.from_decode( message=message, msgdict=msg_dict, + + # NOTE: for the send-side `.started()` pld-validate + # case we actually set the `._ipc_msg` AFTER we return + # from here inside `Context.started()` since we actually + # want to emulate the `Error` from the mte we build here + # Bo + # so by default in that case this is set to `None` + ipc_msg=src_err_msg, ) msgtyperr.__cause__ = src_validation_error return msgtyperr -- 2.34.1 From 71518ea94a176851fbb4a18747a619c9f429eac1 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 27 May 2024 13:52:35 -0400 Subject: [PATCH 121/305] Add basic payload-spec test suite Starts with some very basic cases: - verify both subactor-as-child-ctx-task send side validation (failures) as well as relay and raise on root-parent-side-task. - wrap failure expectation cases that bubble out of `@acm`s with a `maybe_expect_raises()` equiv wrapper with an embedded timeout. - add `Return` cases including invalid by `str` and valid by a `None`. Still ToDo: - commit impl changes to make the bulk of this suite pass. - adjust how `MsgTypeError`s format the local (`.started()`) send side `.tb_str` such that we don't do a "boxed" error prior to `pack_error()` being called normally prior to `Error` transit. --- tests/test_pldrx_limiting.py | 316 +++++++++++++++++++++++++++++++++++ 1 file changed, 316 insertions(+) create mode 100644 tests/test_pldrx_limiting.py diff --git a/tests/test_pldrx_limiting.py b/tests/test_pldrx_limiting.py new file mode 100644 index 00000000..d658fb51 --- /dev/null +++ b/tests/test_pldrx_limiting.py @@ -0,0 +1,316 @@ +''' +Audit sub-sys APIs from `.msg._ops` +mostly for ensuring correct `contextvars` +related settings around IPC contexts. 
+
+'''
+from contextlib import (
+    asynccontextmanager as acm,
+    contextmanager as cm,
+)
+# import typing
+from typing import (
+    # Any,
+    TypeAlias,
+    # Union,
+)
+from contextvars import (
+    Context,
+)
+
+from msgspec import (
+    # structs,
+    # msgpack,
+    Struct,
+    # ValidationError,
+)
+import pytest
+import trio
+
+import tractor
+from tractor import (
+    # _state,
+    MsgTypeError,
+    current_ipc_ctx,
+    Portal,
+)
+from tractor.msg import (
+    _ops as msgops,
+    Return,
+)
+from tractor.msg import (
+    _codec,
+    # _ctxvar_MsgCodec,
+
+    # NamespacePath,
+    # MsgCodec,
+    # mk_codec,
+    # apply_codec,
+    # current_codec,
+)
+from tractor.msg.types import (
+    log,
+    # _payload_msgs,
+    # PayloadMsg,
+    # Started,
+    # mk_msg_spec,
+)
+
+
+class PldMsg(Struct):
+    field: str
+
+
+maybe_msg_spec = PldMsg|None
+
+
+@cm
+def custom_spec(
+    ctx: Context,
+    spec: TypeAlias,
+) -> _codec.MsgCodec:
+    '''
+    Apply a custom payload spec, remove on exit.
+
+    '''
+    rx: msgops.PldRx = ctx._pld_rx
+
+
+@acm
+async def maybe_expect_raises(
+    raises: BaseException|None = None,
+    ensure_in_message: list[str]|None = None,
+
+    reraise: bool = False,
+    timeout: int = 3,
+) -> None:
+    '''
+    Async wrapper for ensuring errors propagate from the inner scope.
+
+    '''
+    with trio.fail_after(timeout):
+        try:
+            yield
+        except BaseException as _inner_err:
+            inner_err = _inner_err
+            # wasn't-expected to error..
+            if raises is None:
+                raise
+
+            else:
+                assert type(inner_err) is raises
+
+                # maybe check for error txt content
+                if ensure_in_message:
+                    part: str
+                    for part in ensure_in_message:
+                        for i, arg in enumerate(inner_err.args):
+                            if part in arg:
+                                break
+                        # if part never matches an arg, then we're
+                        # missing a match.
+                        else:
+                            raise ValueError(
+                                'Failed to find error message content?\n\n'
+                                f'expected: {ensure_in_message!r}\n'
+                                f'part: {part!r}\n\n'
+                                f'{inner_err.args}'
+                            )
+
+            if reraise:
+                raise inner_err
+
+        else:
+            if raises:
+                raise RuntimeError(
+                    f'Expected a {raises.__name__!r} to be raised?'
+                )
+
+
+@tractor.context
+async def child(
+    ctx: Context,
+    started_value: int|PldMsg|None,
+    return_value: str|None,
+    validate_pld_spec: bool,
+    raise_on_started_mte: bool = True,
+
+) -> None:
+    '''
+    Call ``Context.started()`` with a possibly invalid payload to
+    audit send-side and recv-side payload-spec validation.
+
+    '''
+    expect_started_mte: bool = started_value == 10
+
+    # sanity check that child RPC context is the current one
+    curr_ctx: Context = current_ipc_ctx()
+    assert ctx is curr_ctx
+
+    rx: msgops.PldRx = ctx._pld_rx
+    orig_pldec: _codec.MsgDec = rx.pld_dec
+    # sanity check that the default pld-spec is set
+    assert (
+        rx.pld_dec
+        is
+        msgops._def_any_pldec
+    )
+
+    try:
+        with msgops.limit_plds(
+            spec=maybe_msg_spec,
+        ) as pldec:
+            # sanity on `MsgDec` state
+            assert rx.pld_dec is pldec
+            assert pldec.spec is maybe_msg_spec
+
+            # 2 cases: handle send-side and recv-only validation
+            # - when `raise_on_started_mte == True`, send validate
+            # - else, parent-recv-side only validation
+            try:
+                await ctx.started(
+                    value=started_value,
+                    validate_pld_spec=validate_pld_spec,
+                )
+
+            except MsgTypeError:
+                log.exception('`.started()` raised an MTE!\n')
+                if not expect_started_mte:
+                    raise RuntimeError(
+                        'Child-ctx-task SHOULD NOT HAVE raised an MTE for\n\n'
+                        f'{started_value!r}\n'
+                    )
+
+                # propagate to parent?
+                if raise_on_started_mte:
+                    raise
+            else:
+                if expect_started_mte:
+                    raise RuntimeError(
+                        'Child-ctx-task SHOULD HAVE raised an MTE for\n\n'
+                        f'{started_value!r}\n'
+                    )
+
+            # XXX should always fail on recv side since we can't
+            # really do much else beside terminate and relay the
+            # msg-type-error from this RPC task ;)
+            return return_value
+
+    finally:
+        # sanity on `limit_plds()` reversion
+        assert (
+            rx.pld_dec
+            is
+            msgops._def_any_pldec
+        )
+        log.runtime(
+            'Reverted to previous pld-spec\n\n'
+            f'{orig_pldec}\n'
+        )
+
+
+@pytest.mark.parametrize(
+    'return_value',
+    [
+        None,
+        'yo',
+    ],
+    ids=[
+        'return[invalid-"yo"]',
+        'return[valid-None]',
+    ],
+)
+@pytest.mark.parametrize(
+    'started_value',
+    [
+        10,
+        PldMsg(field='yo'),
+    ],
+    ids=[
+        'Started[invalid-10]',
+        'Started[valid-PldMsg]',
+    ],
+)
+@pytest.mark.parametrize(
+    'pld_check_started_value',
+    [
+        True,
+        False,
+    ],
+    ids=[
+        'check-started-pld',
+        'no-started-pld-validate',
+    ],
+)
+def test_basic_payload_spec(
+    debug_mode: bool,
+    loglevel: str,
+    return_value: str|None,
+    started_value: int|PldMsg,
+    pld_check_started_value: bool,
+):
+    '''
+    Validate the most basic `PldRx` msg-type-spec semantics around
+    a IPC `Context` endpoint start, started-sync, and final return
+    value depending on set payload types and the currently applied
+    pld-spec.
+
+    '''
+    invalid_return: bool = return_value == 'yo'
+    invalid_started: bool = started_value == 10
+
+    async def main():
+        async with tractor.open_nursery(
+            debug_mode=debug_mode,
+            loglevel=loglevel,
+        ) as an:
+            p: Portal = await an.start_actor(
+                'child',
+                enable_modules=[__name__],
+            )
+
+            # since not opened yet.
+            assert current_ipc_ctx() is None
+
+            async with (
+                maybe_expect_raises(
+                    raises=MsgTypeError if (
+                        invalid_return
+                        or
+                        invalid_started
+                    ) else None,
+                    ensure_in_message=[
+                        "invalid `Return` payload",
+                        "value: `'yo'` does not match type-spec: `Return.pld: PldMsg|NoneType`",
+                    ],
+                ),
+                p.open_context(
+                    child,
+                    return_value=return_value,
+                    started_value=started_value,
+                    pld_spec=maybe_msg_spec,
+                    validate_pld_spec=pld_check_started_value,
+                ) as (ctx, first),
+            ):
+                # now opened with 'child' sub
+                assert current_ipc_ctx() is ctx
+
+                assert type(first) is PldMsg
+                assert first.field == 'yo'
+
+                try:
+                    assert (await ctx.result()) is None
+                except MsgTypeError as mte:
+                    if not invalid_return:
+                        raise
+
+                    else:  # expected this invalid `Return.pld`
+                        assert mte.cid == ctx.cid
+
+                        # verify expected remote mte deats
+                        await tractor.pause()
+                        assert ctx._remote_error is mte
+                        assert mte.expected_msg_type is Return
+
+            await p.cancel_actor()
+
+    trio.run(main)
-- 
2.34.1

From 6819ec01d0ac164b0ab71a668a0beb08309847be Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Mon, 27 May 2024 14:59:40 -0400
Subject: [PATCH 122/305] More correct/explicit `.started()` send-side
 validation

In the sense that we handle it as a special case exposed through to
`PldRx.dec_msg()` with a new `is_started_send_side: bool`.

(Non-ideal) `Context.started()` impl deats:
- only do send-side pld-spec validation when a new `validate_pld_spec`
  is set (by default it's not).
- call `self.pld_rx.dec_msg(is_started_send_side=True)` to validate the
  payload field from the just codec-ed `Started` msg's `msg_bytes` by
  passing the `roundtripped` msg (with its `.pld: Raw`) directly.
- add a `hide_tb: bool` param and proxy it to the `.dec_msg()` call
  (see the sketch just below).
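For reference, a rough standalone sketch of the send-side round-trip
validation idea using plain `msgspec` (the `PldMsg` spec here is
hypothetical and this is not the actual `PldRx` API):

    import msgspec

    class PldMsg(msgspec.Struct):
        field: str

    # encode with the "any" codec, then immediately re-decode against
    # the narrowed payload spec so a bad payload raises locally,
    # before the msg ever hits the IPC transport.
    msg_bytes: bytes = msgspec.msgpack.Encoder().encode(
        PldMsg(field='yo')
    )
    try:
        pld: PldMsg = msgspec.msgpack.Decoder(type=PldMsg).decode(
            msg_bytes
        )
    except msgspec.ValidationError:
        # the runtime instead wraps this in a `MsgTypeError`
        raise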
(Non-ideal) `PldRx.dec_msg()` impl deats:
- for now we're packing the MTE inside an `Error` via a manual
  call to `pack_error()` and then setting that as the `msg` passed to
  `_raise_from_unexpected_msg()` (though really we should just raise
  inline?).
- manually set the `MsgTypeError._ipc_msg` to the above..

Other,
- more comprehensive `Context` type doc string.
- various `hide_tb: bool` kwarg additions through `._ops.PldRx` meths.
- proto a `.msg._ops.validate_payload_msg()` helper planned to get the
  logic from this version of `.started()`'s send-side validation so as
  to be useful more generally elsewhere.. (like for raising back
  `Return` values on the child side?).

Warning: this commit may have been made out of order from required
changes to `._exceptions` which will come in a follow up!
---
 tractor/_context.py | 128 +++++++++++++++++++++-----------------------
 tractor/msg/_ops.py |  70 ++++++++++++++++++++----
 2 files changed, 122 insertions(+), 76 deletions(-)

diff --git a/tractor/_context.py b/tractor/_context.py
index 152efdee..fed9f2bc 100644
--- a/tractor/_context.py
+++ b/tractor/_context.py
@@ -15,12 +15,22 @@
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
 '''
-The fundamental cross process SC abstraction: an inter-actor,
-cancel-scope linked task "context".
+The fundamental cross-process SC abstraction: an inter-actor,
+transitively cancel-scope linked, (dual) task IPC coupled "context".
 
-A ``Context`` is very similar to the ``trio.Nursery.cancel_scope`` built
-into each ``trio.Nursery`` except it links the lifetimes of memory space
-disjoint, parallel executing tasks in separate actors.
+A `Context` is very similar to the look and feel of the
+`.cancel_scope: trio.CancelScope` built into each `trio.Nursery`
+except that it links the lifetimes of 2 memory space disjoint,
+parallel executing, tasks scheduled in separate "actors".
+
+So while a `trio.Nursery` has a `.parent_task` which exists both
+before (open) and then inside the body of the `async with` of the
+nursery's scope (/block), a `Context` contains 2 tasks, a "parent"
+and a "child" side, where both execute independently in separate
+memory domains of different (host's) processes linked through
+a SC-transitive IPC "shuttle dialog protocol". The underlying IPC
+dialog-(un)protocol allows for the maintenance of SC properties
+end-2-end between the tasks.
 
 '''
 from __future__ import annotations
@@ -71,13 +81,11 @@ from .msg import (
     MsgCodec,
     NamespacePath,
     PayloadT,
-    Return,
     Started,
     Stop,
     Yield,
     current_codec,
     pretty_struct,
-    types as msgtypes,
     _ops as msgops,
 )
 from ._ipc import (
@@ -90,7 +98,7 @@ from ._state import (
     debug_mode,
     _ctxvar_Context,
 )
-
+# ------ - ------
 if TYPE_CHECKING:
     from ._portal import Portal
     from ._runtime import Actor
@@ -1598,16 +1606,15 @@ class Context:
 
     async def started(
         self,
-
-        # TODO: how to type this so that it's the
-        # same as the payload type? Is this enough?
         value: PayloadT|None = None,
+        validate_pld_spec: bool = True,
+        strict_pld_parity: bool = False,
 
-        strict_parity: bool = False,
+        # TODO: this will always emit for msgpack for any () vs. []
+        # inside the value.. do we want to offer warnings on that?
+        # complain_no_parity: bool = False,
 
-        # TODO: this will always emit now that we do `.pld: Raw`
-        # passthrough.. so maybe just only complain when above strict
-        # flag is set?
- complain_no_parity: bool = False, + hide_tb: bool = True, ) -> None: ''' @@ -1648,63 +1655,54 @@ class Context: # # https://zguide.zeromq.org/docs/chapter7/#The-Cheap-or-Nasty-Pattern # - codec: MsgCodec = current_codec() - msg_bytes: bytes = codec.encode(started_msg) - try: - # be a "cheap" dialog (see above!) - if ( - strict_parity - or - complain_no_parity - ): - rt_started: Started = codec.decode(msg_bytes) - - # XXX something is prolly totes cucked with the - # codec state! - if isinstance(rt_started, dict): - rt_started = msgtypes.from_dict_msg( - dict_msg=rt_started, - ) - raise RuntimeError( - 'Failed to roundtrip `Started` msg?\n' - f'{pretty_struct.pformat(rt_started)}\n' - ) - - if rt_started != started_msg: + __tracebackhide__: bool = hide_tb + if validate_pld_spec: + # __tracebackhide__: bool = False + codec: MsgCodec = current_codec() + msg_bytes: bytes = codec.encode(started_msg) + try: + roundtripped: Started = codec.decode(msg_bytes) + # pld: PayloadT = await self.pld_rx.recv_pld( + pld: PayloadT = self.pld_rx.dec_msg( + msg=roundtripped, + ipc=self, + expect_msg=Started, + hide_tb=hide_tb, + is_started_send_side=True, + ) + if ( + strict_pld_parity + and + pld != value + ): # TODO: make that one a mod func too.. diff = pretty_struct.Struct.__sub__( - rt_started, + roundtripped, started_msg, ) complaint: str = ( 'Started value does not match after roundtrip?\n\n' f'{diff}' ) + raise ValidationError(complaint) - # TODO: rn this will pretty much always fail with - # any other sequence type embeded in the - # payload... - if ( - self._strict_started - or - strict_parity - ): - raise ValueError(complaint) - else: - log.warning(complaint) + # raise any msg type error NO MATTER WHAT! + except ValidationError as verr: + # always show this src frame in the tb + # __tracebackhide__: bool = False + raise _mk_msg_type_err( + msg=roundtripped, + codec=codec, + src_validation_error=verr, + verb_header='Trying to send ', + is_invalid_payload=True, + ) from verr - await self.chan.send(started_msg) - - # raise any msg type error NO MATTER WHAT! - except ValidationError as verr: - raise _mk_msg_type_err( - msg=msg_bytes, - codec=codec, - src_validation_error=verr, - verb_header='Trying to send payload' - # > 'invalid `Started IPC msgs\n' - ) from verr + # TODO: maybe a flag to by-pass encode op if already done + # here in caller? + await self.chan.send(started_msg) + # set msg-related internal runtime-state self._started_called = True self._started_msg = started_msg self._started_pld = value @@ -1997,12 +1995,7 @@ async def open_context_from_portal( pld_spec: TypeAlias|None = None, allow_overruns: bool = False, - - # TODO: if we set this the wrapping `@acm` body will - # still be shown (awkwardly) on pdb REPL entry. Ideally - # we can similarly annotate that frame to NOT show? for now - # we DO SHOW this frame since it's awkward ow.. 
- hide_tb: bool = False, + hide_tb: bool = True, # proxied to RPC **kwargs, @@ -2115,6 +2108,7 @@ async def open_context_from_portal( ipc=ctx, expect_msg=Started, passthrough_non_pld_msgs=False, + hide_tb=hide_tb, ) # from .devx import pause diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index 3014c15b..6faf78ef 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -47,7 +47,7 @@ from tractor._exceptions import ( _raise_from_unexpected_msg, MsgTypeError, _mk_msg_type_err, - pack_from_raise, + pack_error, ) from tractor._state import current_ipc_ctx from ._codec import ( @@ -203,7 +203,6 @@ class PldRx(Struct): msg: MsgType = ( ipc_msg or - # async-rx msg from underlying IPC feeder (mem-)chan await ipc._rx_chan.receive() ) @@ -223,6 +222,10 @@ class PldRx(Struct): raise_error: bool = True, hide_tb: bool = True, + # XXX for special (default?) case of send side call with + # `Context.started(validate_pld_spec=True)` + is_started_send_side: bool = False, + ) -> PayloadT|Raw: ''' Decode a msg's payload field: `MsgType.pld: PayloadT|Raw` and @@ -230,8 +233,6 @@ class PldRx(Struct): ''' __tracebackhide__: bool = hide_tb - - _src_err = None src_err: BaseException|None = None match msg: # payload-data shuttle msg; deliver the `.pld` value @@ -256,18 +257,58 @@ class PldRx(Struct): # pack mgterr into error-msg for # reraise below; ensure remote-actor-err # info is displayed nicely? - msgterr: MsgTypeError = _mk_msg_type_err( + mte: MsgTypeError = _mk_msg_type_err( msg=msg, codec=self.pld_dec, src_validation_error=valerr, is_invalid_payload=True, + expected_msg=expect_msg, + # ipc_msg=msg, ) - msg: Error = pack_from_raise( - local_err=msgterr, + # NOTE: override the `msg` passed to + # `_raise_from_unexpected_msg()` (below) so so that + # we're effectively able to use that same func to + # unpack and raise an "emulated remote `Error`" of + # this local MTE. + err_msg: Error = pack_error( + exc=mte, cid=msg.cid, - src_uid=ipc.chan.uid, + src_uid=( + ipc.chan.uid + if not is_started_send_side + else ipc._actor.uid + ), + # tb=valerr.__traceback__, + tb_str=mte._message, ) + # ^-TODO-^ just raise this inline instead of all the + # pack-unpack-repack non-sense! + + mte._ipc_msg = err_msg + msg = err_msg + + # set emulated remote error more-or-less as the + # runtime would + ctx: Context = getattr(ipc, 'ctx', ipc) + + # TODO: should we instead make this explicit and + # use the above masked `is_started_send_decode`, + # expecting the `Context.started()` caller to set + # it? Rn this is kinda, howyousayyy, implicitly + # edge-case-y.. + if ( + expect_msg is not Started + and not is_started_send_side + ): + ctx._maybe_cancel_and_set_remote_error(mte) + + # XXX NOTE: so when the `_raise_from_unexpected_msg()` + # raises the boxed `err_msg` from above it raises + # it from `None`. src_err = valerr + # if is_started_send_side: + # src_err = None + # XXX some other decoder specific failure? # except TypeError as src_error: @@ -379,6 +420,7 @@ class PldRx(Struct): # NOTE: generally speaking only for handling `Stop`-msgs that # arrive during a call to `drain_to_final_msg()` above! passthrough_non_pld_msgs: bool = True, + hide_tb: bool = True, **kwargs, ) -> tuple[MsgType, PayloadT]: @@ -387,6 +429,7 @@ class PldRx(Struct): the pair of refs. 
''' + __tracebackhide__: bool = hide_tb msg: MsgType = await ipc._rx_chan.receive() if passthrough_non_pld_msgs: @@ -401,6 +444,7 @@ class PldRx(Struct): msg, ipc=ipc, expect_msg=expect_msg, + hide_tb=hide_tb, **kwargs, ) return msg, pld @@ -414,7 +458,7 @@ def limit_plds( ) -> MsgDec: ''' Apply a `MsgCodec` that will natively decode the SC-msg set's - `Msg.pld: Union[Type[Struct]]` payload fields using + `PayloadMsg.pld: Union[Type[Struct]]` payload fields using tagged-unions of `msgspec.Struct`s from the `payload_types` for all IPC contexts in use by the current `trio.Task`. @@ -691,3 +735,11 @@ async def drain_to_final_msg( result_msg, pre_result_drained, ) + + +# TODO: factor logic from `.Context.started()` for send-side +# validate raising! +def validate_payload_msg( + msg: Started|Yield|Return, +) -> MsgTypeError|None: + ... -- 2.34.1 From 3ea461712038c3188954708428c1e79e4b462fa3 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 27 May 2024 22:36:05 -0400 Subject: [PATCH 123/305] Add `MsgTypeError` "bad msg" capture Such that if caught by user code and/or the runtime we can introspect the original msg which caused the type error. Previously this was kinda half-baked with a `.msg_dict` which was delivered from an `Any`-decode of the shuttle msg in `_mk_msg_type_err()` but now this more explicitly refines the API and supports both `PayloadMsg`-instance or the msg-dict style injection: - allow passing either of `bad_msg: PayloadMsg|None` or `bad_msg_as_dict: dict|None` to `MsgTypeError.from_decode()`. - expose public props for both ^ whilst dropping prior `.msgdict`. - rework `.from_decode()` to explicitly accept `**extra_msgdata: dict` |_ only overriding it from any `bad_msg_as_dict` if the keys are found in `_ipcmsg_keys`, **except** for `_bad_msg` when `bad_msg` is passed. |_ drop `.ipc_msg` passthrough. |_ drop `msgdict` input. - adjust `.cid` to only pull from the `.bad_msg` if set. Related fixes/adjustments: - `pack_from_raise()` should pull `boxed_type_str` from `boxed_type.__name__`, not the `type()` of it.. also add a `hide_tb: bool` flag. - don't include `_msg_dict` and `_bad_msg` in the `_body_fields` set. - allow more granular boxed traceback-str controls: |_ allow passing a `tb_str: str` explicitly in which case we use it verbatim and presume caller knows what they're doing. |_ when not provided, use the more explicit `traceback.format_exception(exc)` since the error instance is a required input (we still fail back to the old `.format_exc()` call if for some reason the caller passes `None`; but that should be a bug right?). |_ if a `tb: TracebackType` and a `tb_str` is passed, concat them. - in `RemoteActorError.pformat()` don't indent the `._message` part used for the `body` when `with_type_header == False`. - update `_mk_msg_type_err()` to use `bad_msg`/`bad_msg_as_dict` appropriately and drop passing `ipc_msg`. 
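A quick standalone illustration of the tb-str controls described above
using only the std-lib (variable names here are just for demo):

    import traceback

    try:
        1/0
    except ZeroDivisionError as exc:
        # py3.10+: passing just the exc instance renders the full
        # traceback including the final `ZeroDivisionError: ...` line.
        tb_str: str = ''.join(traceback.format_exception(exc))

        # vs. frames-only output (no exc type/msg tail), as used when
        # a caller passes an explicit `tb` to concat with `tb_str`.
        frames_only: str = ''.join(
            traceback.format_tb(exc.__traceback__)
        )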
--- tractor/_exceptions.py | 220 +++++++++++++++++++++++++++-------------- 1 file changed, 148 insertions(+), 72 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 9a94bbdb..85957356 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -22,6 +22,9 @@ from __future__ import annotations import builtins import importlib from pprint import pformat +from types import ( + TracebackType, +) from typing import ( Any, Callable, @@ -92,26 +95,30 @@ _ipcmsg_keys: list[str] = [ fi.name for fi, k, v in iter_fields(Error) - ] _body_fields: list[str] = list( set(_ipcmsg_keys) - # NOTE: don't show fields that either don't provide - # any extra useful info or that are already shown - # as part of `.__repr__()` output. + # XXX NOTE: DON'T-SHOW-FIELDS + # - don't provide any extra useful info or, + # - are already shown as part of `.__repr__()` or, + # - are sub-type specific. - { 'src_type_str', 'boxed_type_str', 'tb_str', 'relay_path', - '_msg_dict', 'cid', - # since only ctxc should show it but `Error` does + # only ctxc should show it but `Error` does # have it as an optional field. 'canceller', + + # only for MTEs and generally only used + # when devving/testing/debugging. + '_msg_dict', + '_bad_msg', } ) @@ -146,6 +153,7 @@ def pack_from_raise( |MsgTypeError ), cid: str, + hide_tb: bool = True, **rae_fields, @@ -156,7 +164,7 @@ def pack_from_raise( `Error`-msg using `pack_error()` to extract the tb info. ''' - __tracebackhide__: bool = True + __tracebackhide__: bool = hide_tb try: raise local_err except type(local_err) as local_err: @@ -231,7 +239,8 @@ class RemoteActorError(Exception): if ( extra_msgdata - and ipc_msg + and + ipc_msg ): # XXX mutate the orig msg directly from # manually provided input params. @@ -261,17 +270,16 @@ class RemoteActorError(Exception): # either by customizing `ContextCancelled.__init__()` or # through a special factor func? elif boxed_type: - boxed_type_str: str = type(boxed_type).__name__ + boxed_type_str: str = boxed_type.__name__ if ( ipc_msg - and not self._ipc_msg.boxed_type_str + and + self._ipc_msg.boxed_type_str != boxed_type_str ): self._ipc_msg.boxed_type_str = boxed_type_str assert self.boxed_type_str == self._ipc_msg.boxed_type_str - else: - self._extra_msgdata['boxed_type_str'] = boxed_type_str - + # ensure any roundtripping evals to the input value assert self.boxed_type is boxed_type @property @@ -309,7 +317,9 @@ class RemoteActorError(Exception): if self._ipc_msg else {} ) - return self._extra_msgdata | msgdata + return { + k: v for k, v in self._extra_msgdata.items() + } | msgdata @property def src_type_str(self) -> str: @@ -502,6 +512,8 @@ class RemoteActorError(Exception): ''' header: str = '' + body: str = '' + if with_type_header: header: str = f'<{type(self).__name__}(\n' @@ -525,24 +537,22 @@ class RemoteActorError(Exception): ) if not with_type_header: body = '\n' + body - else: - first: str = '' - message: str = self._message + elif message := self._message: # split off the first line so it isn't indented # the same like the "boxed content". 
            if not with_type_header:
                lines: list[str] = message.splitlines()
                first: str = lines[0]
                message: str = message.removeprefix(first)
+
+            else:
+                first: str = ''
 
             body: str = (
                 first
                 +
-                textwrap.indent(
-                    message,
-                    prefix='  ',
-                )
+                message
                 +
                 '\n'
             )
 
@@ -708,52 +718,72 @@ class MsgTypeError(
     ]
 
     @property
-    def msg_dict(self) -> dict[str, Any]:
+    def bad_msg(self) -> PayloadMsg|None:
         '''
-        If the underlying IPC `MsgType` was received from a remote
-        actor but was unable to be decoded to a native
-        `Yield`|`Started`|`Return` struct, the interchange backend
-        native format decoder can be used to stash a `dict`
-        version for introspection by the invalidating RPC task.
+        Ref to the original invalid IPC shuttle msg which failed
+        to decode, thus providing the reason for this error.
 
         '''
-        return self.msgdata.get('_msg_dict')
+        if (
+            (_bad_msg := self.msgdata.get('_bad_msg'))
+            and
+            isinstance(_bad_msg, PayloadMsg)
+        ):
+            return _bad_msg
 
-    @property
-    def expected_msg(self) -> MsgType|None:
-        '''
-        Attempt to construct what would have been the original
-        `MsgType`-with-payload subtype (i.e. an instance from the set
-        of msgs in `.msg.types._payload_msgs`) which failed
-        validation.
-
-        '''
-        if msg_dict := self.msg_dict.copy():
+        elif bad_msg_dict := self.bad_msg_as_dict:
             return msgtypes.from_dict_msg(
-                dict_msg=msg_dict,
+                dict_msg=bad_msg_dict.copy(),
                 # use_pretty=True,
                 # ^-TODO-^ would luv to use this BUT then the
                 # `field_prefix` in `pformat_boxed_tb()` cucks it
                 # all up.. XD
             )
 
+        return None
+
+    @property
+    def bad_msg_as_dict(self) -> dict[str, Any]:
+        '''
+        If the underlying IPC `MsgType` was received from a remote
+        actor but was unable to be decoded to a native `PayloadMsg`
+        (`Yield`|`Started`|`Return`) struct, the interchange backend
+        native format decoder can be used to stash a `dict` version
+        for introspection by the invalidating RPC task.
+
+        Optionally when this error is constructed from
+        `.from_decode()` the caller can attempt to construct what
+        would have been the original `MsgType`-with-payload subtype
+        (i.e. an instance from the set of msgs in
+        `.msg.types._payload_msgs`) which failed validation.
+
+        '''
+        return self.msgdata.get('_bad_msg_as_dict')
 
     @property
     def expected_msg_type(self) -> Type[MsgType]|None:
-        return type(self.expected_msg)
+        return type(self.bad_msg)
 
     @property
     def cid(self) -> str:
-        # pre-packed using `.from_decode()` constructor
-        return self.msgdata.get('cid')
+        # pull from required `.bad_msg` ref (or src dict)
+        if bad_msg := self.bad_msg:
+            return bad_msg.cid
+
+        return self.msgdata['cid']
 
     @classmethod
     def from_decode(
         cls,
         message: str,
-        ipc_msg: PayloadMsg|None = None,
-        msgdict: dict|None = None,
+
+        bad_msg: PayloadMsg|None = None,
+        bad_msg_as_dict: dict|None = None,
+
+        # if provided, expand and pack all RAE compat fields into the
+        # `._extra_msgdata` auxiliary data `dict` internal to
+        # `RemoteActorError`.
+        **extra_msgdata,
 
     ) -> MsgTypeError:
         '''
@@ -763,25 +793,44 @@ class MsgTypeError(
         (which is normally the caller of this).
 
         '''
-        # if provided, expand and pack all RAE compat fields into the
-        # `._extra_msgdata` auxiliary data `dict` internal to
-        # `RemoteActorError`.
- extra_msgdata: dict = {} - if msgdict: - extra_msgdata: dict = { - k: v - for k, v in msgdict.items() - if k in _ipcmsg_keys - } + if bad_msg_as_dict: # NOTE: original "vanilla decode" of the msg-bytes # is placed inside a value readable from # `.msgdata['_msg_dict']` - extra_msgdata['_msg_dict'] = msgdict + extra_msgdata['_bad_msg_as_dict'] = bad_msg_as_dict + + # scrape out any underlying fields from the + # msg that failed validation. + for k, v in bad_msg_as_dict.items(): + if ( + # always skip a duplicate entry + # if already provided as an arg + k == '_bad_msg' and bad_msg + or + # skip anything not in the default msg-field set. + k not in _ipcmsg_keys + # k not in _body_fields + ): + continue + + extra_msgdata[k] = v + + + elif bad_msg: + if not isinstance(bad_msg, PayloadMsg): + raise TypeError( + 'The provided `bad_msg` is not a `PayloadMsg` type?\n\n' + f'{bad_msg}' + ) + extra_msgdata['_bad_msg'] = bad_msg + extra_msgdata['cid'] = bad_msg.cid + + if 'cid' not in extra_msgdata: + import pdbp; pdbp.set_trace() return cls( message=message, boxed_type=cls, - ipc_msg=ipc_msg, **extra_msgdata, ) @@ -836,9 +885,10 @@ class MessagingError(Exception): def pack_error( exc: BaseException|RemoteActorError, - tb: str|None = None, cid: str|None = None, src_uid: tuple[str, str]|None = None, + tb: TracebackType|None = None, + tb_str: str = '', ) -> Error: ''' @@ -848,10 +898,28 @@ def pack_error( the receiver side using `unpack_error()` below. ''' - if tb: - tb_str = ''.join(traceback.format_tb(tb)) + if not tb_str: + tb_str: str = ( + ''.join(traceback.format_exception(exc)) + + # TODO: can we remove this is `exc` is required? + or + # NOTE: this is just a shorthand for the "last error" as + # provided by `sys.exeception()`, see: + # - https://docs.python.org/3/library/traceback.html#traceback.print_exc + # - https://docs.python.org/3/library/traceback.html#traceback.format_exc + traceback.format_exc() + ) else: - tb_str = traceback.format_exc() + if tb_str[-2:] != '\n': + tb_str += '\n' + + # when caller provides a tb instance (say pulled from some other + # src error's `.__traceback__`) we use that as the "boxed" + # tb-string instead. + if tb: + # https://docs.python.org/3/library/traceback.html#traceback.format_list + tb_str: str = ''.join(traceback.format_tb(tb)) + tb_str error_msg: dict[ # for IPC str, @@ -1115,7 +1183,7 @@ def _mk_msg_type_err( src_validation_error: ValidationError|None = None, src_type_error: TypeError|None = None, is_invalid_payload: bool = False, - src_err_msg: Error|None = None, + # src_err_msg: Error|None = None, **mte_kwargs, @@ -1164,10 +1232,10 @@ def _mk_msg_type_err( '|_ https://jcristharif.com/msgspec/extending.html#defining-a-custom-extension-messagepack-only\n' ) - msgtyperr = MsgTypeError( message=message, ipc_msg=msg, + bad_msg=msg, ) # ya, might be `None` msgtyperr.__cause__ = src_type_error @@ -1175,6 +1243,9 @@ def _mk_msg_type_err( # `Channel.recv()` case else: + msg_dict: dict|None = None + bad_msg: PayloadMsg|None = None + if is_invalid_payload: msg_type: str = type(msg) any_pld: Any = msgpack.decode(msg.pld) @@ -1186,19 +1257,20 @@ def _mk_msg_type_err( # f' |_pld: {codec.pld_spec_str}\n'# != {any_pld!r}\n' # f')>\n\n' ) + # src_err_msg = msg + bad_msg = msg # TODO: should we just decode the msg to a dict despite # only the payload being wrong? # -[ ] maybe the better design is to break this construct # logic into a separate explicit helper raiser-func? 
- msg_dict = None else: - msg: bytes # decode the msg-bytes using the std msgpack - # interchange-prot (i.e. without any - # `msgspec.Struct` handling) so that we can - # determine what `.msg.types.Msg` is the culprit - # by reporting the received value. + # interchange-prot (i.e. without any `msgspec.Struct` + # handling) so that we can determine what + # `.msg.types.PayloadMsg` is the culprit by reporting the + # received value. + msg: bytes msg_dict: dict = msgpack.decode(msg) msg_type_name: str = msg_dict['msg_type'] msg_type = getattr(msgtypes, msg_type_name) @@ -1235,9 +1307,13 @@ def _mk_msg_type_err( if verb_header: message = f'{verb_header} ' + message + # if not isinstance(bad_msg, PayloadMsg): + # import pdbp; pdbp.set_trace() + msgtyperr = MsgTypeError.from_decode( message=message, - msgdict=msg_dict, + bad_msg=bad_msg, + bad_msg_as_dict=msg_dict, # NOTE: for the send-side `.started()` pld-validate # case we actually set the `._ipc_msg` AFTER we return @@ -1245,7 +1321,7 @@ def _mk_msg_type_err( # want to emulate the `Error` from the mte we build here # Bo # so by default in that case this is set to `None` - ipc_msg=src_err_msg, + # ipc_msg=src_err_msg, ) msgtyperr.__cause__ = src_validation_error return msgtyperr -- 2.34.1 From ea5eeba0a02b76f6f4ba6c94755dc7f6b2029b80 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 28 May 2024 09:36:26 -0400 Subject: [PATCH 124/305] Parameterize the `return_msg_type` in `._invoke()` Since we also handle a runtime-specific `CancelAck`, allow the caller-scheduler to pass in the expected return-type msg per the RPC msg endpoint loop. --- tractor/_rpc.py | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index 0f200d0e..1ea9bce3 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -63,6 +63,7 @@ from .log import get_logger from .msg import ( current_codec, MsgCodec, + PayloadT, NamespacePath, pretty_struct, ) @@ -96,7 +97,7 @@ async def _invoke_non_context( treat_as_gen: bool, is_rpc: bool, - return_msg: Return|CancelAck = Return, + return_msg_type: Return|CancelAck = Return, task_status: TaskStatus[ Context | BaseException @@ -218,7 +219,7 @@ async def _invoke_non_context( and chan.connected() ): try: - ret_msg = return_msg( + ret_msg = return_msg_type( cid=cid, pld=result, ) @@ -417,7 +418,7 @@ async def _invoke( is_rpc: bool = True, hide_tb: bool = True, - return_msg: Return|CancelAck = Return, + return_msg_type: Return|CancelAck = Return, task_status: TaskStatus[ Context | BaseException @@ -531,7 +532,7 @@ async def _invoke( kwargs, treat_as_gen, is_rpc, - return_msg, + return_msg_type, task_status, ) # XXX below fallthrough is ONLY for `@context` eps @@ -591,18 +592,21 @@ async def _invoke( ctx._scope = tn.cancel_scope task_status.started(ctx) - # TODO: should would be nice to have our - # `TaskMngr` nursery here! - res: Any = await coro - ctx._result = res - - # deliver final result to caller side. - await chan.send( - return_msg( - cid=cid, - pld=res, - ) + # TODO: better `trionics` tooling: + # -[ ] should would be nice to have our `TaskMngr` + # nursery here! + # -[ ] payload value checking like we do with + # `.started()` such that the debbuger can engage + # here in the child task instead of waiting for the + # parent to crash with it's own MTE.. + res: Any|PayloadT = await coro + return_msg: Return|CancelAck = return_msg_type( + cid=cid, + pld=res, ) + # set and shuttle final result to "parent"-side task. 
+ ctx._result = res + await chan.send(return_msg) # NOTE: this happens IFF `ctx._scope.cancel()` is # called by any of, @@ -938,7 +942,7 @@ async def process_messages( actor.cancel, kwargs, is_rpc=False, - return_msg=CancelAck, + return_msg_type=CancelAck, ) log.runtime( @@ -972,7 +976,7 @@ async def process_messages( actor._cancel_task, kwargs, is_rpc=False, - return_msg=CancelAck, + return_msg_type=CancelAck, ) except BaseException: log.exception( -- 2.34.1 From deb61423c43a7061cdc6ea105b10bcdb105a5d2e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 28 May 2024 09:55:16 -0400 Subject: [PATCH 125/305] Drop `msg.types.Msg` for new replacement types The `TypeAlias` for the msg type-group is now `MsgType` and any user touching shuttle messages can now be typed as `PayloadMsg`. Relatedly, add MTE specific `Error._bad_msg[_as_dict]` fields which are handy for introspection of remote decode failures. --- tractor/_ipc.py | 2 +- tractor/msg/_codec.py | 10 +++++----- tractor/msg/types.py | 39 +++++++++++++++++++++++---------------- 3 files changed, 29 insertions(+), 22 deletions(-) diff --git a/tractor/_ipc.py b/tractor/_ipc.py index 511a053c..ec7d348a 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -291,7 +291,7 @@ class MsgpackTCPStream(MsgTransport): async def send( self, - msg: msgtypes.Msg, + msg: msgtypes.MsgType, strict_types: bool = True, # hide_tb: bool = False, diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index cd86552f..e1c59e94 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -140,7 +140,7 @@ class MsgDec(Struct): # * also a `.__contains__()` for doing `None in # TypeSpec[None|int]` since rn you need to do it on # `.__args__` for unions.. - # - `MsgSpec: Union[Type[Msg]] + # - `MsgSpec: Union[MsgType] # # -[ ] auto-genning this from new (in 3.12) type parameter lists Bo # |_ https://docs.python.org/3/reference/compound_stmts.html#type-params @@ -188,7 +188,7 @@ def mk_dec( return MsgDec( _dec=msgpack.Decoder( - type=spec, # like `Msg[Any]` + type=spec, # like `MsgType[Any]` dec_hook=dec_hook, ) ) @@ -561,7 +561,7 @@ def mk_codec( ''' # (manually) generate a msg-payload-spec for all relevant - # god-boxing-msg subtypes, parameterizing the `Msg.pld: PayloadT` + # god-boxing-msg subtypes, parameterizing the `PayloadMsg.pld: PayloadT` # for the decoder such that all sub-type msgs in our SCIPP # will automatically decode to a type-"limited" payload (`Struct`) # object (set). @@ -607,7 +607,7 @@ _def_msgspec_codec: MsgCodec = mk_codec(ipc_pld_spec=Any) # The built-in IPC `Msg` spec. # Our composing "shuttle" protocol which allows `tractor`-app code -# to use any `msgspec` supported type as the `Msg.pld` payload, +# to use any `msgspec` supported type as the `PayloadMsg.pld` payload, # https://jcristharif.com/msgspec/supported-types.html # _def_tractor_codec: MsgCodec = mk_codec( @@ -743,7 +743,7 @@ def limit_msg_spec( ) -> MsgCodec: ''' Apply a `MsgCodec` that will natively decode the SC-msg set's - `Msg.pld: Union[Type[Struct]]` payload fields using + `PayloadMsg.pld: Union[Type[Struct]]` payload fields using tagged-unions of `msgspec.Struct`s from the `payload_types` for all IPC contexts in use by the current `trio.Task`. 
diff --git a/tractor/msg/types.py b/tractor/msg/types.py index f8205c23..08511ec0 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -89,11 +89,12 @@ class PayloadMsg( # -[ ] `uuid.UUID` which has multi-protocol support # https://jcristharif.com/msgspec/supported-types.html#uuid - # The msgs "payload" (spelled without vowels): + # The msg's "payload" (spelled without vowels): # https://en.wikipedia.org/wiki/Payload_(computing) - # - # NOTE: inherited from any `Msg` (and maybe overriden - # by use of `limit_msg_spec()`), but by default is + pld: Raw + + # ^-NOTE-^ inherited from any `PayloadMsg` (and maybe type + # overriden via the `._ops.limit_plds()` API), but by default is # parameterized to be `Any`. # # XXX this `Union` must strictly NOT contain `Any` if @@ -106,7 +107,6 @@ class PayloadMsg( # TODO: could also be set to `msgspec.Raw` if the sub-decoders # approach is preferred over the generic parameterization # approach as take by `mk_msg_spec()` below. - pld: Raw # TODO: complete rename @@ -412,19 +412,24 @@ class Error( relay_path: list[tuple[str, str]] tb_str: str - cid: str|None = None - - # TODO: use UNSET or don't include them via + # TODO: only optionally include sub-type specfic fields? + # -[ ] use UNSET or don't include them via `omit_defaults` (see + # inheritance-line options above) # - # `ContextCancelled` + # `ContextCancelled` reports the src cancelling `Actor.uid` canceller: tuple[str, str]|None = None - # `StreamOverrun` + # `StreamOverrun`-specific src `Actor.uid` sender: tuple[str, str]|None = None - # for the `MsgTypeError` case where the receiver side - # decodes the underlying original `Msg`-subtype - _msg_dict: dict|None = None + # `MsgTypeError` meta-data + cid: str|None = None + # when the receiver side fails to decode a delivered + # `PayloadMsg`-subtype; one and/or both the msg-struct instance + # and `Any`-decoded to `dict` of the msg are set and relayed + # (back to the sender) for introspection. + _bad_msg: Started|Yield|Return|None = None + _bad_msg_as_dict: dict|None = None def from_dict_msg( @@ -436,9 +441,11 @@ def from_dict_msg( ) -> MsgType: ''' - Helper to build a specific `MsgType` struct from - a "vanilla" decoded `dict`-ified equivalent of the - msg: i.e. if the `msgpack.Decoder.type == Any`. + Helper to build a specific `MsgType` struct from a "vanilla" + decoded `dict`-ified equivalent of the msg: i.e. if the + `msgpack.Decoder.type == Any`, the default when using + `msgspec.msgpack` and not "typed decoding" using + `msgspec.Struct`. ''' msg_type_tag_field: str = ( -- 2.34.1 From d7ca1dfd948cf11cc22475644006c617d6f9ec49 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 28 May 2024 11:05:44 -0400 Subject: [PATCH 126/305] Fix `test_basic_payload_spec` bad msg matching Expecting `Started` or `Return` with respective bad `.pld` values depending on what type of failure is test parametrized. This makes the suite run green it seems B) --- tests/test_pldrx_limiting.py | 50 ++++++++++-------------------------- 1 file changed, 13 insertions(+), 37 deletions(-) diff --git a/tests/test_pldrx_limiting.py b/tests/test_pldrx_limiting.py index d658fb51..562164c7 100644 --- a/tests/test_pldrx_limiting.py +++ b/tests/test_pldrx_limiting.py @@ -6,30 +6,19 @@ related settings around IPC contexts. 
''' from contextlib import ( asynccontextmanager as acm, - contextmanager as cm, -) -# import typing -from typing import ( - # Any, - TypeAlias, - # Union, ) from contextvars import ( Context, ) from msgspec import ( - # structs, - # msgpack, Struct, - # ValidationError, ) import pytest import trio import tractor from tractor import ( - # _state, MsgTypeError, current_ipc_ctx, Portal, @@ -40,20 +29,9 @@ from tractor.msg import ( ) from tractor.msg import ( _codec, - # _ctxvar_MsgCodec, - - # NamespacePath, - # MsgCodec, - # mk_codec, - # apply_codec, - # current_codec, ) from tractor.msg.types import ( log, - # _payload_msgs, - # PayloadMsg, - # Started, - # mk_msg_spec, ) @@ -64,23 +42,10 @@ class PldMsg(Struct): maybe_msg_spec = PldMsg|None -@cm -def custom_spec( - ctx: Context, - spec: TypeAlias, -) -> _codec.MsgCodec: - ''' - Apply a custom payload spec, remove on exit. - - ''' - rx: msgops.PldRx = ctx._pld_rx - - @acm async def maybe_expect_raises( raises: BaseException|None = None, ensure_in_message: list[str]|None = None, - reraise: bool = False, timeout: int = 3, ) -> None: @@ -271,6 +236,17 @@ def test_basic_payload_spec( # since not opened yet. assert current_ipc_ctx() is None + if invalid_started: + msg_type_str: str = 'Started' + bad_value_str: str = '10' + elif invalid_return: + msg_type_str: str = 'Return' + bad_value_str: str = "'yo'" + else: + # XXX but should never be used below then.. + msg_type_str: str = '' + bad_value_str: str = '' + async with ( maybe_expect_raises( raises=MsgTypeError if ( @@ -279,8 +255,8 @@ def test_basic_payload_spec( invalid_started ) else None, ensure_in_message=[ - "invalid `Return` payload", - "value: `'yo'` does not match type-spec: `Return.pld: PldMsg|NoneType`", + f"invalid `{msg_type_str}` payload", + f"value: `{bad_value_str}` does not match type-spec: `{msg_type_str}.pld: PldMsg|NoneType`", ], ), p.open_context( -- 2.34.1 From 46a1a54aeb5f4e36c38c16218150bfec420b778f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 28 May 2024 11:08:27 -0400 Subject: [PATCH 127/305] Factor `.started()` validation into `.msg._ops` Filling out the helper `validate_payload_msg()` staged in a prior commit and adjusting all imports to match. Also add a `raise_mte: bool` flag for potential usage where the caller wants to handle the MTE instance themselves. 
--- tractor/_context.py | 57 +++++++---------------------------------- tractor/msg/_ops.py | 62 ++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 67 insertions(+), 52 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index fed9f2bc..68a23da0 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -58,9 +58,6 @@ from typing import ( import warnings # ------ - ------ import trio -from msgspec import ( - ValidationError, -) # ------ - ------ from ._exceptions import ( ContextCancelled, @@ -78,19 +75,16 @@ from .log import ( from .msg import ( Error, MsgType, - MsgCodec, NamespacePath, PayloadT, Started, Stop, Yield, - current_codec, pretty_struct, _ops as msgops, ) from ._ipc import ( Channel, - _mk_msg_type_err, ) from ._streaming import MsgStream from ._state import ( @@ -1657,54 +1651,21 @@ class Context: # __tracebackhide__: bool = hide_tb if validate_pld_spec: - # __tracebackhide__: bool = False - codec: MsgCodec = current_codec() - msg_bytes: bytes = codec.encode(started_msg) - try: - roundtripped: Started = codec.decode(msg_bytes) - # pld: PayloadT = await self.pld_rx.recv_pld( - pld: PayloadT = self.pld_rx.dec_msg( - msg=roundtripped, - ipc=self, - expect_msg=Started, - hide_tb=hide_tb, - is_started_send_side=True, - ) - if ( - strict_pld_parity - and - pld != value - ): - # TODO: make that one a mod func too.. - diff = pretty_struct.Struct.__sub__( - roundtripped, - started_msg, - ) - complaint: str = ( - 'Started value does not match after roundtrip?\n\n' - f'{diff}' - ) - raise ValidationError(complaint) - - # raise any msg type error NO MATTER WHAT! - except ValidationError as verr: - # always show this src frame in the tb - # __tracebackhide__: bool = False - raise _mk_msg_type_err( - msg=roundtripped, - codec=codec, - src_validation_error=verr, - verb_header='Trying to send ', - is_invalid_payload=True, - ) from verr + msgops.validate_payload_msg( + pld_msg=started_msg, + pld_value=value, + ipc=self, + strict_pld_parity=strict_pld_parity, + hide_tb=hide_tb, + ) # TODO: maybe a flag to by-pass encode op if already done # here in caller? await self.chan.send(started_msg) # set msg-related internal runtime-state - self._started_called = True - self._started_msg = started_msg + self._started_called: bool = True + self._started_msg: Started = started_msg self._started_pld = value async def _drain_overflows( diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index 6faf78ef..e22d39f1 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -53,6 +53,8 @@ from tractor._state import current_ipc_ctx from ._codec import ( mk_dec, MsgDec, + MsgCodec, + current_codec, ) from .types import ( CancelAck, @@ -737,9 +739,61 @@ async def drain_to_final_msg( ) -# TODO: factor logic from `.Context.started()` for send-side -# validate raising! def validate_payload_msg( - msg: Started|Yield|Return, + pld_msg: Started|Yield|Return, + pld_value: PayloadT, + ipc: Context|MsgStream, + + raise_mte: bool = True, + strict_pld_parity: bool = False, + hide_tb: bool = True, + ) -> MsgTypeError|None: - ... + ''' + Validate a `PayloadMsg.pld` value with the current + IPC ctx's `PldRx` and raise an appropriate `MsgTypeError` + on failure. 
+ + ''' + __tracebackhide__: bool = hide_tb + codec: MsgCodec = current_codec() + msg_bytes: bytes = codec.encode(pld_msg) + try: + roundtripped: Started = codec.decode(msg_bytes) + ctx: Context = getattr(ipc, 'ctx', ipc) + pld: PayloadT = ctx.pld_rx.dec_msg( + msg=roundtripped, + ipc=ipc, + expect_msg=Started, + hide_tb=hide_tb, + is_started_send_side=True, + ) + if ( + strict_pld_parity + and + pld != pld_value + ): + # TODO: make that one a mod func too.. + diff = pretty_struct.Struct.__sub__( + roundtripped, + pld_msg, + ) + complaint: str = ( + 'Started value does not match after roundtrip?\n\n' + f'{diff}' + ) + raise ValidationError(complaint) + + # raise any msg type error NO MATTER WHAT! + except ValidationError as verr: + mte: MsgTypeError = _mk_msg_type_err( + msg=roundtripped, + codec=codec, + src_validation_error=verr, + verb_header='Trying to send ', + is_invalid_payload=True, + ) + if not raise_mte: + return mte + + raise mte from verr -- 2.34.1 From b673d10e1be65207c20ae372f8a1388a64744328 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 28 May 2024 15:30:30 -0400 Subject: [PATCH 128/305] Set remote errors in `_raise_from_unexpected_msg()` By calling `Context._maybe_cancel_and_set_remote_error(exc)` on any unpacked `Error` msg; provides for `Context.maybe_error` consistency to match all other error delivery cases. --- tractor/_exceptions.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 85957356..0dfaf675 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -902,7 +902,7 @@ def pack_error( tb_str: str = ( ''.join(traceback.format_exception(exc)) - # TODO: can we remove this is `exc` is required? + # TODO: can we remove this since `exc` is required.. right? or # NOTE: this is just a shorthand for the "last error" as # provided by `sys.exeception()`, see: @@ -917,8 +917,8 @@ def pack_error( # when caller provides a tb instance (say pulled from some other # src error's `.__traceback__`) we use that as the "boxed" # tb-string instead. + # https://docs.python.org/3/library/traceback.html#traceback.format_list if tb: - # https://docs.python.org/3/library/traceback.html#traceback.format_list tb_str: str = ''.join(traceback.format_tb(tb)) + tb_str error_msg: dict[ # for IPC @@ -961,15 +961,15 @@ def pack_error( error_msg['src_type_str'] = type(exc).__name__ error_msg['boxed_type_str'] = type(exc).__name__ - # XXX alawys append us the last relay in error propagation path + # XXX always append us the last relay in error propagation path error_msg.setdefault( 'relay_path', [], ).append(our_uid) - # XXX NOTE: always ensure the traceback-str is from the - # locally raised error (**not** the prior relay's boxed - # content's in `._ipc_msg.tb_str`). + # XXX NOTE XXX always ensure the traceback-str content is from + # the locally raised error (so, NOT the prior relay's boxed + # `._ipc_msg.tb_str`). error_msg['tb_str'] = tb_str if cid is not None: @@ -1109,6 +1109,7 @@ def _raise_from_unexpected_msg( msg, ctx.chan, ) + ctx._maybe_cancel_and_set_remote_error(exc) raise exc from src_err # `MsgStream` termination msg. 
-- 2.34.1 From aa17635c4baa574e596dacada480d8c090295dd8 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 28 May 2024 15:52:54 -0400 Subject: [PATCH 129/305] Raise send-side MTEs inline in `PldRx.dec_msg()` So when `is_started_send_side is True` we raise the newly created `MsgTypeError` (MTE) directly instead of doing all the `Error`-msg pack and unpack to raise stuff via `_raise_from_unexpected_msg()` since the raise should happen send side anyway and so doesn't emulate any remote fault like in a bad `Return` or `Started` without send-side pld-spec validation. Oh, and proxy-through the `hide_tb: bool` input from `.drain_to_final_msg()` to `.recv_msg_w_pld()`. --- tractor/msg/_ops.py | 75 ++++++++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 28 deletions(-) diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index e22d39f1..6f472afd 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -215,6 +215,9 @@ class PldRx(Struct): **dec_msg_kwargs, ) + # TODO: rename to, + # -[ ] `.decode_pld()`? + # -[ ] `.dec_pld()`? def dec_msg( self, msg: MsgType, @@ -248,8 +251,8 @@ class PldRx(Struct): pld: PayloadT = self._pld_dec.decode(pld) log.runtime( 'Decoded msg payload\n\n' - f'{msg}\n\n' - f'where payload is\n' + f'{msg}\n' + f'where payload decoded as\n' f'|_pld={pld!r}\n' ) return pld @@ -265,13 +268,7 @@ class PldRx(Struct): src_validation_error=valerr, is_invalid_payload=True, expected_msg=expect_msg, - # ipc_msg=msg, ) - # NOTE: override the `msg` passed to - # `_raise_from_unexpected_msg()` (below) so so that - # we're effectively able to use that same func to - # unpack and raise an "emulated remote `Error`" of - # this local MTE. err_msg: Error = pack_error( exc=mte, cid=msg.cid, @@ -283,34 +280,55 @@ class PldRx(Struct): # tb=valerr.__traceback__, tb_str=mte._message, ) - # ^-TODO-^ just raise this inline instead of all the - # pack-unpack-repack non-sense! - mte._ipc_msg = err_msg - msg = err_msg - # set emulated remote error more-or-less as the - # runtime would - ctx: Context = getattr(ipc, 'ctx', ipc) + # NOTE: just raise the MTE inline instead of all + # the pack-unpack-repack non-sense when this is + # a "send side" validation error. + if is_started_send_side: + raise mte + + # XXX TODO: remove this right? + # => any bad stated/return values should + # always be treated a remote errors right? + # + # if ( + # expect_msg is Return + # or expect_msg is Started + # ): + # # set emulated remote error more-or-less as the + # # runtime would + # ctx: Context = getattr(ipc, 'ctx', ipc) + # ctx._maybe_cancel_and_set_remote_error(mte) + + + # XXX override the `msg` passed to + # `_raise_from_unexpected_msg()` (below) so so + # that we're effectively able to use that same + # func to unpack and raise an "emulated remote + # `Error`" of this local MTE. + msg = err_msg + # XXX NOTE: so when the `_raise_from_unexpected_msg()` + # raises the boxed `err_msg` from above it raises + # it from the above caught interchange-lib + # validation error. + src_err = valerr # TODO: should we instead make this explicit and # use the above masked `is_started_send_decode`, # expecting the `Context.started()` caller to set # it? Rn this is kinda, howyousayyy, implicitly # edge-case-y.. - if ( - expect_msg is not Started - and not is_started_send_side - ): - ctx._maybe_cancel_and_set_remote_error(mte) - - # XXX NOTE: so when the `_raise_from_unexpected_msg()` - # raises the boxed `err_msg` from above it raises - # it from `None`. 
- src_err = valerr - # if is_started_send_side: - # src_err = None - + # TODO: remove this since it's been added to + # `_raise_from_unexpected_msg()`..? + # if ( + # expect_msg is not Started + # and not is_started_send_side + # ): + # # set emulated remote error more-or-less as the + # # runtime would + # ctx: Context = getattr(ipc, 'ctx', ipc) + # ctx._maybe_cancel_and_set_remote_error(mte) # XXX some other decoder specific failure? # except TypeError as src_error: @@ -561,6 +579,7 @@ async def drain_to_final_msg( ipc=ctx, expect_msg=Return, raise_error=False, + hide_tb=hide_tb, ) # ^-TODO-^ some bad ideas? # -[ ] wrap final outcome .receive() in a scope so -- 2.34.1 From f227ce608003482634bc8d202e07802a41807a9a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 28 May 2024 16:03:36 -0400 Subject: [PATCH 130/305] Don't (noisly) log about runtime cancel RPC tasks Since in the case of the `Actor._cancel_task()` related runtime eps we actually don't EVER register them in `Actor._rpc_tasks`.. logging about them is just needless noise, though maybe we should track them in a diff table; something like a `._runtime_rpc_tasks`? Drop the cancel-request-for-stale-RPC-task (`KeyError` case in `Actor._cancel_task()`) log-emit level in to `.runtime()`; it's generally not useful info other then for granular race condition eval when hacking the runtime. --- tractor/_rpc.py | 18 ++++++++++++------ tractor/_runtime.py | 7 ++++--- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index 1ea9bce3..84b81c32 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -395,12 +395,18 @@ async def _errors_relayed_via_ipc( f'|_{ctx._task}\n' f' >> {ctx.repr_rpc}\n' ) - else: - log.cancel( - 'Failed to de-alloc internal runtime cancel task?\n' - f'|_{ctx._task}\n' - f' >> {ctx.repr_rpc}\n' - ) + # TODO: remove this right? rn the only non-`is_rpc` cases + # are cancellation methods and according the RPC loop eps + # for thoses below, nothing is ever registered in + # `Actor._rpc_tasks` for those cases.. but should we? + # + # -[ ] maybe we should have an equiv `Actor._runtime_rpc_tasks`? + # else: + # log.cancel( + # 'Failed to de-alloc internal runtime cancel task?\n' + # f'|_{ctx._task}\n' + # f' >> {ctx.repr_rpc}\n' + # ) finally: if not actor._rpc_tasks: diff --git a/tractor/_runtime.py b/tractor/_runtime.py index f267ff67..fe2ecc00 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -1258,9 +1258,10 @@ class Actor: # - child returns a result before cancel-msg/ctxc-raised # - child self raises ctxc before parent send request, # - child errors prior to cancel req. - log.cancel( - 'Cancel request invalid, RPC task already completed?\n\n' - f'<= canceller: {requesting_uid}\n\n' + log.runtime( + 'Cancel request for invalid RPC task.\n' + 'The task likely already completed or was never started!\n\n' + f'<= canceller: {requesting_uid}\n' f'=> {cid}@{parent_chan.uid}\n' f' |_{parent_chan}\n' ) -- 2.34.1 From 235db17c9c5e618482d3ef88be9ac216bf46ff55 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 28 May 2024 16:11:01 -0400 Subject: [PATCH 131/305] Raise remote errors rxed during `Context` child-sync More specifically, if `.open_context()` is cancelled when awaiting the first `Context.started()` during the child task sync phase, check to see if it was due to `._scope.cancel_called` and raise any remote error via `.maybe_raise()` instead the `trio.Cancelled` like in every other remote-error handling case. 
Ensure we set `._scope[_nursery]` only after the `Started` has arrived and audited. --- tractor/_context.py | 74 ++++++++++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 25 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 68a23da0..291bed16 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -664,7 +664,7 @@ class Context: 'Setting remote error for ctx\n\n' f'<= {self.peer_side!r}: {self.chan.uid}\n' f'=> {self.side!r}: {self._actor.uid}\n\n' - f'{error}' + f'{error!r}' ) self._remote_error: BaseException = error @@ -718,7 +718,7 @@ class Context: log.error( f'Remote context error:\n\n' # f'{pformat(self)}\n' - f'{error}' + f'{error!r}' ) if self._canceller is None: @@ -742,26 +742,27 @@ class Context: and not cs.cancel_called and not cs.cancelled_caught ): - if not ( + if ( msgerr # NOTE: we allow user to config not cancelling the # local scope on `MsgTypeError`s - and not self._cancel_on_msgerr + and + not self._cancel_on_msgerr ): - # TODO: it'd sure be handy to inject our own - # `trio.Cancelled` subtype here ;) - # https://github.com/goodboy/tractor/issues/368 - message: str = 'Cancelling `Context._scope` !\n\n' - self._scope.cancel() - - else: message: str = ( 'NOT Cancelling `Context._scope` since,\n' f'Context._cancel_on_msgerr = {self._cancel_on_msgerr}\n\n' f'AND we got a msg-type-error!\n' f'{error}\n' ) + else: + # TODO: it'd sure be handy to inject our own + # `trio.Cancelled` subtype here ;) + # https://github.com/goodboy/tractor/issues/368 + message: str = 'Cancelling `Context._scope` !\n\n' + self._scope.cancel() + else: message: str = 'NOT cancelling `Context._scope` !\n\n' # from .devx import mk_pdb @@ -2058,6 +2059,12 @@ async def open_context_from_portal( if maybe_msgdec: assert maybe_msgdec.pld_spec == pld_spec + # NOTE: this in an implicit runtime nursery used to, + # - start overrun queuing tasks when as well as + # for cancellation of the scope opened by the user. + ctx._scope_nursery: trio.Nursery = tn + ctx._scope: trio.CancelScope = tn.cancel_scope + # XXX NOTE since `._scope` is NOT set BEFORE we retreive the # `Started`-msg any cancellation triggered # in `._maybe_cancel_and_set_remote_error()` will @@ -2065,25 +2072,42 @@ async def open_context_from_portal( # -> it's expected that if there is an error in this phase of # the dialog, the `Error` msg should be raised from the `msg` # handling block below. - started_msg, first = await ctx._pld_rx.recv_msg_w_pld( - ipc=ctx, - expect_msg=Started, - passthrough_non_pld_msgs=False, - hide_tb=hide_tb, - ) + try: + started_msg, first = await ctx._pld_rx.recv_msg_w_pld( + ipc=ctx, + expect_msg=Started, + passthrough_non_pld_msgs=False, + hide_tb=hide_tb, + ) + except trio.Cancelled as taskc: + ctx_cs: trio.CancelScope = ctx._scope + if not ctx_cs.cancel_called: + raise + + # from .devx import pause + # await pause(shield=True) + + log.cancel( + 'IPC ctx was cancelled during "child" task sync due to\n\n' + f'{ctx.maybe_error}\n' + ) + # OW if the ctx's scope was cancelled manually, + # likely the `Context` was cancelled via a call to + # `._maybe_cancel_and_set_remote_error()` so ensure + # we raise the underlying `._remote_error` directly + # instead of bubbling that taskc. + ctx.maybe_raise() + + # OW, some other unexpected cancel condition + # that should prolly never happen right? 
+ raise InternalError( + 'Invalid cancellation during IPC ctx sync phase?\n' + ) from taskc - # from .devx import pause - # await pause() ctx._started_called: bool = True ctx._started_msg: bool = started_msg ctx._started_pld: bool = first - # NOTE: this in an implicit runtime nursery used to, - # - start overrun queuing tasks when as well as - # for cancellation of the scope opened by the user. - ctx._scope_nursery: trio.Nursery = tn - ctx._scope: trio.CancelScope = tn.cancel_scope - # deliver context instance and .started() msg value # in enter tuple. yield ctx, first -- 2.34.1 From 9330a75255dd60aeaf7764713079f1221535c04a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 28 May 2024 19:46:42 -0400 Subject: [PATCH 132/305] Ensure ctx error-state matches the MTE scenario Namely checking that `Context._remote_error` is set to the raised MTE in the invalid started and return value cases since prior to the recent underlying changes to the `Context.result()` impl, it would not match. Further, - do asserts for non-MTE raising cases in both the parent and child. - add todos for testing ctx-outcomes for per-side-validation policies i anticipate supporting and implied msg-dialog race cases therein. --- tests/test_pldrx_limiting.py | 89 ++++++++++++++++++++++++++++-------- 1 file changed, 71 insertions(+), 18 deletions(-) diff --git a/tests/test_pldrx_limiting.py b/tests/test_pldrx_limiting.py index 562164c7..ee61dca0 100644 --- a/tests/test_pldrx_limiting.py +++ b/tests/test_pldrx_limiting.py @@ -148,12 +148,44 @@ async def child( # propagate to parent? if raise_on_started_mte: raise - else: - if expect_started_mte: - raise RuntimeError( - 'Child-ctx-task SHOULD HAVE raised an MTE for\n\n' - f'{started_value!r}\n' - ) + + # no-send-side-error fallthrough + if ( + validate_pld_spec + and + expect_started_mte + ): + raise RuntimeError( + 'Child-ctx-task SHOULD HAVE raised an MTE for\n\n' + f'{started_value!r}\n' + ) + + assert ( + not expect_started_mte + or + not validate_pld_spec + ) + + # if wait_for_parent_to_cancel: + # ... + # + # ^-TODO-^ logic for diff validation policies on each side: + # + # -[ ] ensure that if we don't validate on the send + # side, that we are eventually error-cancelled by our + # parent due to the bad `Started` payload! + # -[ ] the boxed error should be srced from the parent's + # runtime NOT ours! + # -[ ] we should still error on bad `return_value`s + # despite the parent not yet error-cancelling us? + # |_ how do we want the parent side to look in that + # case? + # -[ ] maybe the equiv of "during handling of the + # above error another occurred" for the case where + # the parent sends a MTE to this child and while + # waiting for the child to terminate it gets back + # the MTE for this case? 
+ # # XXX should always fail on recv side since we can't # really do much else beside terminate and relay the @@ -247,13 +279,17 @@ def test_basic_payload_spec( msg_type_str: str = '' bad_value_str: str = '' + maybe_mte: MsgTypeError|None = None + should_raise: Exception|None = ( + MsgTypeError if ( + invalid_return + or + invalid_started + ) else None + ) async with ( maybe_expect_raises( - raises=MsgTypeError if ( - invalid_return - or - invalid_started - ) else None, + raises=should_raise, ensure_in_message=[ f"invalid `{msg_type_str}` payload", f"value: `{bad_value_str}` does not match type-spec: `{msg_type_str}.pld: PldMsg|NoneType`", @@ -274,18 +310,35 @@ def test_basic_payload_spec( assert first.field == 'yo' try: - assert (await ctx.result()) is None + res: None|PldMsg = await ctx.result(hide_tb=False) + assert res is None except MsgTypeError as mte: + maybe_mte = mte if not invalid_return: raise - else: # expected this invalid `Return.pld` - assert mte.cid == ctx.cid + # expected this invalid `Return.pld` so audit + # the error state + meta-data + assert mte.expected_msg_type is Return + assert mte.cid == ctx.cid - # verify expected remote mte deats - await tractor.pause() - assert ctx._remote_error is mte - assert mte.expected_msg_type is Return + # verify expected remote mte deats + try: + assert ctx._local_error is None + assert ( + mte is + ctx._remote_error is + ctx.maybe_error is + ctx.outcome + ) + except: + # XXX should never get here.. + await tractor.pause(shield=True) + raise + + + if should_raise is None: + assert maybe_mte is None await p.cancel_actor() -- 2.34.1 From f05abbcfeeb36a05dbae290bd80cb1a7902bdbf3 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 01:11:29 -0400 Subject: [PATCH 133/305] Ensure only a boxed traceback for MTE on parent side --- tests/test_pldrx_limiting.py | 58 ++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/tests/test_pldrx_limiting.py b/tests/test_pldrx_limiting.py index ee61dca0..0c5c7ee8 100644 --- a/tests/test_pldrx_limiting.py +++ b/tests/test_pldrx_limiting.py @@ -53,6 +53,9 @@ async def maybe_expect_raises( Async wrapper for ensuring errors propagate from the inner scope. ''' + if tractor._state.debug_mode(): + timeout += 999 + with trio.fail_after(timeout): try: yield @@ -68,9 +71,10 @@ async def maybe_expect_raises( # maybe check for error txt content if ensure_in_message: part: str + err_repr: str = repr(inner_err) for part in ensure_in_message: for i, arg in enumerate(inner_err.args): - if part in arg: + if part in err_repr: break # if part never matches an arg, then we're # missing a match. 
@@ -97,7 +101,7 @@ async def child( ctx: Context, started_value: int|PldMsg|None, return_value: str|None, - validate_pld_spec: bool, + validate_pld_spec: bool, raise_on_started_mte: bool = True, ) -> None: @@ -131,13 +135,15 @@ async def child( # 2 cases: hdndle send-side and recv-only validation # - when `raise_on_started_mte == True`, send validate # - else, parent-recv-side only validation + mte: MsgTypeError|None = None try: await ctx.started( value=started_value, validate_pld_spec=validate_pld_spec, ) - except MsgTypeError: + except MsgTypeError as _mte: + mte = _mte log.exception('started()` raised an MTE!\n') if not expect_started_mte: raise RuntimeError( @@ -145,6 +151,19 @@ async def child( f'{started_value!r}\n' ) + boxed_div: str = '------ - ------' + assert boxed_div not in mte._message + assert boxed_div not in mte.tb_str + assert boxed_div not in repr(mte) + assert boxed_div not in str(mte) + mte_repr: str = repr(mte) + for line in mte.message.splitlines(): + assert line in mte_repr + + # since this is a *local error* there should be no + # boxed traceback content! + assert not mte.tb_str + # propagate to parent? if raise_on_started_mte: raise @@ -208,8 +227,8 @@ async def child( @pytest.mark.parametrize( 'return_value', [ - None, 'yo', + None, ], ids=[ 'return[invalid-"yo"]', @@ -291,8 +310,9 @@ def test_basic_payload_spec( maybe_expect_raises( raises=should_raise, ensure_in_message=[ - f"invalid `{msg_type_str}` payload", - f"value: `{bad_value_str}` does not match type-spec: `{msg_type_str}.pld: PldMsg|NoneType`", + f"invalid `{msg_type_str}` msg payload", + f"value: `{bad_value_str}` does not " + f"match type-spec: `{msg_type_str}.pld: PldMsg|NoneType`", ], ), p.open_context( @@ -321,21 +341,21 @@ def test_basic_payload_spec( # the error state + meta-data assert mte.expected_msg_type is Return assert mte.cid == ctx.cid + mte_repr: str = repr(mte) + for line in mte.message.splitlines(): + assert line in mte_repr + + assert mte.tb_str + # await tractor.pause(shield=True) # verify expected remote mte deats - try: - assert ctx._local_error is None - assert ( - mte is - ctx._remote_error is - ctx.maybe_error is - ctx.outcome - ) - except: - # XXX should never get here.. - await tractor.pause(shield=True) - raise - + assert ctx._local_error is None + assert ( + mte is + ctx._remote_error is + ctx.maybe_error is + ctx.outcome + ) if should_raise is None: assert maybe_mte is None -- 2.34.1 From d154afd678379061506ee1d54edbec235523d8e2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 09:05:23 -0400 Subject: [PATCH 134/305] Don't need to pack an `Error` with send-side MTEs --- tractor/msg/_ops.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index 6f472afd..97cd3f29 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -269,19 +269,6 @@ class PldRx(Struct): is_invalid_payload=True, expected_msg=expect_msg, ) - err_msg: Error = pack_error( - exc=mte, - cid=msg.cid, - src_uid=( - ipc.chan.uid - if not is_started_send_side - else ipc._actor.uid - ), - # tb=valerr.__traceback__, - tb_str=mte._message, - ) - mte._ipc_msg = err_msg - # NOTE: just raise the MTE inline instead of all # the pack-unpack-repack non-sense when this is # a "send side" validation error. 
@@ -301,6 +288,22 @@ class PldRx(Struct): # ctx: Context = getattr(ipc, 'ctx', ipc) # ctx._maybe_cancel_and_set_remote_error(mte) + # NOTE: the `.message` is automatically + # transferred into the message as long as we + # define it as a `Error.message` field. + err_msg: Error = pack_error( + exc=mte, + cid=msg.cid, + src_uid=( + ipc.chan.uid + if not is_started_send_side + else ipc._actor.uid + ), + # tb=valerr.__traceback__, + # tb_str=mte._message, + # message=mte._message, + ) + mte._ipc_msg = err_msg # XXX override the `msg` passed to # `_raise_from_unexpected_msg()` (below) so so -- 2.34.1 From a1779a8fa948971d7d169fc89583b19d36ec0901 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 09:06:10 -0400 Subject: [PATCH 135/305] Fix missing newline in task-cancel log-message --- tractor/_rpc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index 84b81c32..e4cc5750 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -391,7 +391,7 @@ async def _errors_relayed_via_ipc( # cancel scope will not have been inserted yet if is_rpc: log.warning( - 'RPC task likely errored or cancelled before start?' + 'RPC task likely errored or cancelled before start?\n' f'|_{ctx._task}\n' f' >> {ctx.repr_rpc}\n' ) -- 2.34.1 From fd0c14df808876da72a6821fe9320881ddcae922 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 09:14:04 -0400 Subject: [PATCH 136/305] Add `Error.message: str` Allows passing a custom error msg other then the traceback-str over the wire. Make `.tb_str` optional (in the blank `''` sense) since it's treated that way thus far in `._exceptions.pack_error()`. --- tractor/msg/types.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 08511ec0..ad6d6fb8 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -410,7 +410,13 @@ class Error( src_type_str: str boxed_type_str: str relay_path: list[tuple[str, str]] - tb_str: str + + # normally either both are provided or just + # a message for certain special cases where + # we pack a message for a locally raised + # mte or ctxc. + message: str|None = None + tb_str: str = '' # TODO: only optionally include sub-type specfic fields? # -[ ] use UNSET or don't include them via `omit_defaults` (see -- 2.34.1 From 08dc32fbb7221f1dd0827c8f315d2afd8af8a6e9 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 10:04:54 -0400 Subject: [PATCH 137/305] Better RAE `.pformat()`-ing for send-side MTEs Send-side `MsgTypeError`s actually shouldn't have any "boxed" traceback per say since they're raised in the transmitting actor's local task env and we (normally) don't want the ascii decoration added around the error's `._message: str`, that is not until the exc is `pack_error()`-ed before transit. As such, the presentation of an embedded traceback (and its ascii box) gets bypassed when only a `._message: str` is set (as we now do for pld-spec failures in `_mk_msg_type_err()`). Further this tweaks the `.pformat()` output to include the `._message` part to look like ` ) ..` instead of jamming it implicitly to the end of the embedded `.tb_str` (as was done implicitly by `unpack_error()`) and also adds better handling for the `with_type_header == False` case including forcing that case when we detect that the currently handled exc is the RAE in `.pformat()`. Toss in a lengthier doc-str explaining it all. 
Surrounding/supporting changes, - better `unpack_error()` message which just briefly reports the remote task's error type. - add public `.message: str` prop. - always set a `._extra_msgdata: dict` since some MTE props rely on it. - handle `.boxed_type == None` for `.boxed_type_str`. - maybe pack any detected input or `exc.message` in `pack_error()`. - comment cruft cleanup in `_mk_msg_type_err()`. --- tractor/_exceptions.py | 199 +++++++++++++++++++++++++---------------- 1 file changed, 124 insertions(+), 75 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 0dfaf675..52048c17 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -22,6 +22,7 @@ from __future__ import annotations import builtins import importlib from pprint import pformat +import sys from types import ( TracebackType, ) @@ -110,6 +111,7 @@ _body_fields: list[str] = list( 'tb_str', 'relay_path', 'cid', + 'message', # only ctxc should show it but `Error` does # have it as an optional field. @@ -236,6 +238,7 @@ class RemoteActorError(Exception): self._boxed_type: BaseException = boxed_type self._src_type: BaseException|None = None self._ipc_msg: Error|None = ipc_msg + self._extra_msgdata = extra_msgdata if ( extra_msgdata @@ -250,8 +253,6 @@ class RemoteActorError(Exception): k, v, ) - else: - self._extra_msgdata = extra_msgdata # TODO: mask out eventually or place in `pack_error()` # pre-`return` lines? @@ -282,6 +283,17 @@ class RemoteActorError(Exception): # ensure any roundtripping evals to the input value assert self.boxed_type is boxed_type + @property + def message(self) -> str: + ''' + Be explicit, instead of trying to read it from the the parent + type's loosely defined `.args: tuple`: + + https://docs.python.org/3/library/exceptions.html#BaseException.args + + ''' + return self._message + @property def ipc_msg(self) -> Struct: ''' @@ -355,7 +367,10 @@ class RemoteActorError(Exception): ''' bt: Type[BaseException] = self.boxed_type - return str(bt.__name__) + if bt: + return str(bt.__name__) + + return '' @property def boxed_type(self) -> Type[BaseException]: @@ -426,8 +441,7 @@ class RemoteActorError(Exception): for key in fields: if ( - key == 'relay_uid' - and not self.is_inception() + key == 'relay_uid' and not self.is_inception() ): continue @@ -504,19 +518,80 @@ class RemoteActorError(Exception): def pformat( self, with_type_header: bool = True, + # with_ascii_box: bool = True, + ) -> str: ''' - Nicely formatted boxed error meta data + traceback, OR just - the normal message from `.args` (for eg. as you'd want shown - by a locally raised `ContextCancelled`). + Format any boxed remote error by multi-line display of, + + - error's src or relay actor meta-data, + - remote runtime env's traceback, + + With optional control over the format of, + + - whether the boxed traceback is ascii-decorated with + a surrounding "box" annotating the embedded stack-trace. + - if the error's type name should be added as margins + around the field and tb content like: + + `> .. )>` + + - the placement of the `.message: str` (explicit equiv of + `.args[0]`), either placed below the `.tb_str` or in the + first line's header when the error is raised locally (since + the type name is already implicitly shown by python). ''' header: str = '' body: str = '' + message: str = '' + # XXX when the currently raised exception is this instance, + # we do not ever use the "type header" style repr. 
+ is_being_raised: bool = False + if ( + (exc := sys.exception()) + and + exc is self + ): + is_being_raised: bool = True + + with_type_header: bool = ( + with_type_header + and + not is_being_raised + ) + + # style if with_type_header: - header: str = f'<{type(self).__name__}(\n' + header: str = f'<{type(self).__name__}(' + if message := self._message: + + # split off the first line so, if needed, it isn't + # indented the same like the "boxed content" which + # since there is no `.tb_str` is just the `.message`. + lines: list[str] = message.splitlines() + first: str = lines[0] + message: str = message.removeprefix(first) + + # with a type-style header we, + # - have no special message "first line" extraction/handling + # - place the message a space in from the header: + # `MsgTypeError( ..` + # ^-here + # - indent the `.message` inside the type body. + if with_type_header: + first = f' {first} )>' + + message: str = textwrap.indent( + message, + prefix=' '*2, + ) + message: str = first + message + + # IFF there is an embedded traceback-str we always + # draw the ascii-box around it. if tb_str := self.tb_str: fields: str = self._mk_fields_str( _body_fields @@ -535,36 +610,19 @@ class RemoteActorError(Exception): # |___ .. tb_body_indent=1, ) - if not with_type_header: - body = '\n' + body - elif message := self._message: - # split off the first line so it isn't indented - # the same like the "boxed content". - if not with_type_header: - lines: list[str] = message.splitlines() - first: str = lines[0] - message: str = message.removeprefix(first) - - else: - first: str = '' - - body: str = ( - first - + - message - + - '\n' - ) - - if with_type_header: - tail: str = ')>' - else: - tail = '' + tail = '' + if ( + with_type_header + and not message + ): + tail: str = '>' return ( header + + message + + f'{body}' + tail @@ -577,7 +635,9 @@ class RemoteActorError(Exception): # |_ i guess `pexepect` relies on `str`-casing # of output? def __str__(self) -> str: - return self.pformat(with_type_header=False) + return self.pformat( + with_type_header=False + ) def unwrap( self, @@ -825,9 +885,6 @@ class MsgTypeError( extra_msgdata['_bad_msg'] = bad_msg extra_msgdata['cid'] = bad_msg.cid - if 'cid' not in extra_msgdata: - import pdbp; pdbp.set_trace() - return cls( message=message, boxed_type=cls, @@ -889,6 +946,7 @@ def pack_error( src_uid: tuple[str, str]|None = None, tb: TracebackType|None = None, tb_str: str = '', + message: str = '', ) -> Error: ''' @@ -971,7 +1029,7 @@ def pack_error( # the locally raised error (so, NOT the prior relay's boxed # `._ipc_msg.tb_str`). error_msg['tb_str'] = tb_str - + error_msg['message'] = message or getattr(exc, 'message', '') if cid is not None: error_msg['cid'] = cid @@ -995,26 +1053,24 @@ def unpack_error( if not isinstance(msg, Error): return None - # retrieve the remote error's msg-encoded details - tb_str: str = msg.tb_str - message: str = ( - f'{chan.uid}\n' - + - tb_str - ) - # try to lookup a suitable error type from the local runtime # env then use it to construct a local instance. 
# boxed_type_str: str = error_dict['boxed_type_str'] boxed_type_str: str = msg.boxed_type_str boxed_type: Type[BaseException] = get_err_type(boxed_type_str) - if boxed_type_str == 'ContextCancelled': - box_type = ContextCancelled - assert boxed_type is box_type + # retrieve the error's msg-encoded remotoe-env info + message: str = f'remote task raised a {msg.boxed_type_str!r}\n' - elif boxed_type_str == 'MsgTypeError': - box_type = MsgTypeError + # TODO: do we even really need these checks for RAEs? + if boxed_type_str in [ + 'ContextCancelled', + 'MsgTypeError', + ]: + box_type = { + 'ContextCancelled': ContextCancelled, + 'MsgTypeError': MsgTypeError, + }[boxed_type_str] assert boxed_type is box_type # TODO: already included by `_this_mod` in else loop right? @@ -1029,19 +1085,21 @@ def unpack_error( exc = box_type( message, ipc_msg=msg, + tb_str=msg.tb_str, ) return exc -def is_multi_cancelled(exc: BaseException) -> bool: +def is_multi_cancelled( + exc: BaseException|BaseExceptionGroup +) -> bool: ''' Predicate to determine if a possible ``BaseExceptionGroup`` contains only ``trio.Cancelled`` sub-exceptions (and is likely the result of cancelling a collection of subtasks. ''' - # if isinstance(exc, eg.BaseExceptionGroup): if isinstance(exc, BaseExceptionGroup): return exc.subgroup( lambda exc: isinstance(exc, trio.Cancelled) @@ -1184,7 +1242,6 @@ def _mk_msg_type_err( src_validation_error: ValidationError|None = None, src_type_error: TypeError|None = None, is_invalid_payload: bool = False, - # src_err_msg: Error|None = None, **mte_kwargs, @@ -1251,19 +1308,11 @@ def _mk_msg_type_err( msg_type: str = type(msg) any_pld: Any = msgpack.decode(msg.pld) message: str = ( - f'invalid `{msg_type.__qualname__}` payload\n\n' - f'value: `{any_pld!r}` does not match type-spec: ' #\n' + f'invalid `{msg_type.__qualname__}` msg payload\n\n' + f'value: `{any_pld!r}` does not match type-spec: ' f'`{type(msg).__qualname__}.pld: {codec.pld_spec_str}`' - # f'<{type(msg).__qualname__}(\n' - # f' |_pld: {codec.pld_spec_str}\n'# != {any_pld!r}\n' - # f')>\n\n' ) - # src_err_msg = msg bad_msg = msg - # TODO: should we just decode the msg to a dict despite - # only the payload being wrong? - # -[ ] maybe the better design is to break this construct - # logic into a separate explicit helper raiser-func? else: # decode the msg-bytes using the std msgpack @@ -1308,21 +1357,21 @@ def _mk_msg_type_err( if verb_header: message = f'{verb_header} ' + message - # if not isinstance(bad_msg, PayloadMsg): - # import pdbp; pdbp.set_trace() - msgtyperr = MsgTypeError.from_decode( message=message, bad_msg=bad_msg, bad_msg_as_dict=msg_dict, - # NOTE: for the send-side `.started()` pld-validate - # case we actually set the `._ipc_msg` AFTER we return - # from here inside `Context.started()` since we actually - # want to emulate the `Error` from the mte we build here - # Bo - # so by default in that case this is set to `None` - # ipc_msg=src_err_msg, + # NOTE: for pld-spec MTEs we set the `._ipc_msg` manually: + # - for the send-side `.started()` pld-validate + # case we actually raise inline so we don't need to + # set the it at all. + # - for recv side we set it inside `PldRx.decode_pld()` + # after a manual call to `pack_error()` since we + # actually want to emulate the `Error` from the mte we + # build here. So by default in that case, this is left + # as `None` here. 
+ # ipc_msg=src_err_msg, ) msgtyperr.__cause__ = src_validation_error return msgtyperr -- 2.34.1 From f120ee72f5ea05b9b2133e7b36dff62accb2f722 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 15:24:25 -0400 Subject: [PATCH 138/305] Add `from_src_exc: BaseException` to maybe raisers That is as a control to `Context._maybe_raise_remote_err()` such that if set to anything other then the default (`False` value), we do `raise remote_error from from_src_exc` such that caller can choose to suppress or override the `.__cause__` tb. Also tidy up and old masked TODO regarding calling `.maybe_raise()` after the caller exits from the `yield` in `.open_context()`.. --- tractor/_context.py | 51 ++++++++++++++++++--------------------------- 1 file changed, 20 insertions(+), 31 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 291bed16..453e9065 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -1190,6 +1190,7 @@ class Context: self, remote_error: Exception, + from_src_exc: BaseException|None|bool = False, raise_ctxc_from_self_call: bool = False, raise_overrun_from_self: bool = True, hide_tb: bool = True, @@ -1284,7 +1285,10 @@ class Context: # runtime frames from the tb explicitly? # https://docs.python.org/3/reference/simple_stmts.html#the-raise-statement # https://stackoverflow.com/a/24752607 - raise remote_error # from None + if from_src_exc is not False: + raise remote_error from from_src_exc + + raise remote_error # TODO: change to `.wait_for_result()`? async def result( @@ -2096,7 +2100,11 @@ async def open_context_from_portal( # `._maybe_cancel_and_set_remote_error()` so ensure # we raise the underlying `._remote_error` directly # instead of bubbling that taskc. - ctx.maybe_raise() + ctx.maybe_raise( + # mask the above taskc from the tb + from_src_exc=None, + hide_tb=hide_tb, + ) # OW, some other unexpected cancel condition # that should prolly never happen right? @@ -2108,13 +2116,14 @@ async def open_context_from_portal( ctx._started_msg: bool = started_msg ctx._started_pld: bool = first - # deliver context instance and .started() msg value - # in enter tuple. + # deliver context ref and `.started()` msg payload value + # in `__aenter__` tuple. yield ctx, first # ??TODO??: do we still want to consider this or is # the `else:` block handling via a `.result()` # call below enough?? + # # -[ ] pretty sure `.result()` internals do the # same as our ctxc handler below so it ended up # being same (repeated?) behaviour, but ideally we @@ -2123,33 +2132,13 @@ async def open_context_from_portal( # that we can re-use it around the `yield` ^ here # or vice versa? # - # NOTE: between the caller exiting and arriving - # here the far end may have sent a ctxc-msg or - # other error, so check for it here immediately - # and maybe raise so as to engage the ctxc - # handling block below! + # maybe TODO NOTE: between the caller exiting and + # arriving here the far end may have sent a ctxc-msg or + # other error, so the quetion is whether we should check + # for it here immediately and maybe raise so as to engage + # the ctxc handling block below ???? # - # if re := ctx._remote_error: - # maybe_ctxc: ContextCancelled|None = ctx._maybe_raise_remote_err( - # re, - # # TODO: do we want this to always raise? - # # - means that on self-ctxc, if/when the - # # block is exited before the msg arrives - # # but then the msg during __exit__ - # # calling we may not activate the - # # ctxc-handler block below? should we - # # be? 
- # # - if there's a remote error that arrives - # # after the child has exited, we won't - # # handle until the `finally:` block - # # where `.result()` is always called, - # # again in which case we handle it - # # differently then in the handler block - # # that would normally engage from THIS - # # block? - # raise_ctxc_from_self_call=True, - # ) - # ctxc_from_callee = maybe_ctxc + # self.maybe_raise() # when in allow_overruns mode there may be # lingering overflow sender tasks remaining? @@ -2460,7 +2449,7 @@ async def open_context_from_portal( # # NOTE: further, this should be the only place the # underlying feeder channel is - # once-and-only-CLOSED! + # once-forever-and-only-CLOSED! with trio.CancelScope(shield=True): await ctx._rx_chan.aclose() -- 2.34.1 From 33e646fd6a010c20e85e81d20173d1a2670d8a8f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 15:55:34 -0400 Subject: [PATCH 139/305] Pass `boxed_type` from `_mk_msg_type_err()` Such that we're boxing the interchanged lib's specific error `msgspec.ValidationError` in this case) type much like how a `ContextCancelled[trio.Cancelled]` is composed; allows for seemless multi-backend-codec support later as well B) Pass `ctx.maybe_raise(from_src_exc=src_err)` where needed in a couple spots; as `None` in the send-side `Started` MTE case to avoid showing the `._scope1.cancel_called` result in the traceback from the `.open_context()` child-sync phase. --- tractor/_exceptions.py | 8 ++++---- tractor/_streaming.py | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 52048c17..92c3fafb 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -518,7 +518,6 @@ class RemoteActorError(Exception): def pformat( self, with_type_header: bool = True, - # with_ascii_box: bool = True, ) -> str: ''' @@ -885,9 +884,9 @@ class MsgTypeError( extra_msgdata['_bad_msg'] = bad_msg extra_msgdata['cid'] = bad_msg.cid + extra_msgdata.setdefault('boxed_type', cls) return cls( message=message, - boxed_type=cls, **extra_msgdata, ) @@ -1111,7 +1110,7 @@ def is_multi_cancelled( def _raise_from_unexpected_msg( ctx: Context, msg: MsgType, - src_err: AttributeError, + src_err: Exception, log: StackLevelAdapter, # caller specific `log` obj expect_msg: Type[MsgType], @@ -1212,7 +1211,7 @@ def _raise_from_unexpected_msg( # in case there already is some underlying remote error # that arrived which is probably the source of this stream # closure - ctx.maybe_raise() + ctx.maybe_raise(from_src_exc=src_err) raise eoc from src_err # TODO: our own transport/IPC-broke error subtype? 
@@ -1361,6 +1360,7 @@ def _mk_msg_type_err( message=message, bad_msg=bad_msg, bad_msg_as_dict=msg_dict, + boxed_type=type(src_validation_error), # NOTE: for pld-spec MTEs we set the `._ipc_msg` manually: # - for the send-side `.started()` pld-validate diff --git a/tractor/_streaming.py b/tractor/_streaming.py index a008eaf5..016577d3 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -233,6 +233,7 @@ class MsgStream(trio.abc.Channel): # ctx: Context = self._ctx ctx.maybe_raise( raise_ctxc_from_self_call=True, + from_src_exc=src_err, ) # propagate any error but hide low-level frame details -- 2.34.1 From 54d397b726f276fd87df8fc56da7276865de0111 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 16:02:25 -0400 Subject: [PATCH 140/305] Always `.exception()` in `try_ship_error_to_remote()` on internal error --- tractor/_rpc.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index e4cc5750..57db52f3 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -326,7 +326,6 @@ async def _errors_relayed_via_ipc( f'|_{ctx}' ) - # ALWAYS try to ship RPC errors back to parent/caller task if is_rpc: @@ -817,6 +816,12 @@ async def try_ship_error_to_remote( # TODO: use `.msg.preetty_struct` for this! f'{msg}\n' ) + except BaseException: + log.exception( + 'Errored while attempting error shipment?' + ) + __tracebackhide__: bool = False + raise async def process_messages( -- 2.34.1 From a95b84e4fb666a2a75c50e2ec938249ff17d826d Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 16:02:59 -0400 Subject: [PATCH 141/305] Change `reraise` to `post_mortem: bool` in `maybe_expect_raises()` --- tests/test_pldrx_limiting.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_pldrx_limiting.py b/tests/test_pldrx_limiting.py index 0c5c7ee8..5cb0d357 100644 --- a/tests/test_pldrx_limiting.py +++ b/tests/test_pldrx_limiting.py @@ -46,7 +46,7 @@ maybe_msg_spec = PldMsg|None async def maybe_expect_raises( raises: BaseException|None = None, ensure_in_message: list[str]|None = None, - reraise: bool = False, + post_mortem: bool = False, timeout: int = 3, ) -> None: ''' @@ -86,8 +86,8 @@ async def maybe_expect_raises( f'{inner_err.args}' ) - if reraise: - raise inner_err + if post_mortem: + await tractor.post_mortem() else: if raises: @@ -314,6 +314,8 @@ def test_basic_payload_spec( f"value: `{bad_value_str}` does not " f"match type-spec: `{msg_type_str}.pld: PldMsg|NoneType`", ], + # only for debug + post_mortem=True, ), p.open_context( child, -- 2.34.1 From 1ef1ebfa993b3a2a103bdc95f8abb526de8a8578 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 16:03:28 -0400 Subject: [PATCH 142/305] Add a `tractor.post_mortem()` API test + example Since turns out we didn't have a single example using that API Bo The test granular-ly checks all use cases: - `.post_mortem()` manual calls in both subactor and root. - ensuring built-in RPC crash handling activates after each manual one from ^. - drafted some call-stack frame checking that i commented out for now since we need to first do ANSI escape code removal due to the colorization that `pdbp` does by default. |_ added a TODO with SO link on `assert_before()`. 
Also todo-staged a shielded-pause test to match with the already
existing-but-needs-refinement example B)
---
 examples/debugging/pm_in_subactor.py |  56 ++++++++++++
 tests/test_debugger.py               | 122 ++++++++++++++++++++++++++-
 2 files changed, 177 insertions(+), 1 deletion(-)
 create mode 100644 examples/debugging/pm_in_subactor.py

diff --git a/examples/debugging/pm_in_subactor.py b/examples/debugging/pm_in_subactor.py
new file mode 100644
index 00000000..a8f5048e
--- /dev/null
+++ b/examples/debugging/pm_in_subactor.py
@@ -0,0 +1,56 @@
+import trio
+import tractor
+
+
+@tractor.context
+async def name_error(
+    ctx: tractor.Context,
+):
+    '''
+    Raise a `NameError`, catch it and enter `.post_mortem()`, then
+    expect the `._rpc._invoke()` crash handler to also engage.
+
+    '''
+    try:
+        getattr(doggypants)  # noqa (on purpose)
+    except NameError:
+        await tractor.post_mortem()
+        raise
+
+
+async def main():
+    '''
+    Test 3 `PdbREPL` entries:
+    - one in the child due to manual `.post_mortem()`,
+    - another in the child due to runtime RPC crash handling.
+    - final one here in parent from the RAE.
+
+    '''
+    # XXX NOTE: ideally the REPL arrives at this frame in the parent
+    # ONE UP FROM the inner ctx block below!
+    async with tractor.open_nursery(
+        debug_mode=True,
+        # loglevel='cancel',
+    ) as an:
+        p: tractor.Portal = await an.start_actor(
+            'child',
+            enable_modules=[__name__],
+        )
+
+        # XXX should raise `RemoteActorError[NameError]`
+        # AND be the active frame when REPL enters!
+        try:
+            async with p.open_context(name_error) as (ctx, first):
+                assert first
+        except tractor.RemoteActorError as rae:
+            assert rae.boxed_type is NameError
+
+            # manually handle in root's parent task
+            await tractor.post_mortem()
+            raise
+        else:
+            raise RuntimeError('IPC ctx should have remote errored!?')
+
+
+if __name__ == '__main__':
+    trio.run(main)
diff --git a/tests/test_debugger.py b/tests/test_debugger.py
index 6aa3bd53..ac7b0feb 100644
--- a/tests/test_debugger.py
+++ b/tests/test_debugger.py
@@ -159,6 +159,10 @@ def in_prompt_msg(
 
     return True
 
+
+# TODO: support terminal color-chars stripping so we can match
+# against call stack frame output from the 'll' command and the like!
+# -[ ] SO answer for stripping ANSI codes: https://stackoverflow.com/a/14693789
 def assert_before(
     child,
     patts: list[str],
@@ -1123,7 +1127,112 @@ def test_pause_from_sync(
     child.expect(pexpect.EOF)
 
 
-# TODO!
+def test_post_mortem_api(
+    spawn,
+    ctlc: bool,
+):
+    '''
+    Verify the `tractor.post_mortem()` API works in an exception
+    handler block.
+
+    '''
+    child = spawn('pm_in_subactor')
+
+    # First entry is via manual `.post_mortem()`
+    child.expect(PROMPT)
+    assert_before(
+        child,
+        [
+            _crash_msg,
+            " async with p.open_context(name_error) as (ctx, first):',
+    #         ]
+    #     )
+
+    #     # step up a frame to ensure it's the root's nursery
+    #     child.sendline('u')
+    #     child.expect(PROMPT)
+    #     assert_before(
+    #         child,
+    #         [
+    #             # handler block annotation
+    #             '-> async with tractor.open_nursery(',
+    #         ]
+    #     )
+
+    child.sendline('c')
+    child.expect(pexpect.EOF)
+
+
+# TODO: needs ANSI code stripping tho, see `assert_before()`
+# above!
 def test_correct_frames_below_hidden():
     '''
     Ensure that once a `tractor.pause()` enages, when the user
@@ -1136,4 +1245,15 @@ def test_correct_frames_below_hidden():
 
 
 def test_cant_pause_from_paused_task():
+    '''
+    Pausing from within an already paused task should raise an error.
+
+    Normally this should only happen in practice while debugging the call stack of `tractor.pause()` itself, likely
+    by a `.pause()` line somewhere inside our runtime.
+
+    '''
     ...
+
+
+def test_shield_pause():
+    ...
-- 
2.34.1

From 54d397b726f276fd87df8fc56da7276865de0111 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Thu, 30 May 2024 16:09:59 -0400
Subject: [PATCH 143/305] Rename `PldRx.dec_msg()` -> `.decode_pld()`

Keep the old alias, but i think it's better form to use longer names for
internal public APIs and this name better reflects the functionality:
decoding and returning a `PayloadMsg.pld` field.
---
 tractor/msg/_ops.py | 54 +++++++++++++--------------------------------
 1 file changed, 15 insertions(+), 39 deletions(-)

diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py
index 97cd3f29..86f80395 100644
--- a/tractor/msg/_ops.py
+++ b/tractor/msg/_ops.py
@@ -167,7 +167,7 @@ class PldRx(Struct):
         ipc_msg: MsgType|None = None,
         expect_msg: Type[MsgType]|None = None,
         hide_tb: bool = False,
-        **dec_msg_kwargs,
+        **dec_pld_kwargs,
 
     ) -> Any|Raw:
         __tracebackhide__: bool = hide_tb
@@ -179,12 +179,12 @@ class PldRx(Struct):
             # sync-rx msg from underlying IPC feeder (mem-)chan
             ipc._rx_chan.receive_nowait()
         )
-        return self.dec_msg(
+        return self.decode_pld(
             msg,
             ipc=ipc,
             expect_msg=expect_msg,
             hide_tb=hide_tb,
-            **dec_msg_kwargs,
+            **dec_pld_kwargs,
         )
 
     async def recv_pld(
@@ -194,7 +194,7 @@ class PldRx(Struct):
         expect_msg: Type[MsgType]|None = None,
         hide_tb: bool = True,
 
-        **dec_msg_kwargs,
+        **dec_pld_kwargs,
 
     ) -> Any|Raw:
         '''
@@ -208,17 +208,14 @@ class PldRx(Struct):
             # async-rx msg from underlying IPC feeder (mem-)chan
             await ipc._rx_chan.receive()
         )
-        return self.dec_msg(
+        return self.decode_pld(
             msg=msg,
             ipc=ipc,
             expect_msg=expect_msg,
-            **dec_msg_kwargs,
+            **dec_pld_kwargs,
         )
 
-    # TODO: rename to,
-    # -[ ] `.decode_pld()`?
-    # -[ ] `.dec_pld()`?
-    def dec_msg(
+    def decode_pld(
         self,
         msg: MsgType,
         ipc: Context|MsgStream,
@@ -299,9 +296,6 @@ class PldRx(Struct):
                         if not is_started_send_side
                         else ipc._actor.uid
                     ),
-                    # tb=valerr.__traceback__,
-                    # tb_str=mte._message,
-                    # message=mte._message,
                 )
                 mte._ipc_msg = err_msg
 
@@ -317,29 +311,6 @@ class PldRx(Struct):
                 # validation error.
                 src_err = valerr
 
-                # TODO: should we instead make this explicit and
-                # use the above masked `is_started_send_decode`,
-                # expecting the `Context.started()` caller to set
-                # it? Rn this is kinda, howyousayyy, implicitly
-                # edge-case-y..
-                # TODO: remove this since it's been added to
-                # `_raise_from_unexpected_msg()`..?
-                # if (
-                #     expect_msg is not Started
-                #     and not is_started_send_side
-                # ):
-                #     # set emulated remote error more-or-less as the
-                #     # runtime would
-                #     ctx: Context = getattr(ipc, 'ctx', ipc)
-                #     ctx._maybe_cancel_and_set_remote_error(mte)
-
-            # XXX some other decoder specific failure?
-            # except TypeError as src_error:
-            #     from .devx import mk_pdb
-            #     mk_pdb().set_trace()
-            #     raise src_error
-            # ^-TODO-^ can remove?
-
             # a runtime-internal RPC endpoint response.
             # always passthrough since (internal) runtime
             # responses are generally never exposed to consumer
@@ -435,6 +406,8 @@ class PldRx(Struct):
                 __tracebackhide__: bool = False
                 raise
 
+    dec_msg = decode_pld
+
     async def recv_msg_w_pld(
         self,
         ipc: Context|MsgStream,
@@ -463,7 +436,7 @@ class PldRx(Struct):
         # TODO: is there some way we can inject the decoded
         # payload into an existing output buffer for the original
         # msg instance?
-        pld: PayloadT = self.dec_msg(
+        pld: PayloadT = self.decode_pld(
             msg,
             ipc=ipc,
             expect_msg=expect_msg,
@@ -610,7 +583,10 @@ async def drain_to_final_msg(
                 # only when we are sure the remote error is
                 # the source cause of this local task's
                 # cancellation.
-                ctx.maybe_raise()
+                ctx.maybe_raise(
+                    # TODO: when to use this?
+                    # from_src_exc=taskc,
+                )
 
                 # CASE 1: we DID request the cancel we simply
                 # continue to bubble up as normal.
@@ -783,7 +759,7 @@ def validate_payload_msg(
     try:
         roundtripped: Started = codec.decode(msg_bytes)
         ctx: Context = getattr(ipc, 'ctx', ipc)
-        pld: PayloadT = ctx.pld_rx.dec_msg(
+        pld: PayloadT = ctx.pld_rx.decode_pld(
             msg=roundtripped,
             ipc=ipc,
             expect_msg=Started,
-- 
2.34.1

From 319dda77b4aaa4e9325052a50ba6ef7bd9ceb824 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Thu, 30 May 2024 17:52:24 -0400
Subject: [PATCH 144/305] Finally, officially support shielded REPL-ing!

It's been a long time prepped and now finally implemented!

Offer a `shield: bool` argument from our async `._debug` APIs:
- `await tractor.pause(shield=True)`,
- `await tractor.post_mortem(shield=True)`

^-These-^ can now be used inside cancelled `trio.CancelScope`s,
something very handy when introspecting complex (distributed) system
tear/shut-downs particularly under remote error or (inter-peer)
cancellation conditions B)

Thanks to previous prepping in a prior attempt and various patches from
the rigorous rework of `.devx._debug` internals around typed msg specs,
there ain't much that was needed!

Impl deats
- obvi passthrough `shield` from the public API endpoints (was already
  done from a prior attempt).
- put ad-hoc internal `with trio.CancelScope(shield=shield):` around all
  checkpoints inside `._pause()` for both the root-process and subactor
  case branches.

Add a fairly rigorous example, `examples/debugging/shielded_pause.py`
with a wrapping `pexpect` test, `test_debugger.test_shield_pause()` and
ensure it covers as many cases as i can think of offhand:

- multiple `.pause()` entries in a loop despite parent scope
  cancellation in a subactor RPC task which itself spawns a sub-task.

- a `trio.Nursery.parent_task` which raises, is handled and
  tries to enter an unshielded `.post_mortem()`, which of course
  internally raises `Cancelled` in a `._pause()` checkpoint, so we catch
  the `Cancelled` again and then debug the debugger's internal
  cancellation with specific checks for the particular raising
  checkpoint-LOC.

- do ^- the latter -^ for both subactor and root cases to ensure we
  can debug `._pause()` itself when it tries to REPL engage from
  a cancelled task scope Bo
---
 examples/debugging/shielded_pause.py | 88 ++++++++++++++++++++++++++++
 tests/test_debugger.py               | 79 +++++++++++++++++++++++--
 tractor/devx/_debug.py               | 59 +++++++++++--------
 3 files changed, 199 insertions(+), 27 deletions(-)
 create mode 100644 examples/debugging/shielded_pause.py

diff --git a/examples/debugging/shielded_pause.py b/examples/debugging/shielded_pause.py
new file mode 100644
index 00000000..3e34d8fc
--- /dev/null
+++ b/examples/debugging/shielded_pause.py
@@ -0,0 +1,88 @@
+import trio
+import tractor
+
+
+async def cancellable_pause_loop(
+    task_status: trio.TaskStatus[trio.CancelScope] = trio.TASK_STATUS_IGNORED
+):
+    with trio.CancelScope() as cs:
+        task_status.started(cs)
+        for _ in range(3):
+            try:
+                # ON first entry, there is no level triggered
+                # cancellation yet, so this cp does a parent task
+                # ctx-switch so that this scope raises for the NEXT
+                # checkpoint we hit.
+                await trio.lowlevel.checkpoint()
+                await tractor.pause()
+
+                cs.cancel()
+
+                # parent should have called `cs.cancel()` by now
+                await trio.lowlevel.checkpoint()
+
+            except trio.Cancelled:
+                print('INSIDE SHIELDED PAUSE')
+                await tractor.pause(shield=True)
+            else:
+                # should raise it again, bubbling up to parent
+                print('BUBBLING trio.Cancelled to parent task-nursery')
+                await trio.lowlevel.checkpoint()
+
+
+async def pm_on_cancelled():
+    async with trio.open_nursery() as tn:
+        tn.cancel_scope.cancel()
+        try:
+            await trio.sleep_forever()
+        except trio.Cancelled:
+            # should also raise `Cancelled` since
+            # we didn't pass `shield=True`.
+            try:
+                await tractor.post_mortem(hide_tb=False)
+            except trio.Cancelled as taskc:
+
+                # should enter just fine, in fact it should
+                # be debugging the internals of the previous
+                # sin-shield call above Bo
+                await tractor.post_mortem(
+                    hide_tb=False,
+                    shield=True,
+                )
+                raise taskc
+
+            else:
+                raise RuntimeError('Dint cancel as expected!?')
+
+
+async def cancelled_before_pause(
+):
+    '''
+    Verify that using a shielded pause works despite surrounding
+    cancellation called state in the calling task.
+
+    '''
+    async with trio.open_nursery() as tn:
+        cs: trio.CancelScope = await tn.start(cancellable_pause_loop)
+        await trio.sleep(0.1)
+
+    assert cs.cancelled_caught
+
+    await pm_on_cancelled()
+
+
+async def main():
+    async with tractor.open_nursery(
+        debug_mode=True,
+    ) as n:
+        portal: tractor.Portal = await n.run_in_actor(
+            cancelled_before_pause,
+        )
+        await portal.result()
+
+        # ensure the same works in the root actor!
+        await pm_on_cancelled()
+
+
+if __name__ == '__main__':
+    trio.run(main)
diff --git a/tests/test_debugger.py b/tests/test_debugger.py
index ac7b0feb..88453682 100644
--- a/tests/test_debugger.py
+++ b/tests/test_debugger.py
@@ -1232,6 +1232,81 @@ def test_post_mortem_api(
     child.expect(pexpect.EOF)
 
 
+def test_shield_pause(
+    spawn,
+):
+    '''
+    Verify the `tractor.pause()/.post_mortem()` API works inside an
+    already cancelled `trio.CancelScope` and that you can step to the
+    next checkpoint wherein the cancelled will get raised.
+
+    '''
+    child = spawn('shielded_pause')
+
+    # First entry is via manual `.post_mortem()`
+    child.expect(PROMPT)
+    assert_before(
+        child,
+        [
+            _pause_msg,
+            "cancellable_pause_loop'",
+            "('cancelled_before_pause'",  # actor name
+        ]
+    )
+
+    # since 3 tries in ex. shield pause loop
+    for i in range(3):
+        child.sendline('c')
+        child.expect(PROMPT)
+        assert_before(
+            child,
+            [
+                _pause_msg,
+                "INSIDE SHIELDED PAUSE",
+                "('cancelled_before_pause'",  # actor name
+            ]
+        )
+
+    # back inside parent task that opened nursery
+    child.sendline('c')
+    child.expect(PROMPT)
+    assert_before(
+        child,
+        [
+            _crash_msg,
+            "('cancelled_before_pause'",  # actor name
+            "Failed to engage debugger via `_pause()`",
+            "trio.Cancelled",
+            "raise Cancelled._create()",
+
+            # we should be handling a taskc inside
+            # the first `.post_mortem()` sin-shield!
+            'await DebugStatus.req_finished.wait()',
+        ]
+    )
+
+    # same as above but in the root actor's task
+    child.sendline('c')
+    child.expect(PROMPT)
+    assert_before(
+        child,
+        [
+            _crash_msg,
+            "('root'",  # actor name
+            "Failed to engage debugger via `_pause()`",
+            "trio.Cancelled",
+            "raise Cancelled._create()",
+
+            # handling a taskc inside the first unshielded
+            # `.post_mortem()`.
+            # BUT in this case in the root-proc path ;)
+            'wait Lock._debug_lock.acquire()',
+        ]
+    )
+    child.sendline('c')
+    child.expect(pexpect.EOF)
+
+
 # TODO: needs ANSI code stripping tho, see `assert_before()`
 # above!
 def test_correct_frames_below_hidden():
     '''
@@ -1253,7 +1328,3 @@ def test_cant_pause_from_paused_task():
 
     '''
     ...
-
-
-def test_shield_pause():
-    ...
diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py
index 753c1985..2f0e7e12 100644
--- a/tractor/devx/_debug.py
+++ b/tractor/devx/_debug.py
@@ -1600,25 +1600,27 @@ async def _pause(
             f'REPL: {Lock.repl}\n'
             # TODO: use `._frame_stack` scanner to find the @api_frame
         )
-        await trio.lowlevel.checkpoint()
+        with trio.CancelScope(shield=shield):
+            await trio.lowlevel.checkpoint()
         return
 
     # XXX: since we need to enter pdb synchronously below,
     # we have to release the lock manually from pdb completion
    # callbacks. Can't think of a nicer way then this atm.
-    if Lock._debug_lock.locked():
-        log.warning(
-            'attempting to shield-acquire active TTY lock owned by\n'
-            f'{ctx}'
-        )
+    with trio.CancelScope(shield=shield):
+        if Lock._debug_lock.locked():
+            log.warning(
+                'attempting to shield-acquire active TTY lock owned by\n'
+                f'{ctx}'
+            )
 
-        # must shield here to avoid hitting a ``Cancelled`` and
-        # a child getting stuck bc we clobbered the tty
-        with trio.CancelScope(shield=True):
+            # must shield here to avoid hitting a ``Cancelled`` and
+            # a child getting stuck bc we clobbered the tty
+            # with trio.CancelScope(shield=True):
+            await Lock._debug_lock.acquire()
+        else:
+            # may be cancelled
             await Lock._debug_lock.acquire()
-    else:
-        # may be cancelled
-        await Lock._debug_lock.acquire()
 
     # enter REPL from root, no TTY locking IPC ctx necessary
     _enter_repl_sync(debug_func)
@@ -1659,7 +1661,8 @@ async def _pause(
             f'{task.name}@{actor.uid} already has TTY lock\n'
             f'ignoring..'
         )
-        await trio.lowlevel.checkpoint()
+        with trio.CancelScope(shield=shield):
+            await trio.lowlevel.checkpoint()
         return
 
     else:
@@ -1671,8 +1674,9 @@ async def _pause(
                 f'{task}@{actor.uid} already has TTY lock\n'
                 f'waiting for release..'
             )
-            await DebugStatus.repl_release.wait()
-            await trio.sleep(0.1)
+            with trio.CancelScope(shield=shield):
+                await DebugStatus.repl_release.wait()
+                await trio.sleep(0.1)
 
         elif (
             req_task
@@ -1683,7 +1687,8 @@ async def _pause(
                 'Waiting for previous request to complete..\n'
             )
-            await DebugStatus.req_finished.wait()
+            with trio.CancelScope(shield=shield):
+                await DebugStatus.req_finished.wait()
 
     # this **must** be awaited by the caller and is done using the
     # root nursery so that the debugger can continue to run without
@@ -1721,14 +1726,15 @@ async def _pause(
             'Starting request task\n'
             f'|_{task}\n'
         )
-        req_ctx: Context = await actor._service_n.start(
-            partial(
-                request_root_stdio_lock,
-                actor_uid=actor.uid,
-                task_uid=(task.name, id(task)),  # task uuid (effectively)
-                shield=shield,
+        with trio.CancelScope(shield=shield):
+            req_ctx: Context = await actor._service_n.start(
+                partial(
+                    request_root_stdio_lock,
+                    actor_uid=actor.uid,
+                    task_uid=(task.name, id(task)),  # task uuid (effectively)
+                    shield=shield,
+                )
             )
-        )
        # XXX sanity, our locker task should be the one which
        # entered a new IPC ctx with the root actor, NOT the one
        # that exists around the task calling into `._pause()`.
@@ -2147,6 +2153,13 @@ async def post_mortem(
     **_pause_kwargs,
 
 ) -> None:
+    '''
+    `tractor`'s builtin async equivalent of `pdb.post_mortem()`
+    which can be used inside exception handlers.
+ + It's also used for the crash handler when `debug_mode == True` ;) + + ''' __tracebackhide__: bool = hide_tb tb: TracebackType = tb or sys.exc_info()[2] -- 2.34.1 From dc5d622e708e4b2a8bb86fc8a14a2c83bab31f2f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 30 May 2024 18:33:25 -0400 Subject: [PATCH 145/305] Woops, set `post_mortem=False` by default again! --- tests/test_pldrx_limiting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pldrx_limiting.py b/tests/test_pldrx_limiting.py index 5cb0d357..ddf2a234 100644 --- a/tests/test_pldrx_limiting.py +++ b/tests/test_pldrx_limiting.py @@ -315,7 +315,7 @@ def test_basic_payload_spec( f"match type-spec: `{msg_type_str}.pld: PldMsg|NoneType`", ], # only for debug - post_mortem=True, + # post_mortem=True, ), p.open_context( child, -- 2.34.1 From c39427dc159648359ad5195ceb38efd0aa7e6a50 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 31 May 2024 12:03:18 -0400 Subject: [PATCH 146/305] Drop sub-decoder proto-cruft from `.msg._codec` It ended up getting necessarily implemented as the `PldRx` though at a different layer and won't be needed as part of `MsgCodec` most likely, though this original idea did provide the source of inspiration for how things work now! Also Move the commented TODO proto for a codec hook factory from `.types` to `._codec` where it prolly better fits and update some msg related todo/questions. --- tractor/msg/_codec.py | 196 ++++++++---------------------------------- tractor/msg/types.py | 62 +++++-------- 2 files changed, 58 insertions(+), 200 deletions(-) diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index e1c59e94..c1301bd2 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -52,10 +52,6 @@ from msgspec import ( msgpack, Raw, ) -# from trio.lowlevel import ( -# RunVar, -# RunVarToken, -# ) # TODO: see notes below from @mikenerone.. # from tricycle import TreeVar @@ -368,160 +364,16 @@ class MsgCodec(Struct): # https://jcristharif.com/msgspec/usage.html#typed-decoding return self._dec.decode(msg) - # TODO: a sub-decoder system as well? - # payload_msg_specs: Union[Type[Struct]] = Any - # see related comments in `.msg.types` - # _payload_decs: ( - # dict[ - # str, - # msgpack.Decoder, - # ] - # |None - # ) = None - # OR - # ) = { - # # pre-seed decoders for std-py-type-set for use when - # # `MsgType.pld == None|Any`. - # None: msgpack.Decoder(Any), - # Any: msgpack.Decoder(Any), - # } - # - # -[ ] do we still want to try and support the sub-decoder with - # `.Raw` technique in the case that the `Generic` approach gives - # future grief? - # - # -[ ] - # -> https://jcristharif.com/msgspec/api.html#raw - # - #def mk_pld_subdec( - # self, - # payload_types: Union[Type[Struct]], - #) -> msgpack.Decoder: - # # TODO: sub-decoder suppor for `.pld: Raw`? - # # => see similar notes inside `.msg.types`.. - # # - # # not sure we'll end up needing this though it might have - # # unforeseen advantages in terms of enabling encrypted - # # appliciation layer (only) payloads? - # # - # # register sub-payload decoders to load `.pld: Raw` - # # decoded `Msg`-packets using a dynamic lookup (table) - # # instead of a pre-defined msg-spec via `Generic` - # # parameterization. 
- # # - # ( - # tags, - # payload_dec, - # ) = mk_tagged_union_dec( - # tagged_structs=list(payload_types.__args__), - # ) - # # register sub-decoders by tag - # subdecs: dict[str, msgpack.Decoder]|None = self._payload_decs - # for name in tags: - # subdecs.setdefault( - # name, - # payload_dec, - # ) - - # return payload_dec - - # sub-decoders for retreiving embedded - # payload data and decoding to a sender - # side defined (struct) type. - # def dec_payload( - # codec: MsgCodec, - # msg: Msg, - - # ) -> Any|Struct: - - # msg: PayloadMsg = codec.dec.decode(msg) - # payload_tag: str = msg.header.payload_tag - # payload_dec: msgpack.Decoder = codec._payload_decs[payload_tag] - # return payload_dec.decode(msg.pld) - - # def enc_payload( - # codec: MsgCodec, - # payload: Any, - # cid: str, - - # ) -> bytes: - - # # tag_field: str|None = None - - # plbytes = codec.enc.encode(payload) - # if b'msg_type' in plbytes: - # assert isinstance(payload, Struct) - - # # tag_field: str = type(payload).__name__ - # payload = msgspec.Raw(plbytes) - - # msg = Msg( - # cid=cid, - # pld=payload, - # # Header( - # # payload_tag=tag_field, - # # # dialog_id, - # # ), - # ) - # return codec.enc.encode(msg) - - - -# TODO: sub-decoded `Raw` fields? -# -[ ] see `MsgCodec._payload_decs` notes +# [x] TODO: a sub-decoder system as well? => No! # -# XXX if we wanted something more complex then field name str-keys -# we might need a header field type to describe the lookup sys? -# class Header(Struct, tag=True): -# ''' -# A msg header which defines payload properties - -# ''' -# payload_tag: str|None = None - - - #def mk_tagged_union_dec( - # tagged_structs: list[Struct], - - #) -> tuple[ - # list[str], - # msgpack.Decoder, - #]: - # ''' - # Create a `msgpack.Decoder` for an input `list[msgspec.Struct]` - # and return a `list[str]` of each struct's `tag_field: str` value - # which can be used to "map to" the initialized dec. - - # ''' - # # See "tagged unions" docs: - # # https://jcristharif.com/msgspec/structs.html#tagged-unions - - # # "The quickest way to enable tagged unions is to set tag=True when - # # defining every struct type in the union. In this case tag_field - # # defaults to "type", and tag defaults to the struct class name - # # (e.g. "Get")." - # first: Struct = tagged_structs[0] - # types_union: Union[Type[Struct]] = Union[ - # first - # ]|Any - # tags: list[str] = [first.__name__] - - # for struct in tagged_structs[1:]: - # types_union |= struct - # tags.append( - # getattr( - # struct, - # struct.__struct_config__.tag_field, - # struct.__name__, - # ) - # ) - - # dec = msgpack.Decoder(types_union) - # return ( - # tags, - # dec, - # ) +# -[x] do we still want to try and support the sub-decoder with +# `.Raw` technique in the case that the `Generic` approach gives +# future grief? +# => NO, since we went with the `PldRx` approach instead B) +# +# IF however you want to see the code that was staged for this +# from wayyy back, see the pure removal commit. def mk_codec( @@ -644,10 +496,6 @@ _def_tractor_codec: MsgCodec = mk_codec( # 3. We similarly set the pending values for the child nurseries # of the *current* task. # - -# TODO: STOP USING THIS, since it's basically a global and won't -# allow sub-IPC-ctxs to limit the msg-spec however desired.. 
-# _ctxvar_MsgCodec: MsgCodec = RunVar(
 _ctxvar_MsgCodec: ContextVar[MsgCodec] = ContextVar(
     'msgspec_codec',
     default=_def_tractor_codec,
@@ -782,3 +630,31 @@ def limit_msg_spec(
 #     # import pdbp; pdbp.set_trace()
 #     assert ext_codec.pld_spec == extended_spec
 #     yield ext_codec
+
+
+# TODO: make something similar to this inside `._codec` such that
+# user can just pass a type table of some sort?
+# -[ ] we would need to decode all msgs to `pretty_struct.Struct`
+#     and then call `.to_dict()` on them?
+# -[x] we're going to need to re-impl all the stuff changed in the
+#     runtime port such that it can handle dicts or `Msg`s?
+#
+# def mk_dict_msg_codec_hooks() -> tuple[Callable, Callable]:
+#     '''
+#     Deliver a `enc_hook()`/`dec_hook()` pair which does
+#     manual conversion from our above native `Msg` set
+#     to `dict` equivalent (wire msgs) in order to keep legacy compat
+#     with the original runtime implementation.
+#
+#     Note: this is/was primarily used while moving the core
+#     runtime over to using native `Msg`-struct types wherein we
+#     start with the send side emitting without loading
+#     a typed-decoder and then later flipping the switch over to
+#     load to the native struct types once all runtime usage has
+#     been adjusted appropriately.
+#
+#     '''
+#     return (
+#         # enc_to_dict,
+#         dec_from_dict,
+#     )
diff --git a/tractor/msg/types.py b/tractor/msg/types.py
index ad6d6fb8..0fc0ee96 100644
--- a/tractor/msg/types.py
+++ b/tractor/msg/types.py
@@ -26,7 +26,6 @@ from __future__ import annotations
 import types
 from typing import (
     Any,
-    # Callable,
     Generic,
     Literal,
     Type,
@@ -161,7 +160,6 @@ class SpawnSpec(
 
     bind_addrs: list[tuple[str, int]]
 
-
 # TODO: caps based RPC support in the payload?
 #
 # -[ ] integration with our ``enable_modules: list[str]`` caps sys.
@@ -314,8 +312,9 @@ class Started(
     pld: PayloadT|Raw
 
 
-# TODO: instead of using our existing `Start`
-# for this (as we did with the original `{'cmd': ..}` style)
+# TODO: cancel request dedicated msg?
+# -[ ] instead of using our existing `Start`?
+#
 # class Cancel:
 #     cid: str
 
@@ -477,12 +476,16 @@ def from_dict_msg(
     )
     return msgT(**dict_msg)
 
-# TODO: should be make a msg version of `ContextCancelled?`
-# and/or with a scope field or a full `ActorCancelled`?
+# TODO: should be make a set of cancel msgs?
+# -[ ] a version of `ContextCancelled`?
+#     |_ and/or with a scope field?
+# -[ ] or, a full `ActorCancelled`?
+#
 # class Cancelled(MsgType):
 #     cid: str
-
-# TODO what about overruns?
+#
+# -[ ] what about overruns?
+#
 # class Overrun(MsgType):
 #     cid: str
 
@@ -564,10 +567,17 @@ def mk_msg_spec(
     Create a payload-(data-)type-parameterized IPC message specification.
 
     Allows generating IPC msg types from the above builtin set
-    with a payload (field) restricted data-type via the `Msg.pld:
-    PayloadT` type var. This allows runtime-task contexts to use
-    the python type system to limit/filter payload values as
-    determined by the input `payload_type_union: Union[Type]`.
+    with a payload (field) restricted data-type, the `Msg.pld: PayloadT`.
+
+    This allows runtime-task contexts to use the python type system
+    to limit/filter payload values as determined by the input
+    `payload_type_union: Union[Type]`.
+
+    Notes: originally multiple approaches for constructing the
+    type-union passed to `msgspec` were attempted as selected via the
+    `spec_build_method`, but it turns out only the default method
+    'indexed_generics' seems to work reliably in all use cases. As
+    such, the others will likely be removed in the near future.
     '''
     submsg_types: list[MsgType] = Msg.__subclasses__()
@@ -707,31 +717,3 @@ def mk_msg_spec(
         +
         ipc_msg_types,
     )
-
-
-# TODO: make something similar to this inside `._codec` such that
-# user can just pass a type table of some sort?
-# -[ ] we would need to decode all msgs to `pretty_struct.Struct`
-# and then call `.to_dict()` on them?
-# -[ ] we're going to need to re-impl all the stuff changed in the
-# runtime port such that it can handle dicts or `Msg`s?
-#
-# def mk_dict_msg_codec_hooks() -> tuple[Callable, Callable]:
-#     '''
-#     Deliver a `enc_hook()`/`dec_hook()` pair which does
-#     manual convertion from our above native `Msg` set
-#     to `dict` equivalent (wire msgs) in order to keep legacy compat
-#     with the original runtime implementation.
-#
-#     Note: this is is/was primarly used while moving the core
-#     runtime over to using native `Msg`-struct types wherein we
-#     start with the send side emitting without loading
-#     a typed-decoder and then later flipping the switch over to
-#     load to the native struct types once all runtime usage has
-#     been adjusted appropriately.
-#
-#     '''
-#     return (
-#         # enc_to_dict,
-#         dec_from_dict,
-#     )
-- 
2.34.1

From 4589ff307c68d9079312a6ee339f858159881e16 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Fri, 31 May 2024 14:40:55 -0400
Subject: [PATCH 147/305] Use `Context` repr APIs for RPC outcome logs

Delegate to the new `.repr_state: str` and adjust log level based on
error vs. cancel vs. result.
---
 tractor/_rpc.py | 39 +++++++++++++--------------------------
 1 file changed, 13 insertions(+), 26 deletions(-)

diff --git a/tractor/_rpc.py b/tractor/_rpc.py
index 57db52f3..8a9b3487 100644
--- a/tractor/_rpc.py
+++ b/tractor/_rpc.py
@@ -738,37 +738,24 @@ async def _invoke(
                 cid,
             ))
 
+            logmeth: Callable = log.runtime
             merr: Exception|None = ctx.maybe_error
-            (
-                res_type_str,
-                res_str,
-            ) = (
-                ('error', f'{type(merr)}',) if merr
-                else (
-                    'result',
-                    f'`{repr(ctx.outcome)}`',
-                )
-            )
+            descr_str: str = f'with final result `{repr(ctx.outcome)}`'
             message: str = (
-                f'IPC context terminated with a final {res_type_str}\n\n'
-                f'{ctx}'
+                f'IPC context terminated {descr_str}\n\n'
             )
             if merr:
-                from tractor import RemoteActorError
-                if not isinstance(merr, RemoteActorError):
-                    fmt_merr: str = (
-                        f'\n{merr!r}\n'
-                        # f'{merr.args[0]!r}\n'
-                    )
-                else:
-                    fmt_merr = f'\n{merr!r}'
-                log.error(
-                    message
-                    +
-                    fmt_merr
-                )
+                descr_str: str = (
+                    f'with ctx having {ctx.repr_state!r}\n'
+                    f'{ctx.repr_outcome()}\n'
+                )
+                if isinstance(merr, ContextCancelled):
+                    logmeth: Callable = log.runtime
+                else:
+                    logmeth: Callable = log.error
+                message = f'IPC context terminated {descr_str}\n{merr!r}\n'
-            else:
-                log.runtime(message)
+
+            logmeth(message)
 
 
 async def try_ship_error_to_remote(
-- 
2.34.1

From 0687dac97a4ce8ee9f95ec0d4878356b58a03507 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Fri, 31 May 2024 17:32:11 -0400
Subject: [PATCH 148/305] Move `Context.open_stream()` impl to `._streaming`

Exactly like how it's organized for `Portal.open_context()`, put the
main streaming API `@acm` with the `MsgStream` code and bind the method
to the new module func.

Other,
- rename `Context.result()` -> `.wait_for_result()` to better match the
  blocking semantics and rebind `.result()` as deprecated.
- add doc-str for `Context.maybe_raise()`.
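
As a rough usage sketch of the rename from the parent side (assuming
this patch is applied; the child fn and actor name below are made up
purely for illustration):

    import trio
    import tractor


    @tractor.context
    async def child(ctx: tractor.Context):
        # sync with the parent then return (the final result).
        await ctx.started('synced')


    async def main():
        async with tractor.open_nursery() as an:
            portal = await an.start_actor(
                'result_waiter',
                enable_modules=[__name__],
            )
            async with portal.open_context(child) as (ctx, first):
                assert first == 'synced'
                # new explicitly-blocking name; the rebound
                # `.result()` alias still works but now emits
                # a deprecation warning via `log.warning()`.
                assert await ctx.wait_for_result() is None

            await portal.cancel_actor()


    if __name__ == '__main__':
        trio.run(main)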
--- tractor/_context.py | 247 +++++++----------------------------------- tractor/_streaming.py | 209 +++++++++++++++++++++++++++++++++++ 2 files changed, 248 insertions(+), 208 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 453e9065..dd4ad846 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -86,7 +86,10 @@ from .msg import ( from ._ipc import ( Channel, ) -from ._streaming import MsgStream +from ._streaming import ( + MsgStream, + open_stream_from_ctx, +) from ._state import ( current_actor, debug_mode, @@ -978,198 +981,6 @@ class Context: assert self._scope self._scope.cancel() - # TODO? should we move this to `._streaming` much like we - # moved `Portal.open_context()`'s def to this mod? - @acm - async def open_stream( - self, - allow_overruns: bool|None = False, - msg_buffer_size: int|None = None, - - ) -> AsyncGenerator[MsgStream, None]: - ''' - Open a ``MsgStream``, a bi-directional stream connected to the - cross-actor (far end) task for this ``Context``. - - This context manager must be entered on both the caller and - callee for the stream to logically be considered "connected". - - A ``MsgStream`` is currently "one-shot" use, meaning if you - close it you can not "re-open" it for streaming and instead you - must re-establish a new surrounding ``Context`` using - ``Portal.open_context()``. In the future this may change but - currently there seems to be no obvious reason to support - "re-opening": - - pausing a stream can be done with a message. - - task errors will normally require a restart of the entire - scope of the inter-actor task context due to the nature of - ``trio``'s cancellation system. - - ''' - actor: Actor = self._actor - - # If the surrounding context has been cancelled by some - # task with a handle to THIS, we error here immediately - # since it likely means the surrounding lexical-scope has - # errored, been `trio.Cancelled` or at the least - # `Context.cancel()` was called by some task. - if self._cancel_called: - - # XXX NOTE: ALWAYS RAISE any remote error here even if - # it's an expected `ContextCancelled` due to a local - # task having called `.cancel()`! - # - # WHY: we expect the error to always bubble up to the - # surrounding `Portal.open_context()` call and be - # absorbed there (silently) and we DO NOT want to - # actually try to stream - a cancel msg was already - # sent to the other side! - self.maybe_raise( - raise_ctxc_from_self_call=True, - ) - # NOTE: this is diff then calling - # `._maybe_raise_remote_err()` specifically - # because we want to raise a ctxc on any task entering this `.open_stream()` - # AFTER cancellation was already been requested, - # we DO NOT want to absorb any ctxc ACK silently! - # if self._remote_error: - # raise self._remote_error - - # XXX NOTE: if no `ContextCancelled` has been responded - # back from the other side (yet), we raise a different - # runtime error indicating that this task's usage of - # `Context.cancel()` and then `.open_stream()` is WRONG! 
- task: str = trio.lowlevel.current_task().name - raise RuntimeError( - 'Stream opened after `Context.cancel()` called..?\n' - f'task: {actor.uid[0]}:{task}\n' - f'{self}' - ) - - if ( - not self._portal - and not self._started_called - ): - raise RuntimeError( - 'Context.started()` must be called before opening a stream' - ) - - # NOTE: in one way streaming this only happens on the - # parent-ctx-task side (on the side that calls - # `Actor.start_remote_task()`) so if you try to send - # a stop from the caller to the callee in the - # single-direction-stream case you'll get a lookup error - # currently. - ctx: Context = actor.get_context( - chan=self.chan, - cid=self.cid, - nsf=self._nsf, - # side=self.side, - - msg_buffer_size=msg_buffer_size, - allow_overruns=allow_overruns, - ) - ctx._allow_overruns: bool = allow_overruns - assert ctx is self - - # XXX: If the underlying channel feeder receive mem chan has - # been closed then likely client code has already exited - # a ``.open_stream()`` block prior or there was some other - # unanticipated error or cancellation from ``trio``. - - if ctx._rx_chan._closed: - raise trio.ClosedResourceError( - 'The underlying channel for this stream was already closed!\n' - ) - - # NOTE: implicitly this will call `MsgStream.aclose()` on - # `.__aexit__()` due to stream's parent `Channel` type! - # - # XXX NOTE XXX: ensures the stream is "one-shot use", - # which specifically means that on exit, - # - signal ``trio.EndOfChannel``/``StopAsyncIteration`` to - # the far end indicating that the caller exited - # the streaming context purposefully by letting - # the exit block exec. - # - this is diff from the cancel/error case where - # a cancel request from this side or an error - # should be sent to the far end indicating the - # stream WAS NOT just closed normally/gracefully. - async with MsgStream( - ctx=self, - rx_chan=ctx._rx_chan, - ) as stream: - - # NOTE: we track all existing streams per portal for - # the purposes of attempting graceful closes on runtime - # cancel requests. - if self._portal: - self._portal._streams.add(stream) - - try: - self._stream_opened: bool = True - self._stream = stream - - # XXX: do we need this? - # ensure we aren't cancelled before yielding the stream - # await trio.lowlevel.checkpoint() - yield stream - - # XXX: (MEGA IMPORTANT) if this is a root opened process we - # wait for any immediate child in debug before popping the - # context from the runtime msg loop otherwise inside - # ``Actor._deliver_ctx_payload()`` the msg will be discarded and in - # the case where that msg is global debugger unlock (via - # a "stop" msg for a stream), this can result in a deadlock - # where the root is waiting on the lock to clear but the - # child has already cleared it and clobbered IPC. - # - # await maybe_wait_for_debugger() - - # XXX TODO: pretty sure this isn't needed (see - # note above this block) AND will result in - # a double `.send_stop()` call. The only reason to - # put it here would be to due with "order" in - # terms of raising any remote error (as per - # directly below) or bc the stream's - # `.__aexit__()` block might not get run - # (doubtful)? Either way if we did put this back - # in we also need a state var to avoid the double - # stop-msg send.. - # - # await stream.aclose() - - # NOTE: absorb and do not raise any - # EoC received from the other side such that - # it is not raised inside the surrounding - # context block's scope! 
-            except trio.EndOfChannel as eoc:
-                if (
-                    eoc
-                    and
-                    stream.closed
-                ):
-                    # sanity, can remove?
-                    assert eoc is stream._eoc
-
-                    log.warning(
-                        'Stream was terminated by EoC\n\n'
-                        # NOTE: won't show the error but
-                        # does show txt followed by IPC msg.
-                        f'{str(eoc)}\n'
-                    )
-
-            finally:
-                if self._portal:
-                    try:
-                        self._portal._streams.remove(stream)
-                    except KeyError:
-                        log.warning(
-                            f'Stream was already destroyed?\n'
-                            f'actor: {self.chan.uid}\n'
-                            f'ctx id: {self.cid}'
-                        )
-
     # TODO: replace all the `._maybe_raise_remote_err()` usage
     # with instances of this!!
     def maybe_raise(
@@ -1178,6 +989,14 @@ class Context:
         **kwargs,
 
     ) -> Exception|None:
+        '''
+        Check for a remote error delivered by the runtime from
+        our peer (task); if set immediately raise.
+
+        This is a convenience wrapper for
+        `._maybe_raise_remote_err(self._remote_error)`.
+
+        '''
         __tracebackhide__: bool = hide_tb
         if re := self._remote_error:
             return self._maybe_raise_remote_err(
@@ -1290,8 +1109,7 @@ class Context:
 
         raise remote_error
 
-    # TODO: change to `.wait_for_result()`?
-    async def result(
+    async def wait_for_result(
         self,
         hide_tb: bool = True,
 
@@ -1380,18 +1198,27 @@ class Context:
                 (not self._cancel_called)
             )
         )
+        # TODO: eventually make `.outcome: Outcome` and thus return
+        # `self.outcome.unwrap()` here!
         return self.outcome
 
     # TODO: switch this with above!
    # -[ ] should be named `.wait_for_outcome()` and instead do
    # a `.outcome.Outcome.unwrap()` ?
    #
-    # @property
-    # def result(self) -> Any|None:
-    #     if self._final_result_is_set():
-    #         return self._result
-
-    #     raise RuntimeError('No result is available!')
+    async def result(
+        self,
+        *args,
+        **kwargs,
+    ) -> Any|Exception:
+        log.warning(
+            '`Context.result()` is DEPRECATED!\n'
+            'Use `Context.[no]wait_for_result()` instead!\n'
+        )
+        return await self.wait_for_result(
+            *args,
+            **kwargs,
+        )
 
     @property
     def maybe_error(self) -> BaseException|None:
@@ -1447,6 +1274,9 @@ class Context:
         return self._result is not Unresolved
 
     # def get_result_nowait(self) -> Any|None:
+    # def get_outcome_nowait(self) -> Any|None:
+    # def recv_result_nowait(self) -> Any|None:
+    # def receive_outcome_nowait(self) -> Any|None:
     # TODO: use `outcome.Outcome` here instead?
     @property
     def outcome(self) -> (
@@ -1476,7 +1306,6 @@ class Context:
     def has_outcome(self) -> bool:
         return bool(self.maybe_error) or self._final_result_is_set()
 
-    # @property
     def repr_outcome(
         self,
         show_error_fields: bool = False,
@@ -1498,7 +1327,8 @@ class Context:
             # just deliver the type name.
             if (
                 (reprol := getattr(merr, 'reprol', False))
-                and show_error_fields
+                and
+                show_error_fields
             ):
                 return reprol()
 
@@ -1515,10 +1345,6 @@ class Context:
                     repr(merr)
                 )
 
-            # just the type name
-            # else:  # but wen?
-            #     return type(merr).__name__
-
             # for all other errors show their regular output
             return (
                 str(merr)
@@ -1572,7 +1398,7 @@ class Context:
                 _,  # any non-unresolved value
                 None,
             ) if self._final_result_is_set():
-                status = 'returned'
+                status = 'result-returned'
 
             # normal operation but still in a pre-`Return`-result
             # dialog phase
@@ -1940,6 +1766,11 @@ class Context:
         # ow, indicate unable to deliver by default
         return False
 
+    # NOTE: similar to `Portal.open_context()`, this impl is found in
+    # the `._streaming` mod to make reading/grokking the details
+    # simpler code-org-wise.
+    open_stream = open_stream_from_ctx
+
 
# TODO: exception tb masking by using a manual
# `.__aexit__()`/.__aenter__()` pair on a type?
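Side note on the `open_stream = open_stream_from_ctx` rebind above:
since the module-level `@acm` func takes the ctx as its first arg,
assigning it as a class attr makes it behave like a normal bound
method. A tiny standalone sketch of the same trick, with made-up names
(nothing below is part of this patch):

    from contextlib import asynccontextmanager as acm
    import trio

    @acm
    async def open_thing_from_obj(obj: 'Thing'):
        # `obj` gets bound just like `self` would be.
        yield obj.payload

    class Thing:
        def __init__(self, payload):
            self.payload = payload

        # same code-org style as `Context.open_stream`: the impl
        # lives at module level, the method is just a rebind.
        open_thing = open_thing_from_obj

    async def main():
        async with Thing(42).open_thing() as payload:
            assert payload == 42

    trio.run(main)

This keeps the (large) impl readable at module scope while preserving
the existing method-call UX on the class.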
diff --git a/tractor/_streaming.py b/tractor/_streaming.py index 016577d3..314a93b8 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -26,6 +26,7 @@ import inspect from pprint import pformat from typing import ( Any, + AsyncGenerator, Callable, AsyncIterator, TYPE_CHECKING, @@ -51,6 +52,7 @@ from tractor.msg import ( ) if TYPE_CHECKING: + from ._runtime import Actor from ._context import Context from ._ipc import Channel @@ -550,6 +552,213 @@ class MsgStream(trio.abc.Channel): # ... +@acm +async def open_stream_from_ctx( + ctx: Context, + allow_overruns: bool|None = False, + msg_buffer_size: int|None = None, + +) -> AsyncGenerator[MsgStream, None]: + ''' + Open a `MsgStream`, a bi-directional msg transport dialog + connected to the cross-actor peer task for an IPC `Context`. + + This context manager must be entered in both the "parent" (task + which entered `Portal.open_context()`) and "child" (RPC task + which is decorated by `@context`) tasks for the stream to + logically be considered "open"; if one side begins sending to an + un-opened peer, depending on policy config, msgs will either be + queued until the other side opens and/or a `StreamOverrun` will + (eventually) be raised. + + ------ - ------ + + Runtime semantics design: + + A `MsgStream` session adheres to "one-shot use" semantics, + meaning if you close the scope it **can not** be "re-opened". + + Instead you must re-establish a new surrounding RPC `Context` + (RTC: remote task context?) using `Portal.open_context()`. + + In the future this *design choice* may need to be changed but + currently there seems to be no obvious reason to support such + semantics.. + + - "pausing a stream" can be supported with a message implemented + by the `tractor` application dev. + + - any remote error will normally require a restart of the entire + `trio.Task`'s scope due to the nature of `trio`'s cancellation + (`CancelScope`) system and semantics (level triggered). + + ''' + actor: Actor = ctx._actor + + # If the surrounding context has been cancelled by some + # task with a handle to THIS, we error here immediately + # since it likely means the surrounding lexical-scope has + # errored, been `trio.Cancelled` or at the least + # `Context.cancel()` was called by some task. + if ctx._cancel_called: + + # XXX NOTE: ALWAYS RAISE any remote error here even if + # it's an expected `ContextCancelled` due to a local + # task having called `.cancel()`! + # + # WHY: we expect the error to always bubble up to the + # surrounding `Portal.open_context()` call and be + # absorbed there (silently) and we DO NOT want to + # actually try to stream - a cancel msg was already + # sent to the other side! + ctx.maybe_raise( + raise_ctxc_from_self_call=True, + ) + # NOTE: this is diff then calling + # `._maybe_raise_remote_err()` specifically + # because we want to raise a ctxc on any task entering this `.open_stream()` + # AFTER cancellation was already been requested, + # we DO NOT want to absorb any ctxc ACK silently! + # if ctx._remote_error: + # raise ctx._remote_error + + # XXX NOTE: if no `ContextCancelled` has been responded + # back from the other side (yet), we raise a different + # runtime error indicating that this task's usage of + # `Context.cancel()` and then `.open_stream()` is WRONG! 
+ task: str = trio.lowlevel.current_task().name + raise RuntimeError( + 'Stream opened after `Context.cancel()` called..?\n' + f'task: {actor.uid[0]}:{task}\n' + f'{ctx}' + ) + + if ( + not ctx._portal + and not ctx._started_called + ): + raise RuntimeError( + 'Context.started()` must be called before opening a stream' + ) + + # NOTE: in one way streaming this only happens on the + # parent-ctx-task side (on the side that calls + # `Actor.start_remote_task()`) so if you try to send + # a stop from the caller to the callee in the + # single-direction-stream case you'll get a lookup error + # currently. + ctx: Context = actor.get_context( + chan=ctx.chan, + cid=ctx.cid, + nsf=ctx._nsf, + # side=ctx.side, + + msg_buffer_size=msg_buffer_size, + allow_overruns=allow_overruns, + ) + ctx._allow_overruns: bool = allow_overruns + assert ctx is ctx + + # XXX: If the underlying channel feeder receive mem chan has + # been closed then likely client code has already exited + # a ``.open_stream()`` block prior or there was some other + # unanticipated error or cancellation from ``trio``. + + if ctx._rx_chan._closed: + raise trio.ClosedResourceError( + 'The underlying channel for this stream was already closed!\n' + ) + + # NOTE: implicitly this will call `MsgStream.aclose()` on + # `.__aexit__()` due to stream's parent `Channel` type! + # + # XXX NOTE XXX: ensures the stream is "one-shot use", + # which specifically means that on exit, + # - signal ``trio.EndOfChannel``/``StopAsyncIteration`` to + # the far end indicating that the caller exited + # the streaming context purposefully by letting + # the exit block exec. + # - this is diff from the cancel/error case where + # a cancel request from this side or an error + # should be sent to the far end indicating the + # stream WAS NOT just closed normally/gracefully. + async with MsgStream( + ctx=ctx, + rx_chan=ctx._rx_chan, + ) as stream: + + # NOTE: we track all existing streams per portal for + # the purposes of attempting graceful closes on runtime + # cancel requests. + if ctx._portal: + ctx._portal._streams.add(stream) + + try: + ctx._stream_opened: bool = True + ctx._stream = stream + + # XXX: do we need this? + # ensure we aren't cancelled before yielding the stream + # await trio.lowlevel.checkpoint() + yield stream + + # XXX: (MEGA IMPORTANT) if this is a root opened process we + # wait for any immediate child in debug before popping the + # context from the runtime msg loop otherwise inside + # ``Actor._deliver_ctx_payload()`` the msg will be discarded and in + # the case where that msg is global debugger unlock (via + # a "stop" msg for a stream), this can result in a deadlock + # where the root is waiting on the lock to clear but the + # child has already cleared it and clobbered IPC. + # + # await maybe_wait_for_debugger() + + # XXX TODO: pretty sure this isn't needed (see + # note above this block) AND will result in + # a double `.send_stop()` call. The only reason to + # put it here would be to due with "order" in + # terms of raising any remote error (as per + # directly below) or bc the stream's + # `.__aexit__()` block might not get run + # (doubtful)? Either way if we did put this back + # in we also need a state var to avoid the double + # stop-msg send.. + # + # await stream.aclose() + + # NOTE: absorb and do not raise any + # EoC received from the other side such that + # it is not raised inside the surrounding + # context block's scope! 
+ except trio.EndOfChannel as eoc: + if ( + eoc + and + stream.closed + ): + # sanity, can remove? + assert eoc is stream._eoc + + log.warning( + 'Stream was terminated by EoC\n\n' + # NOTE: won't show the error but + # does show txt followed by IPC msg. + f'{str(eoc)}\n' + ) + + finally: + if ctx._portal: + try: + ctx._portal._streams.remove(stream) + except KeyError: + log.warning( + f'Stream was already destroyed?\n' + f'actor: {ctx.chan.uid}\n' + f'ctx id: {ctx.cid}' + ) + + + def stream(func: Callable) -> Callable: ''' Mark an async function as a streaming routine with ``@stream``. -- 2.34.1 From 0c8bb88cc54b6af5b73cead2a08e19c1d6da46c2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 6 Jun 2024 16:14:58 -0400 Subject: [PATCH 149/305] Catch `.pause_from_sync()` in root bg thread bugs! Originally discovered as while using `tractor.pause_from_sync()` from the `i3ipc` client running in a bg-thread that uses `asyncio` inside `modden`. Turns out we definitely aren't correctly handling `.pause_from_sync()` from the root actor when called from a `trio.to_thread.run_sync()` bg thread: - root-actor bg threads which can't `Lock._debug_lock.acquire()` since they aren't in `trio.Task`s. - even if scheduled via `.to_thread.run_sync(_debug._pause)` the acquirer won't be the task/thread which calls `Lock.release()` from `PdbREPL` hooks; this results in a RTE raised by `trio`.. - multiple threads will step on each other's stdio since cpython's GIL seems to ctx switch threads on every input from the user to the REPL loop.. Reproduce via reworking our example and test so that they catch and fail for all edge cases: - rework the `/examples/debugging/sync_bp.py` example to demonstrate the above issues, namely the stdio clobbering in the REPL when multiple threads and/or a subactor try to debug simultaneously. |_ run one thread using a task nursery to ensure it runs conc with the nursery's parent task. |_ ensure the bg threads run conc a subactor usage of `.pause_from_sync()`. |_ gravely detail all the special cases inside a TODO comment. |_ add some control flags to `sync_pause()` helper and don't use `breakpoint()` by default. - extend and adjust `test_debugger.test_pause_from_sync` to match (and thus currently fail) by ensuring exclusive `PdbREPL` attachment when the 2 bg root-actor threads are concurrently interacting alongside the subactor: |_ should only see one of the `_pause_msg` logs at a time for either one of the threads or the subactor. |_ ensure each attaches (in no particular order) before expecting the script to exit. Impl adjustments to `.devx._debug`: - drop `Lock.repl`, no longer used. - add `Lock._owned_by_root: bool` for the `.ctx_in_debug == None` root-actor-task active case. - always `log.exception()` for any `._debug_lock.release()` ownership RTE emitted by `trio`, like we used to.. - add special `Lock.release()` log message for the stale lock but `._owned_by_root == True` case; oh yeah and actually `log.devx(message)`.. - rename `Lock.acquire()` -> `.acquire_for_ctx()` since it's only ever used from subactor IPC usage; well that and for local root-task usage we should prolly add a `.acquire_from_root_task()`? - buncha `._pause()` impl improvements: |_ type `._pause()`'s `debug_func` as a `partial` as well. |_ offer `called_from_sync: bool` and `called_from_bg_thread: bool` for the special case handling when called from `.pause_from_sync()` |_ only set `DebugStatus.repl/repl_task` when `debug_func != None` (OW ensure the `.repl_task` is not the current one). 
|_ handle error logging even when `debug_func is None`.. |_ lotsa detailed commentary around root-actor-bg-thread special cases. - when `._set_trace(hide_tb=False)` do `pdbp.set_trace(frame=currentframe())` so the `._debug` internal frames are always included. - by default always hide tracebacks for `.pause[_from_sync]()` internals. - improve `.pause_from_sync()` to avoid root-bg-thread crashes: |_ pass new `called_from_xxx_` flags and ensure `DebugStatus.repl_task` is actually set to the `threading.current_thread()` when needed. |_ manually call `Lock._debug_lock.acquire_nowait()` for the non-bg thread case. |_ TODO: still need to implement the bg-thread case using a bg `trio.Task`-in-thread with an `trio.Event` set by thread REPL exit. --- examples/debugging/sync_bp.py | 125 +++++++++++++++--- tests/test_debugger.py | 71 +++++++--- tractor/devx/_debug.py | 241 +++++++++++++++++++++++----------- 3 files changed, 323 insertions(+), 114 deletions(-) diff --git a/examples/debugging/sync_bp.py b/examples/debugging/sync_bp.py index efa4e405..e265df44 100644 --- a/examples/debugging/sync_bp.py +++ b/examples/debugging/sync_bp.py @@ -1,15 +1,32 @@ +from functools import partial +import time +from threading import current_thread + import trio import tractor def sync_pause( - use_builtin: bool = True, + use_builtin: bool = False, error: bool = False, + hide_tb: bool = True, + pre_sleep: float|None = None, ): + if pre_sleep: + time.sleep(pre_sleep) + if use_builtin: - breakpoint(hide_tb=False) + print( + f'Entering `breakpoint()` from\n' + f'{current_thread()}\n' + ) + breakpoint(hide_tb=hide_tb) else: + print( + f'Entering `tractor.pause_from_sync()` from\n' + f'{current_thread()}@{tractor.current_actor().uid}\n' + ) tractor.pause_from_sync() if error: @@ -25,44 +42,114 @@ async def start_n_sync_pause( # sync to parent-side task await ctx.started() - print(f'entering SYNC PAUSE in {actor.uid}') + print(f'Entering `sync_pause()` in subactor: {actor.uid}\n') sync_pause() - print(f'back from SYNC PAUSE in {actor.uid}') + print(f'Exited `sync_pause()` in subactor: {actor.uid}\n') async def main() -> None: - async with tractor.open_nursery( - # NOTE: required for pausing from sync funcs - maybe_enable_greenback=True, - debug_mode=True, - ) as an: + async with ( + tractor.open_nursery( + # NOTE: required for pausing from sync funcs + maybe_enable_greenback=True, + debug_mode=True, + # loglevel='cancel', + ) as an, + trio.open_nursery() as tn, + ): + # just from root task + sync_pause() p: tractor.Portal = await an.start_actor( 'subactor', enable_modules=[__name__], # infect_asyncio=True, debug_mode=True, - loglevel='cancel', ) # TODO: 3 sub-actor usage cases: + # -[x] via a `.open_context()` # -[ ] via a `.run_in_actor()` call # -[ ] via a `.run()` - # -[ ] via a `.open_context()` - # + # -[ ] via a `.to_thread.run_sync()` in subactor async with p.open_context( start_n_sync_pause, ) as (ctx, first): assert first is None - await tractor.pause() - sync_pause() + # TODO: handle bg-thread-in-root-actor special cases! + # + # there are a couple very subtle situations possible here + # and they are likely to become more important as cpython + # moves to support no-GIL. + # + # Cases: + # 1. root-actor bg-threads that call `.pause_from_sync()` + # whilst an in-tree subactor also is using ` .pause()`. 
+    #    |_ since the root-actor bg thread can not
+    #      `Lock._debug_lock.acquire_nowait()` without running
+    #      a `trio.Task`, AND because the
+    #      `PdbREPL.set_continue()` is called from that
+    #      bg-thread, we can not `._debug_lock.release()`
+    #      either!
+    #     |_ this results in no actor-tree `Lock` being used
+    #      on behalf of the bg-thread and thus the subactor's
+    #      task and the thread trying to use stdio
+    #      simultaneously which results in the classic TTY
+    #      clobbering!
+    #
+    # 2. multiple sync-bg-threads that call
+    #   `.pause_from_sync()` where one is scheduled via
+    #   `Nursery.start_soon(to_thread.run_sync)` in a bg
+    #   task.
+    #
+    #    Due to the GIL, the threads never truly try to step
+    #    through the REPL simultaneously, BUT their `logging`
+    #    and traceback outputs are interleaved since the GIL
+    #    (seemingly) on every REPL-input from the user
+    #    switches threads..
+    #
+    #    Soo, the context switching semantics of the GIL
+    #    result in a very confusing and messy interaction UX
+    #    since eval and (tb) print output is NOT synced to
+    #    each REPL-cycle (like we normally make it via
+    #    a `.set_continue()` callback triggering the
+    #    `Lock.release()`). Ideally we can solve this
+    #    usability issue NOW because this will of course be
+    #    that much more important when eventually there is no
+    #    GIL!
 
-            # TODO: make this work!!
-            await trio.to_thread.run_sync(
-                sync_pause,
-                abandon_on_cancel=False,
-            )
+            # XXX should cause double REPL entry and thus TTY
+            # clobbering due to case 1. above!
+            tn.start_soon(
+                partial(
+                    trio.to_thread.run_sync,
+                    partial(
+                        sync_pause,
+                        use_builtin=False,
+                        # pre_sleep=0.5,
+                    ),
+                    abandon_on_cancel=True,
+                    thread_name='start_soon_root_bg_thread',
+                )
+            )
+
+            await tractor.pause()
+
+            # XXX should cause double REPL entry and thus TTY
+            # clobbering due to case 2. above!
+            await trio.to_thread.run_sync(
+                partial(
+                    sync_pause,
+                    # NOTE this already works fine since in the new
+                    # thread the `breakpoint()` built-in is never
+                    # overloaded, thus NO locking is used, HOWEVER
+                    # the case 2. from above still exists!
+                    use_builtin=True,
+                ),
+                abandon_on_cancel=False,
+                thread_name='inline_root_bg_thread',
+            )
 
         await ctx.cancel()
diff --git a/tests/test_debugger.py b/tests/test_debugger.py
index 88453682..c95c4f95 100644
--- a/tests/test_debugger.py
+++ b/tests/test_debugger.py
@@ -1071,6 +1071,8 @@ def test_pause_from_sync(
 
     '''
     child = spawn('sync_bp')
+
+    # first `sync_pause()` after nurseries open
     child.expect(PROMPT)
     assert_before(
         child,
         [
     before = str(child.before.decode())
     assert not in_prompt_msg(
         before,
         ['`greenback` portal opened!'],
     )
+    # should be same root task
     assert_before(
         child,
-        [_pause_msg, "('root'",],
+        [
+            _pause_msg,
+            " CancelScope|None:
     if not is_root_process():
@@ -223,6 +219,7 @@ class Lock:
     ctx_in_debug: Context|None = None
     req_handler_finished: trio.Event|None = None
 
+    _owned_by_root: bool = False
     _debug_lock: trio.StrictFIFOLock = trio.StrictFIFOLock()
     _blocked: set[
         tuple[str, str]  # `Actor.uid` for per actor
@@ -231,23 +228,16 @@ class Lock:
 
     @classmethod
     def repr(cls) -> str:
-
-        # both root and subs
+        lock_stats: trio.LockStatistics = cls._debug_lock.statistics()
         fields: str = (
-            f'repl: {cls.repl}\n'
+            f'req_handler_finished: {cls.req_handler_finished}\n'
+            f'_blocked: {cls._blocked}\n\n'
+            f'_debug_lock: {cls._debug_lock}\n'
+            f'lock_stats: {lock_stats}\n'
+            f'ctx_in_debug: {cls.ctx_in_debug}\n'
         )
-        if is_root_process():
-            lock_stats: trio.LockStatistics = cls._debug_lock.statistics()
-            fields += (
-                f'req_handler_finished: {cls.req_handler_finished}\n'
-
-                f'_blocked: {cls._blocked}\n\n'
-                f'_debug_lock: {cls._debug_lock}\n'
-                f'lock_stats: {lock_stats}\n'
-
-            )
         body: str = textwrap.indent(
             fields,
             prefix=' |_',
@@ -256,8 +246,6 @@ class Lock:
             f'<{cls.__name__}(\n'
             f'{body}'
             ')>\n\n'
-
-            f'{cls.ctx_in_debug}\n'
         )
 
     @classmethod
@@ -266,7 +254,10 @@ class Lock:
         cls,
         force: bool = False,
     ):
-        message: str = 'TTY lock not held by any child\n'
+        if not cls._owned_by_root:
+            message: str = 'TTY lock not held by any child\n'
+        else:
+            message: str = 'TTY lock held in root-actor task\n'
 
         if not (is_trio_main := DebugStatus.is_main_trio_thread()):
             task: threading.Thread = threading.current_thread()
@@ -279,8 +270,20 @@ class Lock:
             if (
                 lock.locked()
                 and
-                owner is task
-                # ^-NOTE-^ if not will raise a RTE..
+                (
+                    owner is task
+                    # or
+                    # cls._owned_by_root
+                )
+                # ^-NOTE-^ if we do NOT ensure this, `trio` will
+                # raise a RTE when a non-owner tries to release the
+                # lock.
+                #
+                # Further we need to be extra pedantic about the
+                # correct task, greenback-spawned-task and/or thread
+                # being set to the `.repl_task` such that the above
+                # condition matches and we actually release the lock.
+                # This is of particular note from `.pause_from_sync()`!
             ):
                 if not is_trio_main:
                     trio.from_thread.run_sync(
@@ -290,6 +293,10 @@ class Lock:
                     cls._debug_lock.release()
                 message: str = 'TTY lock released for child\n'
 
+        except RuntimeError as rte:
+            log.exception('Failed to release `Lock`?')
+            raise rte
+
         finally:
             # IFF there are no more requesting tasks queued up fire, the
             # "tty-unlocked" event thereby alerting any monitors of the lock that
@@ -305,7 +312,11 @@ class Lock:
             ):
                 message += '-> No more child ctx tasks hold the TTY lock!\n'
 
-            elif req_handler_finished:
+            elif (
+                req_handler_finished
+                and
+                lock.locked()
+            ):
                 req_stats = req_handler_finished.statistics()
                 message += (
                     f'-> A child ctx task still owns the `Lock` ??\n'
@@ -315,9 +326,20 @@ class Lock:
 
             cls.ctx_in_debug = None
 
+            if (
+                cls._owned_by_root
+            ):
+                if not lock.locked():
+                    cls._owned_by_root = False
+
+                else:
+                    message += 'Lock still held by root actor task?!?\n'
+                    lock.release()
+
+            log.devx(message)
+
     @classmethod
     @acm
-    async def acquire(
+    async def acquire_for_ctx(
         cls,
         ctx: Context,
 
@@ -372,7 +394,7 @@ class Lock:
             )
 
             # NOTE: critical section: this yield is unshielded!
- + # # IF we received a cancel during the shielded lock entry of some # next-in-queue requesting task, then the resumption here will # result in that ``trio.Cancelled`` being raised to our caller @@ -384,7 +406,7 @@ class Lock: yield cls._debug_lock finally: - message :str = 'Exiting `Lock.acquire()` on behalf of sub-actor\n' + message :str = 'Exiting `Lock.acquire_for_ctx()` on behalf of sub-actor\n' if we_acquired: message += '-> TTY lock released by child\n' cls.release() @@ -468,11 +490,11 @@ async def lock_tty_for_child( # TODO: use `.msg._ops.maybe_limit_plds()` here instead so we # can merge into a single async with, with the - # `Lock.acquire()` enter below? + # `Lock.acquire_for_ctx()` enter below? # # enable the locking msgspec with apply_debug_pldec(): - async with Lock.acquire(ctx=ctx): + async with Lock.acquire_for_ctx(ctx=ctx): debug_lock_cs.shield = True log.devx( @@ -567,6 +589,11 @@ class DebugStatus: whenever a local task is an active REPL. ''' + # XXX local ref to the `pdbp.Pbp` instance, ONLY set in the + # actor-process that currently has activated a REPL i.e. it + # should be `None` (unset) in any other actor-process that does + # not yet have the `Lock` acquired via a root-actor debugger + # request. repl: PdbREPL|None = None # TODO: yet again this looks like a task outcome where we need @@ -1443,7 +1470,7 @@ class DebugRequestError(RuntimeError): async def _pause( - debug_func: Callable|None, + debug_func: Callable|partial|None, # NOTE: must be passed in the `.pause_from_sync()` case! repl: PdbREPL|None = None, @@ -1457,7 +1484,9 @@ async def _pause( # be no way to override it?.. # shield: bool = False, - hide_tb: bool = False, + hide_tb: bool = True, + called_from_sync: bool = False, + called_from_bg_thread: bool = False, task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED, **debug_func_kwargs, @@ -1502,27 +1531,15 @@ async def _pause( # -[ ] factor out better, main reason for it is common logic for # both root and sub repl entry def _enter_repl_sync( - debug_func: Callable, + debug_func: partial[None], ) -> None: __tracebackhide__: bool = hide_tb + debug_func_name: str = ( + debug_func.func.__name__ if debug_func else 'None' + ) try: - # set local actor task to avoid recurrent - # entries/requests from the same local task (to the root - # process). - DebugStatus.repl_task = task - DebugStatus.repl = repl - - # TODO: do we want to support using this **just** for the - # locking / common code (prolly to help address #320)? - if debug_func is None: - task_status.started(DebugStatus) - - else: - log.warning( - 'Entering REPL for task fuck you!\n' - f'{task}\n' - ) + if debug_func: # block here one (at the appropriate frame *up*) where # ``breakpoint()`` was awaited and begin handling stdio. log.devx( @@ -1531,6 +1548,12 @@ async def _pause( f' |_{task}\n' ) + # set local actor task to avoid recurrent + # entries/requests from the same local task (to the root + # process). + DebugStatus.repl = repl + DebugStatus.repl_task = task + # invoke the low-level REPL activation routine which itself # should call into a `Pdb.set_trace()` of some sort. debug_func( @@ -1539,10 +1562,27 @@ async def _pause( **debug_func_kwargs, ) + # TODO: maybe invert this logic and instead + # do `assert debug_func is None` when + # `called_from_sync`? 
+ else: + if ( + called_from_sync + # and + # is_root_process() + and + not DebugStatus.is_main_trio_thread() + ): + assert DebugStatus.repl_task is not task + + # TODO: do we want to support using this **just** for the + # locking / common code (prolly to help address #320)? + task_status.started(DebugStatus) + except trio.Cancelled: log.exception( - 'Cancelled during invoke of internal `debug_func = ' - f'{debug_func.func.__name__}`\n' + 'Cancelled during invoke of internal\n\n' + f'`debug_func = {debug_func_name}`\n' ) # XXX NOTE: DON'T release lock yet raise @@ -1550,8 +1590,8 @@ async def _pause( except BaseException: __tracebackhide__: bool = False log.exception( - 'Failed to invoke internal `debug_func = ' - f'{debug_func.func.__name__}`\n' + 'Failed to invoke internal\n\n' + f'`debug_func = {debug_func_name}`\n' ) # NOTE: OW this is ONLY called from the # `.set_continue/next` hooks! @@ -1597,34 +1637,56 @@ async def _pause( f'This root actor task is already within an active REPL session\n' f'Ignoring this re-entered `tractor.pause()`\n' f'task: {task.name}\n' - f'REPL: {Lock.repl}\n' # TODO: use `._frame_stack` scanner to find the @api_frame ) with trio.CancelScope(shield=shield): await trio.lowlevel.checkpoint() return - # XXX: since we need to enter pdb synchronously below, - # we have to release the lock manually from pdb completion - # callbacks. Can't think of a nicer way then this atm. + # must shield here to avoid hitting a `Cancelled` and + # a child getting stuck bc we clobbered the tty with trio.CancelScope(shield=shield): if Lock._debug_lock.locked(): - log.warning( - 'attempting to shield-acquire active TTY lock owned by\n' + + acq_prefix: str = 'shield-' if shield else '' + ctx_line: str = ( + 'lock owned by ctx\n\n' f'{ctx}' + ) if ctx else 'stale lock with no request ctx!?' + log.devx( + f'attempting to {acq_prefix}acquire active TTY ' + f'{ctx_line}' ) - # must shield here to avoid hitting a ``Cancelled`` and - # a child getting stuck bc we clobbered the tty - # with trio.CancelScope(shield=True): - await Lock._debug_lock.acquire() - else: - # may be cancelled + # XXX: since we need to enter pdb synchronously below, + # and we don't want to block the thread that starts + # stepping through the application thread, we later + # must `Lock._debug_lock.release()` manually from + # some `PdbREPL` completion callback(`.set_[continue/exit]()`). + # + # So, when `._pause()` is called from a (bg/non-trio) + # thread, special provisions are needed and we need + # to do the `.acquire()`/`.release()` calls from + # a common `trio.task` (due to internal impl of + # `FIFOLock`). Thus we do not acquire here and + # instead expect `.pause_from_sync()` to take care of + # this detail depending on the caller's (threading) + # usage. + # + # NOTE that this special case is ONLY required when + # using `.pause_from_sync()` from the root actor + # since OW a subactor will instead make an IPC + # request (in the branch below) to acquire the + # `Lock`-mutex and a common root-actor RPC task will + # take care of `._debug_lock` mgmt! + if not called_from_sync: await Lock._debug_lock.acquire() + Lock._owned_by_root = True # enter REPL from root, no TTY locking IPC ctx necessary + # since we can acquire the `Lock._debug_lock` directly in + # thread. _enter_repl_sync(debug_func) - return # next branch is mutex and for subactors # TODO: need a more robust check for the "root" actor elif ( @@ -1843,6 +1905,11 @@ def _set_trace( # called our API. 
caller_frame: FrameType = api_frame.f_back # type: ignore + # pretend this frame is the caller frame to show + # the entire call-stack all the way down to here. + if not hide_tb: + caller_frame: FrameType = inspect.currentframe() + # engage ze REPL # B~() repl.set_trace(frame=caller_frame) @@ -1850,7 +1917,7 @@ def _set_trace( async def pause( *, - hide_tb: bool = False, + hide_tb: bool = True, api_frame: FrameType|None = None, # TODO: figure out how to still make this work: @@ -1970,13 +2037,12 @@ async def maybe_init_greenback( # runtime aware version which takes care of all . def pause_from_sync( - hide_tb: bool = False, - # proxied to `_pause()` + hide_tb: bool = True, - **_pause_kwargs, - # for eg. + # proxy to `._pause()`, for ex: # shield: bool = False, # api_frame: FrameType|None = None, + **_pause_kwargs, ) -> None: @@ -2020,26 +2086,53 @@ def pause_from_sync( # noop: non-cancelled `.to_thread` # `trio.Cancelled`: cancelled `.to_thread` # + log.warning( + 'Engaging `.pause_from_sync()` from ANOTHER THREAD!' + ) + task: threading.Thread = threading.current_thread() + DebugStatus.repl_task: str = task + + # TODO: make root-actor bg thread usage work! + # if is_root_process(): + # async def _pause_from_sync_thread(): + # ... + # else: + # .. the below .. + trio.from_thread.run( partial( _pause, debug_func=None, repl=mdb, + hide_tb=hide_tb, + + # XXX to prevent `._pause()` for setting + # `DebugStatus.repl_task` to the gb task! + called_from_sync=True, + called_from_bg_thread=True, + **_pause_kwargs ), ) - task: threading.Thread = threading.current_thread() else: # we are presumably the `trio.run()` + main thread task: trio.Task = current_task() + DebugStatus.repl_task: str = task greenback.await_( _pause( debug_func=None, repl=mdb, + hide_tb=hide_tb, + called_from_sync=True, **_pause_kwargs, ) ) - DebugStatus.repl_task: str = current_task() + + if is_root_process(): + # Manually acquire since otherwise on release we'll + # get a RTE raised by `trio` due to ownership.. 
+            Lock._debug_lock.acquire_nowait()
+            Lock._owned_by_root = True

     # TODO: ensure we aggressively make the user aware about
     # entering the global ``breakpoint()`` built-in from sync
-- 
2.34.1


From d8dd0c0a8114df1968ddef9b337d833263574cd9 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Fri, 7 Jun 2024 22:35:59 -0400
Subject: [PATCH 150/305] Drop thread logging to make `log.pdb()` patts match
 in test

---
 examples/debugging/sync_bp.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/examples/debugging/sync_bp.py b/examples/debugging/sync_bp.py
index e265df44..137710fc 100644
--- a/examples/debugging/sync_bp.py
+++ b/examples/debugging/sync_bp.py
@@ -1,6 +1,5 @@
 from functools import partial
 import time
-from threading import current_thread

 import trio
 import tractor
@@ -16,17 +15,9 @@ def sync_pause(
         time.sleep(pre_sleep)

     if use_builtin:
-        print(
-            f'Entering `breakpoint()` from\n'
-            f'{current_thread()}\n'
-        )
         breakpoint(hide_tb=hide_tb)

     else:
-        print(
-            f'Entering `tractor.pause_from_sync()` from\n'
-            f'{current_thread()}@{tractor.current_actor().uid}\n'
-        )
         tractor.pause_from_sync()

     if error:
-- 
2.34.1


From d3f7b83ea0de3db22d973fdbb48883e4918c8f7e Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Mon, 10 Jun 2024 08:54:03 -0400
Subject: [PATCH 151/305] First proto: multi-threaded synced `pdb`-REPLs

Functionally working for multi-threaded (via cpython threads spawned
from `trio.to_thread.run_sync()`) alongside subactors, tested (for now)
only with threads started inside the root actor (which seemed to have
the most issues in terms of the impl and special cases..) using the new
`tractor.pause_from_sync()` API!

Main implementation changes to `.pause_from_sync()`
------ - ------
- from the root actor, we need to ensure bg thread case is handled
  *specially* since no IPC is used to request the TTY stdio mutex and
  `Lock` (API) usage is conducted entirely from a local task or thread;
  dedicated `Lock` usage for the root-actor already is branched inside
  `._pause()` and needs similar handling from a root bg-thread:
 |_for the special case of a root bg thread we need to
   `trio`-main-thread schedule a bg task inside a new
   `_pause_from_bg_root_thread()`. The new task needs to implement most
   of what is handled inside `._pause()` manually, mostly because in
   this root-actor-bg-thread case we have 2 constraints:
   1. to enter `PdbREPL.interaction()` **from the bg thread** directly,
   2. the task that `Lock._debug_lock.acquire()`s has to be the same
      one that calls `.release()` (a `trio.FIFOLock` constraint)
 |_impl deats of this `_pause_from_bg_root_thread()` include:
   - (for now) calling `._pause()` to acquire the `Lock._debug_lock`.
   - setting its own `DebugStatus.repl_release`.
   - calling `DebugStatus.shield_sigint()` to ensure the root's main
     thread uses the right handler when the bg one is REPL-ing.
   - waiting manually on the `.repl_release` event to be set by the
     thread's dedicated `PdbREPL` exit.
   - manually calling `Lock.release()` from the **same task** that
     acquired it.
- expect calls to `._pause()` to deliver a `tuple[Task, PdbREPL]` such
  that we always get the handle both to any newly created REPL instance
  and (maybe) the scheduled bg task within which it runs.
- use a single, branch-built `message: str` for `log.devx()` logging.
- ensure both `DebugStatus.repl` and `.repl_task` are set **just
  before** calling `._set_trace()` to ensure the correct `Task|Thread`
  is set when the REPL is finally entered from sync code.
- add a wrapping caller `_sync_pause_from_builtin()` which passes in the
  new `called_from_builtin=True` to indicate `breakpoint()` caller
  usage, and obvi passes in `api_frame`.

Changes to `._pause()` in support of ^
------ - ------
- `TaskStatus.started()`-deliver and return the `tuple[Task, PdbREPL]`
  to callers / starters.
- only call `DebugStatus.shield_sigint()` when no `repl` is passed bc
  some callers (like bg threads) may need to apply it at some specific
  point themselves.
- tweak some asserts for the `debug_func == None` / non-`trio`-thread
  case.
- add a mod-level `_repl_fail_msg: str` to be used when there's an
  internal `._pause()` failure for testing, easier to pexpect match.
- more comprehensive logging for the root-actor branched case to
  (attempt to) indicate any of the 3 cases:
  - remote ctx from subactor has the `Lock`,
  - already existing root task or thread has it or,
  - some kinda stale `.locked()` situation where the root has the lock
    but we don't know why.
- for root usage, revert to always `await Lock._debug_lock.acquire()`-ing
  despite `called_from_sync` since `.pause_from_sync()` was reworked to
  instead handle the special bg thread case in the new
  `_pause_from_bg_root_thread()` task.
- always do `return _enter_repl_sync(debug_func)`.
- try to report any `repl_task: Task|Thread` set by the caller
  (particularly for the bg thread cases) as being the thread or task
  `._pause()` was called "on behalf of".

Changes to `DebugStatus`/`Lock` in support of ^
------ - ------
- only call `Lock.release()` from `PdbREPL.set_[quit/continue]()` when
  called from the main `trio` thread and always call
  `DebugStatus.release()` **after** to ensure `.repl_release` is set
  **after** `._debug_lock.release()`.
- only call `.repl_release.set()` from the `trio` thread, otherwise use
  `trio.from_thread.run_sync()`.
- many more refinements in `Lock.release()` for threading cases:
  - return a `bool` to indicate whether the lock was released by the
    caller.
  - mask (in prep to drop) `_pause()` usage of `Lock.release(force=True)`
    since forcing a release can't ever avoid the RTE from `trio`..
    the same task **must** acquire/release.
  - don't allow usage from non-`trio`-main-threads, ever; there's no
    point given the same-task-must-manage-the-`FIFOLock` constraint.
  - much more detailed logging using a `message`-building style for all
    caller (edge) cases.
   |_ use a `we_released: bool` to determine failed-to-release edge
      cases which can happen if called from bg threads; ensure we
      `log.exception()` on any incorrect usage resulting in release
      failure.
   |_ complain loudly if the release fails and some other task/thread
      still holds the lock.
   |_ be explicit about "who" (which task or thread) the release is "on
      behalf of" by reading `DebugStatus.repl_task` since the caller
      isn't the REPL operator in many sync cases.
  - more or less drop `force` support, as mentioned above.
  - ensure we unset `._owned_by_root` if the caller is a root task.

Other misc
------ - ------
- rename `lock_tty_for_child()` -> `lock_stdio_for_peer()`.
- rejig `Lock.repr()` to show lock and event stats.
- stage `Lock.stats` and `.owner` methods in prep for doing a singleton
  instance and `@property`s.
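
One more note on the core constraint driving all of the above: with a
`trio.Lock`/`FIFOLock` the releasing task must be the exact same task
that acquired, else `trio` raises a `RuntimeError`. A minimal sketch of
the resulting delegation pattern using only public `trio` APIs (no
`tractor` internals; all names here are made up for illustration):

    import threading
    import trio

    async def main():
        lock = trio.StrictFIFOLock()
        token = trio.lowlevel.current_trio_token()

        async def locked_section():
            # acquire AND release happen in this one task,
            # satisfying the same-task ownership rule; calling
            # `lock.release()` from any other task would raise
            # a `RuntimeError`.
            async with lock:
                await trio.sleep(0.1)  # stand-in for a REPL session

        def bg_thread():
            # hop back into the `trio` thread; all lock mgmt runs
            # inside `locked_section()`'s task.
            trio.from_thread.run(locked_section, trio_token=token)

        thread = threading.Thread(target=bg_thread)
        thread.start()
        await trio.to_thread.run_sync(thread.join)

    trio.run(main)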
--- tractor/devx/_debug.py | 732 +++++++++++++++++++++++++++++------------ 1 file changed, 525 insertions(+), 207 deletions(-) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 858133fd..3218cffa 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -62,7 +62,6 @@ import trio from trio import CancelScope from trio.lowlevel import ( current_task, - Task, ) from trio import ( TaskStatus, @@ -81,6 +80,8 @@ from tractor._state import ( # ) if TYPE_CHECKING: + from trio.lowlevel import Task + from threading import Thread from tractor._ipc import Channel from tractor._context import Context from tractor._runtime import ( @@ -92,6 +93,11 @@ if TYPE_CHECKING: log = get_logger(__name__) +# TODO: refine the internal impl and APIs in this module! +# +# -[ ] separate `._pause()` branch-cases for calling from a root task +# vs. from subactors + def hide_runtime_frames() -> dict[FunctionType, CodeType]: ''' @@ -203,6 +209,15 @@ class Lock: return None + # TODO: once we convert to singleton-per-actor-style + # @property + # def stats(cls) -> trio.LockStatistics: + # return cls._debug_lock.statistics() + + # @property + # def owner(cls) -> Task: + # return cls._debug_lock.statistics().owner + # ROOT ONLY # ------ - ------- # the root-actor-ONLY singletons for, @@ -229,18 +244,22 @@ class Lock: @classmethod def repr(cls) -> str: lock_stats: trio.LockStatistics = cls._debug_lock.statistics() + req: trio.Event|None = cls.req_handler_finished fields: str = ( - f'req_handler_finished: {cls.req_handler_finished}\n' - f'_blocked: {cls._blocked}\n\n' - f'_debug_lock: {cls._debug_lock}\n' - f'lock_stats: {lock_stats}\n' - f'ctx_in_debug: {cls.ctx_in_debug}\n' + f'|_ ._blocked: {cls._blocked}\n' + f'|_ ._debug_lock: {cls._debug_lock}\n' + f' {lock_stats}\n\n' + f'|_ .ctx_in_debug: {cls.ctx_in_debug}\n' + f'|_ .req_handler_finished: {req}\n' ) + if req: + req_stats: trio.EventStatistics = req.statistics() + fields += f' {req_stats}\n' body: str = textwrap.indent( fields, - prefix=' |_', + prefix=' ', ) return ( f'<{cls.__name__}(\n' @@ -253,28 +272,59 @@ class Lock: def release( cls, force: bool = False, - ): - if not cls._owned_by_root: - message: str = 'TTY lock not held by any child\n' - else: - message: str = 'TTY lock held in root-actor task\n' + raise_on_thread: bool = True, - if not (is_trio_main := DebugStatus.is_main_trio_thread()): - task: threading.Thread = threading.current_thread() + ) -> bool: + ''' + Release the actor-tree global TTY stdio lock (only) from the + `trio.run()`-main-thread. + + ''' + we_released: bool = False + ctx_in_debug: Context|None = cls.ctx_in_debug + repl_task: Task|Thread|None = DebugStatus.repl_task + if not DebugStatus.is_main_trio_thread(): + thread: threading.Thread = threading.current_thread() + message: str = ( + '`Lock.release()` can not be called from a non-main-`trio` thread!\n' + f'{thread}\n' + ) + if raise_on_thread: + raise RuntimeError(message) + + log.devx(message) + return False + + task: Task = current_task() + + # sanity check that if we're the root actor + # the lock is marked as such. + # note the pre-release value may be diff the the + # post-release task. 
+ if repl_task is task: + assert cls._owned_by_root + message: str = ( + 'TTY lock held by root-actor on behalf of local task\n' + f'|_{repl_task}\n' + ) else: - task: trio.Task = current_task() + assert DebugStatus.repl_task is not task + + message: str = ( + 'TTY lock was NOT released on behalf of caller\n' + f'|_{task}\n' + ) try: lock: trio.StrictFIFOLock = cls._debug_lock owner: Task = lock.statistics().owner if ( - lock.locked() - and - ( - owner is task - # or - # cls._owned_by_root - ) + (lock.locked() or force) + # ^-TODO-NOTE-^ should we just remove this, since the + # RTE case above will always happen when you force + # from the wrong task? + + and (owner is task) # ^-NOTE-^ if we do NOT ensure this, `trio` will # raise a RTE when a non-owner tries to releasee the # lock. @@ -284,17 +334,27 @@ class Lock: # being set to the `.repl_task` such that the above # condition matches and we actually release the lock. # This is particular of note from `.pause_from_sync()`! + ): - if not is_trio_main: - trio.from_thread.run_sync( - cls._debug_lock.release + cls._debug_lock.release() + we_released: bool = True + if repl_task: + message: str = ( + 'Lock released on behalf of root-actor-local REPL owner\n' + f'|_{repl_task}\n' ) else: - cls._debug_lock.release() - message: str = 'TTY lock released for child\n' + message: str = ( + 'TTY lock released by us on behalf of remote peer?\n' + f'|_ctx_in_debug: {ctx_in_debug}\n\n' + ) + # mk_pdb().set_trace() + # elif owner: except RuntimeError as rte: - log.exception('Failed to release `Lock`?') + log.exception( + 'Failed to release `Lock._debug_lock: trio.FIFOLock`?\n' + ) raise rte finally: @@ -303,40 +363,59 @@ class Lock: # we are now back in the "tty unlocked" state. This is basically # and edge triggered signal around an empty queue of sub-actor # tasks that may have tried to acquire the lock. - lock_stats = cls._debug_lock.statistics() + lock_stats: trio.LockStatistics = cls._debug_lock.statistics() req_handler_finished: trio.Event|None = Lock.req_handler_finished if ( not lock_stats.owner - or force and req_handler_finished is None ): - message += '-> No more child ctx tasks hold the TTY lock!\n' - - elif ( - req_handler_finished - and - lock.locked() - ): - req_stats = req_handler_finished.statistics() message += ( - f'-> A child ctx task still owns the `Lock` ??\n' - f' |_lock_stats: {lock_stats}\n' - f' |_req_stats: {req_stats}\n' + '-> No new task holds the TTY lock!\n\n' + f'{Lock.repr()}\n' ) - cls.ctx_in_debug = None + elif ( + req_handler_finished # new IPC ctx debug request active + and + lock.locked() # someone has the lock + ): + behalf_of_task = ( + ctx_in_debug + or + repl_task + ) + message += ( + f'\nA non-caller task still owns this lock on behalf of ' + f'{behalf_of_task}\n' + f'|_{lock_stats.owner}\n' + ) if ( - cls._owned_by_root + we_released + and + ctx_in_debug ): - if not lock.locked(): - cls._owned_by_root = False - else: - message += 'Lock still held by root actor task?!?\n' - lock.release() + cls.ctx_in_debug = None # unset + + # post-release value (should be diff then value above!) 
+ repl_task: Task|Thread|None = DebugStatus.repl_task + if ( + cls._owned_by_root + and + we_released + ): + cls._owned_by_root = False + + if task is not repl_task: + message += ( + 'Lock released by root actor on behalf of bg thread\n' + f'|_{repl_task}\n' + ) log.devx(message) + return we_released + @classmethod @acm async def acquire_for_ctx( @@ -380,7 +459,7 @@ class Lock: log.runtime(pre_msg) # NOTE: if the surrounding cancel scope from the - # `lock_tty_for_child()` caller is cancelled, this line should + # `lock_stdio_for_peer()` caller is cancelled, this line should # unblock and NOT leave us in some kind of # a "child-locked-TTY-but-child-is-uncontactable-over-IPC" # condition. @@ -398,7 +477,7 @@ class Lock: # IF we received a cancel during the shielded lock entry of some # next-in-queue requesting task, then the resumption here will # result in that ``trio.Cancelled`` being raised to our caller - # (likely from ``lock_tty_for_child()`` below)! In + # (likely from `lock_stdio_for_peer()` below)! In # this case the ``finally:`` below should trigger and the # surrounding caller side context should cancel normally # relaying back to the caller. @@ -408,8 +487,8 @@ class Lock: finally: message :str = 'Exiting `Lock.acquire_for_ctx()` on behalf of sub-actor\n' if we_acquired: - message += '-> TTY lock released by child\n' cls.release() + message += '-> TTY lock released by child\n' else: message += '-> TTY lock never acquired by child??\n' @@ -421,7 +500,7 @@ class Lock: @tractor.context -async def lock_tty_for_child( +async def lock_stdio_for_peer( ctx: Context, subactor_task_uid: tuple[str, int], @@ -545,25 +624,26 @@ async def lock_tty_for_child( except BaseException as req_err: message: str = ( + f'On behalf of remote peer {subactor_task_uid!r}@{ctx.chan.uid!r}\n\n' 'Forcing `Lock.release()` for req-ctx since likely an ' 'internal error!\n\n' f'{ctx}' ) if isinstance(req_err, trio.Cancelled): message = ( - 'Cancelled during root TTY-lock dialog?\n' + 'Cancelled during root TTY-lock dialog\n' + message ) else: message = ( - 'Errored during root TTY-lock dialog?\n' + 'Errored during root TTY-lock dialog\n' + message ) log.exception(message) - Lock.release(force=True) + Lock.release() #force=True) raise finally: @@ -645,7 +725,7 @@ class DebugStatus: def shield_sigint(cls): ''' Shield out SIGINT handling (which by default triggers - `trio.Task` cancellation) in subactors when a `pdb` REPL + `Task` cancellation) in subactors when a `pdb` REPL is active. Avoids cancellation of the current actor (task) when the user @@ -767,9 +847,17 @@ class DebugStatus: try: # sometimes the task might already be terminated in # which case this call will raise an RTE? - if repl_release is not None: - repl_release.set() - + if ( + repl_release is not None + ): + if cls.is_main_trio_thread(): + repl_release.set() + else: + # XXX NOTE ONLY used for bg root-actor sync + # threads, see `.pause_from_sync()`. + trio.from_thread.run_sync( + repl_release.set + ) finally: # if req_ctx := cls.req_ctx: # req_ctx._scope.cancel() @@ -856,8 +944,6 @@ class PdbREPL(pdbp.Pdb): try: super().set_continue() finally: - DebugStatus.release() - # NOTE: for subactors the stdio lock is released via the # allocated RPC locker task, so for root we have to do it # manually. 
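
The `trio.from_thread.run_sync()` hop shown in the hunk above is the
standard way for a bg (non-`trio`) thread to touch `trio` state such as
a `trio.Event`, whose `.set()` is not thread-safe on its own. A tiny
self-contained sketch of just that pattern (illustrative names only):

    import trio

    async def main():
        ev = trio.Event()

        def signal_from_bg_thread():
            # re-enter the `trio` (main) thread to flip the event
            trio.from_thread.run_sync(ev.set)

        async with trio.open_nursery() as tn:
            tn.start_soon(trio.to_thread.run_sync, signal_from_bg_thread)
            await ev.wait()  # unblocks once the bg thread sets it

    trio.run(main)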
@@ -865,21 +951,32 @@ class PdbREPL(pdbp.Pdb): is_root_process() and Lock._debug_lock.locked() + and + DebugStatus.is_main_trio_thread() ): + # Lock.release(raise_on_thread=False) Lock.release() + # XXX after `Lock.release()` for root local repl usage + DebugStatus.release() + def set_quit(self): try: super().set_quit() finally: - DebugStatus.release() if ( is_root_process() and Lock._debug_lock.locked() + and + DebugStatus.is_main_trio_thread() ): + # Lock.release(raise_on_thread=False) Lock.release() + # XXX after `Lock.release()` for root local repl usage + DebugStatus.release() + # XXX NOTE: we only override this because apparently the stdlib pdb # bois likes to touch the SIGINT handler as much as i like to touch # my d$%&. @@ -960,20 +1057,24 @@ async def request_root_stdio_lock( task_status: TaskStatus[CancelScope] = trio.TASK_STATUS_IGNORED, ): ''' - Connect to the root actor of this process tree and RPC-invoke - a task which acquires a std-streams global `Lock`: a actor tree - global mutex which prevents other subactors from entering - a `PdbREPL` at the same time as any other. + Connect to the root actor for this actor's process tree and + RPC-invoke a task which acquires the std-streams global `Lock`: + a process-tree-global mutex which prevents multiple actors from + entering `PdbREPL.interaction()` at the same time such that the + parent TTY's stdio is never "clobbered" by simultaneous + reads/writes. - The actual `Lock` singleton exists ONLY in the root actor's - memory and does nothing more then set process-tree global state. - The actual `PdbREPL` interaction is completely isolated to each - sub-actor and with the `Lock` merely providing the multi-process - syncing mechanism to avoid any subactor (or the root itself) from - entering the REPL at the same time. + The actual `Lock` singleton instance exists ONLY in the root + actor's memory space and does nothing more then manage + process-tree global state, + namely a `._debug_lock: trio.FIFOLock`. + + The actual `PdbREPL` interaction/operation is completely isolated + to each sub-actor (process) with the root's `Lock` providing the + multi-process mutex-syncing mechanism to avoid parallel REPL + usage within an actor tree. ''' - log.devx( 'Initing stdio-lock request task with root actor' ) @@ -1004,7 +1105,7 @@ async def request_root_stdio_lock( # `.repl_release: # trio.Event`. with trio.CancelScope(shield=shield) as req_cs: # XXX: was orig for debugging cs stack corruption.. - # log.info( + # log.devx( # 'Request cancel-scope is:\n\n' # f'{pformat_cs(req_cs, var_name="req_cs")}\n\n' # ) @@ -1014,7 +1115,7 @@ async def request_root_stdio_lock( # TODO: merge into single async with ? 
async with get_root() as portal: async with portal.open_context( - lock_tty_for_child, + lock_stdio_for_peer, subactor_task_uid=task_uid, # NOTE: set it here in the locker request task bc it's # possible for multiple such requests for the lock in any @@ -1468,6 +1569,11 @@ class DebugRequestError(RuntimeError): ''' +_repl_fail_msg: str = ( + 'Failed to REPl via `_pause()` ' +) + + async def _pause( debug_func: Callable|partial|None, @@ -1487,10 +1593,13 @@ async def _pause( hide_tb: bool = True, called_from_sync: bool = False, called_from_bg_thread: bool = False, - task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED, + task_status: TaskStatus[ + tuple[Task, PdbREPL], + trio.Event + ] = trio.TASK_STATUS_IGNORED, **debug_func_kwargs, -) -> None: +) -> tuple[PdbREPL, Task]|None: ''' Inner impl for `pause()` to avoid the `trio.CancelScope.__exit__()` stack frame when not shielded (since apparently i can't figure out @@ -1502,25 +1611,26 @@ async def _pause( __tracebackhide__: bool = hide_tb actor: Actor = current_actor() try: - # TODO: use the `Task` instance instead for `is` checks - # below! - task: Task = trio.lowlevel.current_task() + task: Task = current_task() except RuntimeError as rte: + log.exception('Failed to get current task?') if actor.is_infected_aio(): raise RuntimeError( '`tractor.pause[_from_sync]()` not yet supported ' 'for infected `asyncio` mode!' ) from rte + raise + if debug_func is not None: debug_func = partial(debug_func) - repl: PdbREPL = repl or mk_pdb() - # XXX NOTE XXX set it here to avoid ctl-c from cancelling a debug # request from a subactor BEFORE the REPL is entered by that # process. - DebugStatus.shield_sigint() + if not repl: + DebugStatus.shield_sigint() + repl: PdbREPL = repl or mk_pdb() # TODO: move this into a `open_debug_request()` @acm? # -[ ] prolly makes the most sense to do the request @@ -1538,6 +1648,9 @@ async def _pause( debug_func.func.__name__ if debug_func else 'None' ) + # TODO: do we want to support using this **just** for the + # locking / common code (prolly to help address #320)? + task_status.started((task, repl)) try: if debug_func: # block here one (at the appropriate frame *up*) where @@ -1548,11 +1661,11 @@ async def _pause( f' |_{task}\n' ) - # set local actor task to avoid recurrent - # entries/requests from the same local task (to the root - # process). - DebugStatus.repl = repl + # set local task on process-global state to avoid + # recurrent entries/requests from the same + # actor-local task. DebugStatus.repl_task = task + DebugStatus.repl = repl # invoke the low-level REPL activation routine which itself # should call into a `Pdb.set_trace()` of some sort. @@ -1568,16 +1681,13 @@ async def _pause( else: if ( called_from_sync - # and - # is_root_process() and not DebugStatus.is_main_trio_thread() ): + assert called_from_bg_thread assert DebugStatus.repl_task is not task - # TODO: do we want to support using this **just** for the - # locking / common code (prolly to help address #320)? - task_status.started(DebugStatus) + return (task, repl) except trio.Cancelled: log.exception( @@ -1607,12 +1717,23 @@ async def _pause( # TODO: this should be created as part of `DebugRequest()` init # which should instead be a one-shot-use singleton much like # the `PdbREPL`. 
+ repl_task: Thread|Task|None = DebugStatus.repl_task if ( not DebugStatus.repl_release or DebugStatus.repl_release.is_set() ): + log.devx( + 'Setting new `DebugStatus.repl_release: trio.Event` for requesting task\n' + f'|_{task}\n' + ) DebugStatus.repl_release = trio.Event() + else: + log.devx( + 'Already an existing actor-local REPL user task\n' + f'|_{repl_task}\n' + ) + # ^-NOTE-^ this must be created BEFORE scheduling any subactor # debug-req task since it needs to wait on it just after # `.started()`-ing back its wrapping `.req_cs: CancelScope`. @@ -1620,73 +1741,110 @@ async def _pause( repl_err: BaseException|None = None try: if is_root_process(): - # we also wait in the root-parent for any child that # may have the tty locked prior - # TODO: wait, what about multiple root tasks acquiring it though? + # TODO: wait, what about multiple root tasks (with bg + # threads) acquiring it though? ctx: Context|None = Lock.ctx_in_debug + repl_task: Task|None = DebugStatus.repl_task if ( ctx is None and - DebugStatus.repl - and - DebugStatus.repl_task is task + repl_task is task + # and + # DebugStatus.repl + # ^-NOTE-^ matches for multi-threaded case as well? ): # re-entrant root process already has it: noop. log.warning( f'This root actor task is already within an active REPL session\n' - f'Ignoring this re-entered `tractor.pause()`\n' - f'task: {task.name}\n' + f'Ignoring this recurrent`tractor.pause()` entry\n\n' + f'|_{task}\n' # TODO: use `._frame_stack` scanner to find the @api_frame ) with trio.CancelScope(shield=shield): await trio.lowlevel.checkpoint() - return + return repl, task + + # elif repl_task: + # log.warning( + # f'This root actor has another task already in REPL\n' + # f'Waitin for the other task to complete..\n\n' + # f'|_{task}\n' + # # TODO: use `._frame_stack` scanner to find the @api_frame + # ) + # with trio.CancelScope(shield=shield): + # await DebugStatus.repl_release.wait() + # await trio.sleep(0.1) # must shield here to avoid hitting a `Cancelled` and # a child getting stuck bc we clobbered the tty with trio.CancelScope(shield=shield): - if Lock._debug_lock.locked(): + ctx_line = '`Lock` in this root actor task' + acq_prefix: str = 'shield-' if shield else '' + if ( + Lock._debug_lock.locked() + ): + if ctx: + ctx_line: str = ( + 'active `Lock` owned by ctx\n\n' + f'{ctx}' + ) + elif Lock._owned_by_root: + ctx_line: str = ( + 'Already owned by root-task `Lock`\n\n' + f'repl_task: {DebugStatus.repl_task}\n' + f'repl: {DebugStatus.repl}\n' + ) + else: + ctx_line: str = ( + '**STALE `Lock`** held by unknown root/remote task ' + 'with no request ctx !?!?' + ) - acq_prefix: str = 'shield-' if shield else '' - ctx_line: str = ( - 'lock owned by ctx\n\n' - f'{ctx}' - ) if ctx else 'stale lock with no request ctx!?' - log.devx( - f'attempting to {acq_prefix}acquire active TTY ' - f'{ctx_line}' - ) + log.devx( + f'attempting to {acq_prefix}acquire ' + f'{ctx_line}' + ) + await Lock._debug_lock.acquire() + Lock._owned_by_root = True + # else: - # XXX: since we need to enter pdb synchronously below, - # and we don't want to block the thread that starts - # stepping through the application thread, we later - # must `Lock._debug_lock.release()` manually from - # some `PdbREPL` completion callback(`.set_[continue/exit]()`). - # - # So, when `._pause()` is called from a (bg/non-trio) - # thread, special provisions are needed and we need - # to do the `.acquire()`/`.release()` calls from - # a common `trio.task` (due to internal impl of - # `FIFOLock`). 
Thus we do not acquire here and - # instead expect `.pause_from_sync()` to take care of - # this detail depending on the caller's (threading) - # usage. - # - # NOTE that this special case is ONLY required when - # using `.pause_from_sync()` from the root actor - # since OW a subactor will instead make an IPC - # request (in the branch below) to acquire the - # `Lock`-mutex and a common root-actor RPC task will - # take care of `._debug_lock` mgmt! - if not called_from_sync: - await Lock._debug_lock.acquire() - Lock._owned_by_root = True + # if ( + # not called_from_bg_thread + # and not called_from_sync + # ): + # log.devx( + # f'attempting to {acq_prefix}acquire ' + # f'{ctx_line}' + # ) + + # XXX: since we need to enter pdb synchronously below, + # and we don't want to block the thread that starts + # stepping through the application thread, we later + # must `Lock._debug_lock.release()` manually from + # some `PdbREPL` completion callback(`.set_[continue/exit]()`). + # + # So, when `._pause()` is called from a (bg/non-trio) + # thread, special provisions are needed and we need + # to do the `.acquire()`/`.release()` calls from + # a common `trio.task` (due to internal impl of + # `FIFOLock`). Thus we do not acquire here and + # instead expect `.pause_from_sync()` to take care of + # this detail depending on the caller's (threading) + # usage. + # + # NOTE that this special case is ONLY required when + # using `.pause_from_sync()` from the root actor + # since OW a subactor will instead make an IPC + # request (in the branch below) to acquire the + # `Lock`-mutex and a common root-actor RPC task will + # take care of `._debug_lock` mgmt! # enter REPL from root, no TTY locking IPC ctx necessary # since we can acquire the `Lock._debug_lock` directly in # thread. - _enter_repl_sync(debug_func) + return _enter_repl_sync(debug_func) # TODO: need a more robust check for the "root" actor elif ( @@ -1809,7 +1967,7 @@ async def _pause( ) # enter REPL - _enter_repl_sync(debug_func) + return _enter_repl_sync(debug_func) # TODO: prolly factor this plus the similar block from # `_enter_repl_sync()` into a common @cm? @@ -1838,7 +1996,9 @@ async def _pause( else: log.exception( - 'Failed to engage debugger via `_pause()` ??\n' + _repl_fail_msg + + + f'on behalf of {repl_task} ??\n' ) DebugStatus.release(cancel_req_task=True) @@ -1882,11 +2042,11 @@ def _set_trace( # optionally passed in to provide support for # `pause_from_sync()` where actor: tractor.Actor|None = None, - task: trio.Task|None = None, + task: Task|Thread|None = None, ): __tracebackhide__: bool = hide_tb actor: tractor.Actor = actor or current_actor() - task: trio.Task = task or current_task() + task: Task|Thread = task or current_task() # else: # TODO: maybe print the actor supervion tree up to the @@ -2023,7 +2183,7 @@ async def maybe_init_greenback( if mod := maybe_import_greenback(**kwargs): await mod.ensure_portal() - log.info( + log.devx( '`greenback` portal opened!\n' 'Sync debug support activated!\n' ) @@ -2032,12 +2192,116 @@ async def maybe_init_greenback( return None -# TODO: allow pausing from sync code. -# normally by remapping python's builtin breakpoint() hook to this -# runtime aware version which takes care of all . 
-def pause_from_sync( +async def _pause_from_bg_root_thread( + behalf_of_thread: Thread, + repl: PdbREPL, + hide_tb: bool, + task_status: TaskStatus[Task] = trio.TASK_STATUS_IGNORED, + **_pause_kwargs, +): + ''' + Acquire the `Lock._debug_lock` from a bg (only need for + root-actor) non-`trio` thread (started via a call to + `.to_thread.run_sync()` in some actor) by scheduling this func in + the actor's service (TODO eventually a special debug_mode) + nursery. This task acquires the lock then `.started()`s the + `DebugStatus.repl_release: trio.Event` waits for the `PdbREPL` to + set it, then terminates very much the same way as + `request_root_stdio_lock()` uses an IPC `Context` from a subactor + to do the same from a remote process. + + This task is normally only required to be scheduled for the + special cases of a bg sync thread running in the root actor; see + the only usage inside `.pause_from_sync()`. + + ''' + global Lock + # TODO: unify this copied code with where it was + # from in `maybe_wait_for_debugger()` + # if ( + # Lock.req_handler_finished is not None + # and not Lock.req_handler_finished.is_set() + # and (in_debug := Lock.ctx_in_debug) + # ): + # log.devx( + # '\nRoot is waiting on tty lock to release from\n\n' + # # f'{caller_frame_info}\n' + # ) + # with trio.CancelScope(shield=True): + # await Lock.req_handler_finished.wait() + + # log.pdb( + # f'Subactor released debug lock\n' + # f'|_{in_debug}\n' + # ) + task: Task = current_task() + + # Manually acquire since otherwise on release we'll + # get a RTE raised by `trio` due to ownership.. + log.devx( + 'Trying to acquire `Lock` on behalf of bg thread\n' + f'|_{behalf_of_thread}\n' + ) + # DebugStatus.repl_task = behalf_of_thread + out = await _pause( + debug_func=None, + repl=repl, + hide_tb=hide_tb, + called_from_sync=True, + called_from_bg_thread=True, + **_pause_kwargs + ) + lock: trio.FIFOLock = Lock._debug_lock + stats: trio.LockStatistics= lock.statistics() + assert stats.owner is task + assert Lock._owned_by_root + assert DebugStatus.repl_release + + # TODO: do we actually need this? + # originally i was trying to solve wy this was + # unblocking too soon in a thread but it was actually + # that we weren't setting our own `repl_release` below.. + while stats.owner is not task: + log.devx( + 'Trying to acquire `._debug_lock` from {stats.owner} for\n' + f'|_{behalf_of_thread}\n' + ) + await lock.acquire() + break + + # XXX NOTE XXX super important dawg.. + # set our own event since the current one might + # have already been overriden and then set when the + # last REPL mutex holder exits their sesh! + # => we do NOT want to override any existing one + # and we want to ensure we set our own ONLY AFTER we have + # acquired the `._debug_lock` + repl_release = DebugStatus.repl_release = trio.Event() + + # unblock caller thread delivering this bg task + log.devx( + 'Unblocking root-bg-thread since we acquired lock via `._pause()`\n' + f'|_{behalf_of_thread}\n' + ) + task_status.started(out) + DebugStatus.shield_sigint() + + # wait for bg thread to exit REPL sesh. 
+ try: + await repl_release.wait() + finally: + log.devx( + 'releasing lock from bg root thread task!\n' + f'|_ {behalf_of_thread}\n' + ) + Lock.release() + + +def pause_from_sync( hide_tb: bool = True, + called_from_builtin: bool = False, + api_frame: FrameType|None = None, # proxy to `._pause()`, for ex: # shield: bool = False, @@ -2045,15 +2309,24 @@ def pause_from_sync( **_pause_kwargs, ) -> None: + ''' + Pause a `tractor` scheduled task or thread from sync (non-async + function) code. + When `greenback` is installed we remap python's builtin + `breakpoint()` hook to this runtime-aware version which takes + care of all bg-thread detection and appropriate synchronization + with the root actor's `Lock` to avoid mult-thread/process REPL + clobbering Bo + + ''' __tracebackhide__: bool = hide_tb try: actor: tractor.Actor = current_actor( err_on_no_runtime=False, ) - log.debug( - f'{actor.uid}: JUST ENTERED `tractor.pause_from_sync()`' - f'|_{actor}\n' + message: str = ( + f'{actor.uid} task called `tractor.pause_from_sync()`\n\n' ) if not actor: raise RuntimeError( @@ -2063,7 +2336,7 @@ def pause_from_sync( '- `async with tractor.open_root_actor()`\n' ) - # NOTE: once supported, remove this AND the one + # TODO: once supported, remove this AND the one # inside `._pause()`! if actor.is_infected_aio(): raise RuntimeError( @@ -2071,78 +2344,111 @@ def pause_from_sync( 'for infected `asyncio` mode!' ) - # raises on not-found by default - greenback: ModuleType = maybe_import_greenback() - mdb: PdbREPL = mk_pdb() + DebugStatus.shield_sigint() + repl: PdbREPL = mk_pdb() - # run async task which will lock out the root proc's TTY. + # message += f'-> created local REPL {repl}\n' + is_root: bool = is_root_process() + + # TODO: we could also check for a non-`.to_thread` context + # using `trio.from_thread.check_cancelled()` (says + # oremanj) wherein we get the following outputs: + # + # `RuntimeError`: non-`.to_thread` spawned thread + # noop: non-cancelled `.to_thread` + # `trio.Cancelled`: cancelled `.to_thread` + + # when called from a (bg) thread, run an async task in a new + # thread which will call `._pause()` manually with special + # handling for root-actor caller usage. if not DebugStatus.is_main_trio_thread(): - - # TODO: we could also check for a non-`.to_thread` context - # using `trio.from_thread.check_cancelled()` (says - # oremanj) wherein we get the following outputs: - # - # `RuntimeError`: non-`.to_thread` spawned thread - # noop: non-cancelled `.to_thread` - # `trio.Cancelled`: cancelled `.to_thread` - # - log.warning( - 'Engaging `.pause_from_sync()` from ANOTHER THREAD!' - ) - task: threading.Thread = threading.current_thread() - DebugStatus.repl_task: str = task + thread: threading.Thread = threading.current_thread() + repl_owner = thread # TODO: make root-actor bg thread usage work! - # if is_root_process(): - # async def _pause_from_sync_thread(): - # ... - # else: - # .. the below .. 
+ if is_root: + message += ( + f'-> called from a root-actor bg {thread}\n' + f'-> scheduling `._pause_from_sync_thread()`..\n' + ) + bg_task, repl = trio.from_thread.run( + afn=partial( + actor._service_n.start, + partial( + _pause_from_bg_root_thread, + behalf_of_thread=thread, + repl=repl, + hide_tb=hide_tb, + **_pause_kwargs, + ), + ) + ) + message += ( + f'-> `._pause_from_sync_thread()` started bg task {bg_task}\n' + ) + else: + message += f'-> called from a bg {thread}\n' + # NOTE: since this is a subactor, `._pause()` will + # internally issue a debug request via + # `request_root_stdio_lock()` and we don't need to + # worry about all the special considerations as with + # the root-actor per above. + bg_task, repl = trio.from_thread.run( + afn=partial( + _pause, + debug_func=None, + repl=repl, + hide_tb=hide_tb, - trio.from_thread.run( - partial( - _pause, - debug_func=None, - repl=mdb, - hide_tb=hide_tb, + # XXX to prevent `._pause()` for setting + # `DebugStatus.repl_task` to the gb task! + called_from_sync=True, + called_from_bg_thread=True, - # XXX to prevent `._pause()` for setting - # `DebugStatus.repl_task` to the gb task! - called_from_sync=True, - called_from_bg_thread=True, - - **_pause_kwargs - ), - ) + **_pause_kwargs + ), + ) + assert bg_task is not DebugStatus.repl_task else: # we are presumably the `trio.run()` + main thread - task: trio.Task = current_task() - DebugStatus.repl_task: str = task - greenback.await_( + # raises on not-found by default + greenback: ModuleType = maybe_import_greenback() + message += f'-> imported {greenback}\n' + repl_owner: Task = current_task() + message += '-> calling `greenback.await_(_pause(debug_func=None))` from sync caller..\n' + out = greenback.await_( _pause( debug_func=None, - repl=mdb, + repl=repl, hide_tb=hide_tb, called_from_sync=True, **_pause_kwargs, ) ) + if out: + bg_task, repl = out + assert repl is repl + assert bg_task is repl_owner - if is_root_process(): - # Manually acquire since otherwise on release we'll - # get a RTE raised by `trio` due to ownership.. - Lock._debug_lock.acquire_nowait() - Lock._owned_by_root = True + # NOTE: normally set inside `_enter_repl_sync()` + DebugStatus.repl_task: str = repl_owner # TODO: ensure we aggressively make the user aware about - # entering the global ``breakpoint()`` built-in from sync + # entering the global `breakpoint()` built-in from sync # code? + message += ( + f'-> successfully scheduled `._pause()` in `trio` thread on behalf of {bg_task}\n' + f'-> Entering REPL via `tractor._set_trace()` from caller {repl_owner}\n' + ) + log.devx(message) + + DebugStatus.repl = repl _set_trace( - api_frame=inspect.currentframe(), - repl=mdb, + api_frame=api_frame or inspect.currentframe(), + repl=repl, hide_tb=hide_tb, actor=actor, - task=task, + task=repl_owner, ) # LEGACY NOTE on next LOC's frame showing weirdness.. # @@ -2155,6 +2461,26 @@ def pause_from_sync( raise err +def _sync_pause_from_builtin( + *args, + called_from_builtin=True, + **kwargs, +) -> None: + ''' + Proxy call `.pause_from_sync()` but indicate the caller is the + `breakpoint()` built-in. + + Note: this assigned to `os.environ['PYTHONBREAKPOINT']` inside `._root` + + ''' + pause_from_sync( + *args, + called_from_builtin=True, + api_frame=inspect.currentframe(), + **kwargs, + ) + + # NOTE prefer a new "pause" semantic since it better describes # "pausing the actor's runtime" for this particular # paralell task to do debugging in a REPL. 
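
For reference, a minimal usage sketch of the two call-paths the
reworked `pause_from_sync()` above distinguishes: the main `trio`
thread path (via `greenback.await_()`) vs. the bg thread path (via the
newly scheduled lock-owner task). This is only a sketch and assumes a
root actor opened with `debug_mode=True` (as in the repo's
`examples/debugging/sync_bp.py`):

    import trio
    import tractor

    def sync_code():
        # plain sync frame; valid from either thread type
        tractor.pause_from_sync()

    async def main():
        async with tractor.open_nursery(debug_mode=True):
            sync_code()  # main-thread path
            # bg-thread path: handled by `_pause_from_bg_root_thread()`
            await trio.to_thread.run_sync(sync_code)

    if __name__ == '__main__':
        trio.run(main)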
@@ -2406,7 +2732,6 @@ async def maybe_wait_for_debugger(
         and
         not Lock.req_handler_finished.is_set()
         and in_debug is not None
     ):
-
         # caller_frame_info: str = pformat_caller_frame()
         logmeth(
             msg
@@ -2421,7 +2746,7 @@ async def maybe_wait_for_debugger(
             with trio.CancelScope(shield=True):
                 await Lock.req_handler_finished.wait()

-            log.pdb(
+            log.devx(
                 f'Subactor released debug lock\n'
                 f'|_{in_debug}\n'
             )
@@ -2453,13 +2778,6 @@ async def maybe_wait_for_debugger(
                 await trio.sleep(poll_delay)
                 continue

-        # fallthrough on failure to acquire..
-        # else:
-        #     raise RuntimeError(
-        #         msg
-        #         +
-        #         'Root actor failed to acquire debug lock?'
-        #     )
     return True

     # else:
-- 
2.34.1


From 6754a801863ccfe6bba4d498b0a9c8d970d9354d Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Mon, 10 Jun 2024 17:46:10 -0400
Subject: [PATCH 152/305] Make big TODO: for `devx._debug` refinements

Hopefully this would make grok-ing this fairly sophisticated sub-sys
possible for any up-and-coming `tractor` hacker XD

This records a lot of internal API and re-org ideas I
discovered/realized as part of finishing the `__pld_spec__` and
multi-threaded support. In particular, better isolation between
root-actor vs. subactor task APIs and generally less globally-state-ful
stuff like the `DebugStatus` and `Lock` method APIs would likely make a
lot of the hard-to-follow edge cases more clear?

---
 tractor/devx/_debug.py | 34 ++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py
index 3218cffa..5578e8a6 100644
--- a/tractor/devx/_debug.py
+++ b/tractor/devx/_debug.py
@@ -95,8 +95,38 @@ log = get_logger(__name__)

 # TODO: refine the internal impl and APIs in this module!
 #
-# -[ ] separate `._pause()` branch-cases for calling from a root task
-#    vs. from subactors
+# -[ ] rework `._pause()` and its branch-cases for root vs.
+#    subactor:
+#  -[ ] `._pause_from_root()` + `_pause_from_subactor()`?
+#  -[ ] do the de-factor based on bg-thread usage in
+#    `.pause_from_sync()` & `_pause_from_bg_root_thread()`.
+#  -[ ] drop `debug_func == None` case which is confusing af..
+#  -[ ] factor out `_enter_repl_sync()` into a util func for calling
+#    the `_set_trace()` / `_post_mortem()` APIs?
+#
+# -[ ] figure out if we need `acquire_debug_lock()` and/or re-implement
+#    it as part of the `.pause_from_sync()` rework per above?
+#
+# -[ ] pair the `._pause_from_subactor()` impl with a "debug nursery"
+#    that's dynamically allocated inside the `._rpc` task thus
+#    avoiding the `._service_n.start()` usage for the IPC request?
+#  -[ ] see the TODO inside `._rpc._errors_relayed_via_ipc()`
+#
+# -[ ] impl a `open_debug_request()` which encaps all
+#    `request_root_stdio_lock()` task scheduling deats
+#    + `DebugStatus` state mgmt; which should prolly be re-branded as
+#    a `DebugRequest` type anyway AND with support for bg-thread
+#    (from root actor) usage?
+#
+# -[ ] handle the `xonsh` case for bg-root-threads in the SIGINT
+#    handler!
+#  -[ ] do we need to do the same for subactors?
+#  -[ ] make the failing tests finally pass XD
+#
+# -[ ] simplify `maybe_wait_for_debugger()` to be a root-task only
+#    API?
+#  -[ ] currently it's implemented as that so might as well make it
+#    formal?
def hide_runtime_frames() -> dict[FunctionType, CodeType]: -- 2.34.1 From 85fd312c22936fee3bfc2f227ca5d71a592cdb1f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 10 Jun 2024 17:57:43 -0400 Subject: [PATCH 153/305] Use new `._debug._repl_fail_msg` inside `test_pause_from_sync` --- tests/test_debugger.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_debugger.py b/tests/test_debugger.py index c95c4f95..e4f28548 100644 --- a/tests/test_debugger.py +++ b/tests/test_debugger.py @@ -12,7 +12,6 @@ TODO: """ from functools import partial import itertools -from typing import Optional import platform import pathlib import time @@ -30,6 +29,7 @@ from tractor._testing import ( from tractor.devx._debug import ( _pause_msg, _crash_msg, + _repl_fail_msg, ) from .conftest import ( _ci_env, @@ -291,7 +291,7 @@ def do_ctlc( child, count: int = 3, delay: float = 0.1, - patt: Optional[str] = None, + patt: str|None = None, # expect repl UX to reprint the prompt after every # ctrl-c send. @@ -1304,7 +1304,7 @@ def test_shield_pause( [ _crash_msg, "('cancelled_before_pause'", # actor name - "Failed to engage debugger via `_pause()`", + _repl_fail_msg, "trio.Cancelled", "raise Cancelled._create()", @@ -1322,7 +1322,7 @@ def test_shield_pause( [ _crash_msg, "('root'", # actor name - "Failed to engage debugger via `_pause()`", + _repl_fail_msg, "trio.Cancelled", "raise Cancelled._create()", -- 2.34.1 From dee312cae1ef793dcc2223fc1cf7843725d2c863 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 10 Jun 2024 19:16:21 -0400 Subject: [PATCH 154/305] Use `_debug._sync_pause_from_builtin()` as `breakpoint()` override --- tractor/_root.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tractor/_root.py b/tractor/_root.py index 4ddfde51..bbd9d93d 100644 --- a/tractor/_root.py +++ b/tractor/_root.py @@ -120,7 +120,7 @@ async def open_root_actor( ) ): os.environ['PYTHONBREAKPOINT'] = ( - 'tractor.devx._debug.pause_from_sync' + 'tractor.devx._debug._sync_pause_from_builtin' ) _state._runtime_vars['use_greenback'] = True -- 2.34.1 From f5b1d0179e774537ffe3e8545a5dbc16b804277a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 14 Jun 2024 15:27:35 -0400 Subject: [PATCH 155/305] Add `@context(pld_spec=)` TODO list Longer run we don't want `tractor` app devs having to call `msg._ops.limit_plds()` from every child endpoint.. so this starts a list of decorator API ideas and obviously ties in with an ideal final API design that will come with py3.13 and typed funcs. Obviously this is directly fueled by, - https://github.com/goodboy/tractor/issues/365 Other, - type with direct `trio.lowlevel.Task` import. - use `log.exception()` to show tbs for all error-terminations in `.open_context()` (for now) and always explicitly mention the `.side`. --- tractor/_context.py | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index dd4ad846..20ca29f6 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -58,6 +58,7 @@ from typing import ( import warnings # ------ - ------ import trio +from trio.lowlevel import Task # ------ - ------ from ._exceptions import ( ContextCancelled, @@ -121,7 +122,7 @@ class Unresolved: @dataclass class Context: ''' - An inter-actor, SC transitive, `trio.Task` communication context. + An inter-actor, SC transitive, `Task` communication context. 
NB: This class should **never be instatiated directly**, it is allocated by the runtime in 2 ways: @@ -134,7 +135,7 @@ class Context: Allows maintaining task or protocol specific state between 2 cancel-scope-linked, communicating and parallel executing - `trio.Task`s. Contexts are allocated on each side of any task + `Task`s. Contexts are allocated on each side of any task RPC-linked msg dialog, i.e. for every request to a remote actor from a `Portal`. On the "callee" side a context is always allocated inside `._rpc._invoke()`. @@ -214,7 +215,7 @@ class Context: # which is exactly the primitive that allows for # cross-actor-task-supervision and thus SC. _scope: trio.CancelScope|None = None - _task: trio.lowlevel.Task|None = None + _task: Task|None = None # TODO: cs around result waiting so we can cancel any # permanently blocking `._rx_chan.receive()` call in @@ -258,14 +259,14 @@ class Context: # a call to `.cancel()` which triggers `ContextCancelled`. _cancel_msg: str|dict|None = None - # NOTE: this state var used by the runtime to determine if the + # NOTE: this state-var is used by the runtime to determine if the # `pdbp` REPL is allowed to engage on contexts terminated via # a `ContextCancelled` due to a call to `.cancel()` triggering # "graceful closure" on either side: # - `._runtime._invoke()` will check this flag before engaging # the crash handler REPL in such cases where the "callee" # raises the cancellation, - # - `.devx._debug.lock_tty_for_child()` will set it to `False` if + # - `.devx._debug.lock_stdio_for_peer()` will set it to `False` if # the global tty-lock has been configured to filter out some # actors from being able to acquire the debugger lock. _enter_debugger_on_cancel: bool = True @@ -861,7 +862,7 @@ class Context: ) -> None: ''' Cancel this inter-actor IPC context by requestng the - remote side's cancel-scope-linked `trio.Task` by calling + remote side's cancel-scope-linked `Task` by calling `._scope.cancel()` and delivering an `ContextCancelled` ack msg in reponse. @@ -1030,7 +1031,7 @@ class Context: # XXX NOTE XXX: `ContextCancelled`/`StreamOverrun` absorption # for "graceful cancellation" case: # - # Whenever a "side" of a context (a `trio.Task` running in + # Whenever a "side" of a context (a `Task` running in # an actor) **is** the side which requested ctx # cancellation (likekly via ``Context.cancel()``), we # **don't** want to re-raise any eventually received @@ -1089,7 +1090,8 @@ class Context: else: log.warning( 'Local error already set for ctx?\n' - f'{self._local_error}\n' + f'{self._local_error}\n\n' + f'{self}' ) return remote_error @@ -2117,8 +2119,9 @@ async def open_context_from_portal( # the `ContextCancelled` "self cancellation absorbed" case # handled in the block above ^^^ !! # await _debug.pause() - log.cancel( - 'Context terminated due to\n\n' + # log.cancel( + log.exception( + f'{ctx.side}-side of `Context` terminated with ' f'.outcome => {ctx.repr_outcome()}\n' ) @@ -2319,7 +2322,7 @@ async def open_context_from_portal( # type_only=True, ) log.cancel( - f'Context terminated due to local scope error:\n\n' + f'Context terminated due to local {ctx.side!r}-side error:\n\n' f'{ctx.chan.uid} => {outcome_str}\n' ) @@ -2385,15 +2388,25 @@ def mk_context( # TODO: use the new type-parameters to annotate this in 3.13? 
# -[ ] https://peps.python.org/pep-0718/#unknown-types +# -[ ] allow for `pld_spec` input(s) ideally breaking down, +# |_ `start: ParameterSpec`, +# |_ `started: TypeAlias`, +# |_ `yields: TypeAlias`, +# |_ `return: TypeAlias`, +# |_ `invalid_policy: str|Callable` ? +# -[ ] prolly implement the `@acm` wrapper using +# a `contextlib.ContextDecorator`? +# def context( func: Callable, ) -> Callable: ''' - Mark an (async) function as an SC-supervised, inter-`Actor`, - child-`trio.Task`, IPC endpoint otherwise known more - colloquially as a (RPC) "context". + Mark an async function as an SC-supervised, inter-`Actor`, RPC + scheduled child-side `Task`, IPC endpoint otherwise + known more colloquially as a (RPC) "context". - Functions annotated the fundamental IPC endpoint type offered by `tractor`. + Functions annotated the fundamental IPC endpoint type offered by + `tractor`. ''' # TODO: apply whatever solution ``mypy`` ends up picking for this: -- 2.34.1 From 69fb7beff87a476a7d5b10def3dd43835498e503 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 14 Jun 2024 15:49:30 -0400 Subject: [PATCH 156/305] Log tbs from non-RAE `._invoke()`-RPC-task errors `RemoteActorError`s show this by default in their `.__repr__()`, and we obvi capture and embed the src traceback in an `Error` msg prior to transit, but for logging it's also handy to see the tb of any set `Context._remote_error` on console especially when trying to decipher remote error details at their origin actor. Also improve the log message description using `ctx.repr_state` and show any `ctx.outcome`. --- tractor/_rpc.py | 47 ++++++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index 8a9b3487..a8cb010a 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -26,6 +26,7 @@ from contextlib import ( from functools import partial import inspect from pprint import pformat +import traceback from typing import ( Any, Callable, @@ -47,6 +48,7 @@ from ._context import ( ) from ._exceptions import ( ContextCancelled, + RemoteActorError, ModuleNotExposed, MsgTypeError, TransportClosed, @@ -197,7 +199,8 @@ async def _invoke_non_context( raise ipc_err else: log.exception( - f'Failed to respond to runtime RPC request for\n\n' + f'Failed to ack runtime RPC request\n\n' + f'{func} x=> {ctx.chan}\n\n' f'{ack}\n' ) @@ -414,7 +417,6 @@ async def _errors_relayed_via_ipc( async def _invoke( - actor: Actor, cid: str, chan: Channel, @@ -690,10 +692,6 @@ async def _invoke( boxed_type=trio.Cancelled, canceller=canceller, ) - # does this matter other then for - # consistentcy/testing? |_ no user code should be - # in this scope at this point.. - # ctx._local_error = ctxc raise ctxc # XXX: do we ever trigger this block any more? @@ -714,6 +712,11 @@ async def _invoke( # always set this (child) side's exception as the # local error on the context ctx._local_error: BaseException = scope_error + # ^-TODO-^ question, + # does this matter other then for + # consistentcy/testing? + # |_ no user code should be in this scope at this point + # AND we already set this in the block below? 
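# ---- [editor's example, not part of the patch] ----
# A minimal sketch of the `@context` "IPC endpoint" described in the
# decorator docstring above; the endpoint name `echo_once` and its
# args are hypothetical, purely for illustration:
import tractor

@tractor.context
async def echo_once(
    ctx: tractor.Context,
    msg: str,
) -> str:
    # sync-point: deliver a first value to the parent task
    await ctx.started(msg)
    # the final result ships back in a `Return` msg
    return msg
# ---- [end example] ----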
            # if a remote error was set then likely the
            # exception group was raised due to that, so
@@ -740,22 +743,32 @@ async def _invoke(
            logmeth: Callable = log.runtime

            merr: Exception|None = ctx.maybe_error
-            descr_str: str = 'with final result `{repr(ctx.outcome)}`'
-            message: str = (
-                f'IPC context terminated {descr_str}\n\n'
+            message: str = 'IPC context terminated '
+            descr_str: str = (
+                f'after having {ctx.repr_state!r}\n'
             )
             if merr:
-                descr_str: str = (
-                    f'with ctx having {ctx.repr_state!r}\n'
-                    f'{ctx.repr_outcome()}\n'
-                )
+
+                logmeth: Callable = log.error
                 if isinstance(merr, ContextCancelled):
                     logmeth: Callable = log.runtime
-                else:
-                    logmeth: Callable = log.error
-                    message += f'\n{merr!r}\n'

-            logmeth(message)
+                if not isinstance(merr, RemoteActorError):
+                    tb_str: str = ''.join(traceback.format_exception(merr))
+                    descr_str += (
+                        f'\n{merr!r}\n'  # needed?
+                        f'{tb_str}\n'
+                    )
+                else:
+                    descr_str += f'\n{merr!r}\n'
+            else:
+                descr_str += f'\nand final result {ctx.outcome!r}\n'
+
+            logmeth(
+                message
+                +
+                descr_str
+            )


 async def try_ship_error_to_remote(
-- 
2.34.1


From aee1bf8456706f4d560ce6472d96e630504cda57 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Sun, 16 Jun 2024 22:50:43 -0400
Subject: [PATCH 157/305] Offer a `@context(pld_spec=)` API

Instead of the WIP/prototyped `Portal.open_context()` offering
a `pld_spec` input arg, this changes to a proper decorator API for
specifying the "payload spec" on `@context` endpoints.

The impl change details actually cover 2-birds:
- monkey patch decorated functions with a new
  `._tractor_context_meta: dict[str, Any]` and insert any provided
  input `@context` kwargs: `_pld_spec`, `enc_hook`, `dec_hook`.
- use `inspect.get_annotations()` to scan for a `func` arg
  type-annotated with `tractor.Context` and use the name of that arg
  as the RPC task-side injected `Context`, thus injecting the needed
  arg by type instead of by name (a longstanding TODO); raise
  a type-error when not found.
- pull the `pld_spec` from the `._tractor_context_meta` attr both in
  the `.open_context()` parent-side and child-side `._invoke()`-cation
  of the RPC task and use the `msg._ops.maybe_limit_plds()` API to
  apply it internally in the runtime for each case.
---
 tractor/_context.py | 89 +++++++++++++++++++++++++++++++++++----------
 tractor/_rpc.py     | 25 +++++++++++--
 2 files changed, 92 insertions(+), 22 deletions(-)

diff --git a/tractor/_context.py b/tractor/_context.py
index 20ca29f6..c61af15a 100644
--- a/tractor/_context.py
+++ b/tractor/_context.py
@@ -1792,7 +1792,6 @@ async def open_context_from_portal(
     portal: Portal,
     func: Callable,

-    pld_spec: TypeAlias|None = None,
     allow_overruns: bool = False,
     hide_tb: bool = True,

@@ -1838,12 +1837,20 @@ async def open_context_from_portal(
     # NOTE: 2 bc of the wrapping `@acm`
     __runtimeframe__: int = 2  # noqa

-    # conduct target func method structural checks
-    if not inspect.iscoroutinefunction(func) and (
-        getattr(func, '_tractor_contex_function', False)
+    # if NOT an async func but decorated with `@context`, error.
+    if (
+        not inspect.iscoroutinefunction(func)
+        and getattr(func, '_tractor_context_meta', False)
     ):
         raise TypeError(
-            f'{func} must be an async generator function!')
+            f'{func!r} must be an async function!'
+ ) + + ctx_meta: dict[str, Any]|None = getattr( + func, + '_tractor_context_meta', + None, + ) # TODO: i think from here onward should probably # just be factored into an `@acm` inside a new @@ -1890,12 +1897,9 @@ async def open_context_from_portal( trio.open_nursery() as tn, msgops.maybe_limit_plds( ctx=ctx, - spec=pld_spec, - ) as maybe_msgdec, + spec=ctx_meta.get('pld_spec'), + ), ): - if maybe_msgdec: - assert maybe_msgdec.pld_spec == pld_spec - # NOTE: this in an implicit runtime nursery used to, # - start overrun queuing tasks when as well as # for cancellation of the scope opened by the user. @@ -2398,7 +2402,15 @@ def mk_context( # a `contextlib.ContextDecorator`? # def context( - func: Callable, + func: Callable|None = None, + + *, + + # must be named! + pld_spec: Union[Type]|TypeAlias = Any, + dec_hook: Callable|None = None, + enc_hook: Callable|None = None, + ) -> Callable: ''' Mark an async function as an SC-supervised, inter-`Actor`, RPC @@ -2409,15 +2421,54 @@ def context( `tractor`. ''' + # XXX for the `@context(pld_spec=MyMsg|None)` case + if func is None: + return partial( + context, + pld_spec=pld_spec, + dec_hook=dec_hook, + enc_hook=enc_hook, + ) + + # TODO: from this, enforcing a `Start.sig` type + # check when invoking RPC tasks by ensuring the input + # args validate against the endpoint def. + sig: inspect.Signature = inspect.signature(func) + # params: inspect.Parameters = sig.parameters + + # https://docs.python.org/3/library/inspect.html#inspect.get_annotations + annots: dict[str, Type] = inspect.get_annotations( + func, + eval_str=True, + ) + name: str + param: Type + for name, param in annots.items(): + if param is Context: + ctx_var_name: str = name + break + else: + raise TypeError( + 'At least one (normally the first) argument to the `@context` function ' + f'{func.__name__!r} must be typed as `tractor.Context`, for ex,\n\n' + f'`ctx: tractor.Context`\n' + ) + # TODO: apply whatever solution ``mypy`` ends up picking for this: # https://github.com/python/mypy/issues/2087#issuecomment-769266912 - func._tractor_context_function = True # type: ignore + # func._tractor_context_function = True # type: ignore + func._tractor_context_meta: dict[str, Any] = { + 'ctx_var_name': ctx_var_name, + # `msgspec` related settings + 'pld_spec': pld_spec, + 'enc_hook': enc_hook, + 'dec_hook': dec_hook, - sig: inspect.Signature = inspect.signature(func) - params: Mapping = sig.parameters - if 'ctx' not in params: - raise TypeError( - "The first argument to the context function " - f"{func.__name__} must be `ctx: tractor.Context`" - ) + # TODO: eventually we need to "signature-check" with these + # vs. the `Start` msg fields! 
+ # => this would allow for TPC endpoint argument-type-spec + # limiting and we could then error on + # invalid inputs passed to `.open_context(rpc_ep, arg0='blah')` + 'sig': sig, + } return func diff --git a/tractor/_rpc.py b/tractor/_rpc.py index a8cb010a..37fea079 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -68,6 +68,7 @@ from .msg import ( PayloadT, NamespacePath, pretty_struct, + _ops as msgops, ) from tractor.msg.types import ( CancelAck, @@ -499,8 +500,19 @@ async def _invoke( # handle decorated ``@tractor.context`` async function - elif getattr(func, '_tractor_context_function', False): - kwargs['ctx'] = ctx + # - pull out any typed-pld-spec info and apply (below) + # - (TODO) store func-ref meta data for API-frame-info logging + elif ( + ctx_meta := getattr( + func, + '_tractor_context_meta', + False, + ) + ): + # kwargs['ctx'] = ctx + # set the required `tractor.Context` typed input argument to + # the allocated RPC task context. + kwargs[ctx_meta['ctx_var_name']] = ctx context_ep_func = True # errors raised inside this block are propgated back to caller @@ -594,7 +606,14 @@ async def _invoke( # `@context` marked RPC function. # - `._portal` is never set. try: - async with trio.open_nursery() as tn: + async with ( + trio.open_nursery() as tn, + msgops.maybe_limit_plds( + ctx=ctx, + spec=ctx_meta.get('pld_spec'), + dec_hook=ctx_meta.get('dec_hook'), + ), + ): ctx._scope_nursery = tn ctx._scope = tn.cancel_scope task_status.started(ctx) -- 2.34.1 From e8fee545344db4201ea8e1bd4ba0194e763b0201 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 17 Jun 2024 09:01:13 -0400 Subject: [PATCH 158/305] Port debug request ep to use `@context(pld_spec)` Namely passing the `.__pld_spec__` directly to the `lock_stdio_for_peer()` decorator B) Also, allows dropping `apply_debug_pldec()` (which was a todo) and removing a `lock_stdio_for_peer()` indent level. --- tractor/devx/_debug.py | 150 +++++++++++++++-------------------------- 1 file changed, 54 insertions(+), 96 deletions(-) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 5578e8a6..ccf57d62 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -68,6 +68,7 @@ from trio import ( ) import tractor from tractor.log import get_logger +from tractor._context import Context from tractor._state import ( current_actor, is_root_process, @@ -83,7 +84,6 @@ if TYPE_CHECKING: from trio.lowlevel import Task from threading import Thread from tractor._ipc import Channel - from tractor._context import Context from tractor._runtime import ( Actor, ) @@ -529,7 +529,10 @@ class Lock: ) -@tractor.context +@tractor.context( + # enable the locking msgspec + pld_spec=__pld_spec__, +) async def lock_stdio_for_peer( ctx: Context, subactor_task_uid: tuple[str, int], @@ -597,61 +600,55 @@ async def lock_stdio_for_peer( # scope despite the shielding we apply below. debug_lock_cs: CancelScope = ctx._scope - # TODO: use `.msg._ops.maybe_limit_plds()` here instead so we - # can merge into a single async with, with the - # `Lock.acquire_for_ctx()` enter below? 
- # - # enable the locking msgspec - with apply_debug_pldec(): - async with Lock.acquire_for_ctx(ctx=ctx): - debug_lock_cs.shield = True + async with Lock.acquire_for_ctx(ctx=ctx): + debug_lock_cs.shield = True - log.devx( - 'Subactor acquired debugger request lock!\n' - f'root task: {root_task_name}\n' - f'subactor_uid: {subactor_uid}\n' - f'remote task: {subactor_task_uid}\n\n' + log.devx( + 'Subactor acquired debugger request lock!\n' + f'root task: {root_task_name}\n' + f'subactor_uid: {subactor_uid}\n' + f'remote task: {subactor_task_uid}\n\n' - 'Sending `ctx.started(LockStatus)`..\n' + 'Sending `ctx.started(LockStatus)`..\n' - ) - - # indicate to child that we've locked stdio - await ctx.started( - LockStatus( - subactor_uid=subactor_uid, - cid=ctx.cid, - locked=True, - ) - ) - - log.devx( - f'Actor {subactor_uid} acquired `Lock` via debugger request' - ) - - # wait for unlock pdb by child - async with ctx.open_stream() as stream: - release_msg: LockRelease = await stream.receive() - - # TODO: security around only releasing if - # these match? - log.devx( - f'TTY lock released requested\n\n' - f'{release_msg}\n' - ) - assert release_msg.cid == ctx.cid - assert release_msg.subactor_uid == tuple(subactor_uid) - - log.devx( - f'Actor {subactor_uid} released TTY lock' - ) - - return LockStatus( - subactor_uid=subactor_uid, - cid=ctx.cid, - locked=False, ) + # indicate to child that we've locked stdio + await ctx.started( + LockStatus( + subactor_uid=subactor_uid, + cid=ctx.cid, + locked=True, + ) + ) + + log.devx( + f'Actor {subactor_uid} acquired `Lock` via debugger request' + ) + + # wait for unlock pdb by child + async with ctx.open_stream() as stream: + release_msg: LockRelease = await stream.receive() + + # TODO: security around only releasing if + # these match? + log.devx( + f'TTY lock released requested\n\n' + f'{release_msg}\n' + ) + assert release_msg.cid == ctx.cid + assert release_msg.subactor_uid == tuple(subactor_uid) + + log.devx( + f'Actor {subactor_uid} released TTY lock' + ) + + return LockStatus( + subactor_uid=subactor_uid, + cid=ctx.cid, + locked=False, + ) + except BaseException as req_err: message: str = ( f'On behalf of remote peer {subactor_task_uid!r}@{ctx.chan.uid!r}\n\n' @@ -1037,48 +1034,6 @@ class PdbREPL(pdbp.Pdb): return None -# TODO: prolly remove this and instead finally get our @context API -# supporting a msg/pld-spec via type annots as per, -# https://github.com/goodboy/tractor/issues/365 -@cm -def apply_debug_pldec() -> _codec.MsgCodec: - ''' - Apply the subactor TTY `Lock`-ing protocol's msgspec temporarily - (only in the current task). 
- - ''' - from tractor.msg import ( - _ops as msgops, - ) - cctx: Context = current_ipc_ctx() - rx: msgops.PldRx = cctx.pld_rx - orig_pldec: msgops.MsgDec = rx.pld_dec - - try: - with msgops.limit_plds( - spec=__pld_spec__, - ) as debug_dec: - assert ( - debug_dec - is - rx.pld_dec - ) - log.runtime( - 'Applied `.devx._debug` pld-spec\n\n' - f'{debug_dec}\n' - ) - yield debug_dec - - finally: - assert ( - rx.pld_dec is orig_pldec - ) - log.runtime( - 'Reverted to previous pld-spec\n\n' - f'{orig_pldec}\n' - ) - - async def request_root_stdio_lock( actor_uid: tuple[str, str], task_uid: tuple[str, int], @@ -1147,6 +1102,7 @@ async def request_root_stdio_lock( async with portal.open_context( lock_stdio_for_peer, subactor_task_uid=task_uid, + # NOTE: set it here in the locker request task bc it's # possible for multiple such requests for the lock in any # single sub-actor AND there will be a race between when the @@ -1159,7 +1115,7 @@ async def request_root_stdio_lock( # this IPC-ctx request task, NOT any other task(s) # including the one that actually enters the REPL. This # is oc desired bc ow the debugged task will msg-type-error. - pld_spec=__pld_spec__, + # pld_spec=__pld_spec__, ) as (req_ctx, status): @@ -2856,7 +2812,9 @@ def open_crash_handler( @cm -def maybe_open_crash_handler(pdb: bool = False): +def maybe_open_crash_handler( + pdb: bool = False, +): ''' Same as `open_crash_handler()` but with bool input flag to allow conditional handling. -- 2.34.1 From ed42aa7e657eb97ea4f400adb5c77231d9ea367b Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 17 Jun 2024 09:23:31 -0400 Subject: [PATCH 159/305] Proxy through `dec_hook` in `.limit_plds()` APIs --- tractor/msg/_ops.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index 86f80395..80633e7e 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -27,6 +27,7 @@ from contextlib import ( ) from typing import ( Any, + Callable, Type, TYPE_CHECKING, Union, @@ -138,6 +139,7 @@ class PldRx(Struct): def limit_plds( self, spec: Union[Type[Struct]], + **dec_kwargs, ) -> MsgDec: ''' @@ -147,7 +149,10 @@ class PldRx(Struct): ''' orig_dec: MsgDec = self._pld_dec - limit_dec: MsgDec = mk_dec(spec=spec) + limit_dec: MsgDec = mk_dec( + spec=spec, + **dec_kwargs, + ) try: self._pld_dec = limit_dec yield limit_dec @@ -449,7 +454,7 @@ class PldRx(Struct): @cm def limit_plds( spec: Union[Type[Struct]], - **kwargs, + **dec_kwargs, ) -> MsgDec: ''' @@ -467,7 +472,7 @@ def limit_plds( with rx.limit_plds( spec=spec, - **kwargs, + **dec_kwargs, ) as pldec: log.runtime( 'Applying payload-decoder\n\n' @@ -487,7 +492,9 @@ def limit_plds( async def maybe_limit_plds( ctx: Context, spec: Union[Type[Struct]]|None = None, + dec_hook: Callable|None = None, **kwargs, + ) -> MsgDec|None: ''' Async compat maybe-payload type limiter. @@ -497,7 +504,11 @@ async def maybe_limit_plds( used. 
''' - if spec is None: + if ( + spec is None + and + dec_hook is None + ): yield None return @@ -505,7 +516,11 @@ async def maybe_limit_plds( curr_ctx: Context = current_ipc_ctx() assert ctx is curr_ctx - with ctx._pld_rx.limit_plds(spec=spec) as msgdec: + with ctx._pld_rx.limit_plds( + spec=spec, + dec_hook=dec_hook, + **kwargs, + ) as msgdec: yield msgdec curr_ctx: Context = current_ipc_ctx() -- 2.34.1 From e863159c7f4667dacf14c37d13105e5b991f13ce Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 17 Jun 2024 09:24:03 -0400 Subject: [PATCH 160/305] Update pld-rx limiting test(s) to use deco input The tests only use one input spec (conveniently) so there's not much to change in the logic, - only pass the `maybe_msg_spec` to the child-side decorator and obvi drop the surrounding `msgops.limit_plds()` block in the child. - tweak a few `MsgDec` asserts, mostly dropping the `msg._ops._def_any_spec` state checks since the child-side won't have any pre pld-spec state given the runtime now applies the `pld_spec` before running the task's func body. - also allowed dropping the `finally:` which did a similar check outside the `.limit_plds()` block. --- tests/test_pldrx_limiting.py | 203 +++++++++++++++++------------------ 1 file changed, 99 insertions(+), 104 deletions(-) diff --git a/tests/test_pldrx_limiting.py b/tests/test_pldrx_limiting.py index ddf2a234..e5ce691a 100644 --- a/tests/test_pldrx_limiting.py +++ b/tests/test_pldrx_limiting.py @@ -7,9 +7,6 @@ related settings around IPC contexts. from contextlib import ( asynccontextmanager as acm, ) -from contextvars import ( - Context, -) from msgspec import ( Struct, @@ -19,6 +16,7 @@ import trio import tractor from tractor import ( + Context, MsgTypeError, current_ipc_ctx, Portal, @@ -35,7 +33,17 @@ from tractor.msg.types import ( ) -class PldMsg(Struct): +class PldMsg( + Struct, + + # TODO: with multiple structs in-spec we need to tag them! + # -[ ] offer a built-in `PldMsg` type to inherit from which takes + # case of these details? 
+ # + # https://jcristharif.com/msgspec/structs.html#tagged-unions + # tag=True, + # tag_field='msg_type', +): field: str @@ -96,12 +104,14 @@ async def maybe_expect_raises( ) -@tractor.context +@tractor.context( + pld_spec=maybe_msg_spec, +) async def child( ctx: Context, started_value: int|PldMsg|None, return_value: str|None, - validate_pld_spec: bool, + validate_pld_spec: bool, raise_on_started_mte: bool = True, ) -> None: @@ -116,113 +126,99 @@ async def child( assert ctx is curr_ctx rx: msgops.PldRx = ctx._pld_rx - orig_pldec: _codec.MsgDec = rx.pld_dec - # senity that default pld-spec should be set - assert ( - rx.pld_dec - is - msgops._def_any_pldec + curr_pldec: _codec.MsgDec = rx.pld_dec + + ctx_meta: dict = getattr( + child, + '_tractor_context_meta', + None, ) + if ctx_meta: + assert ( + ctx_meta['pld_spec'] + is curr_pldec.spec + is curr_pldec.pld_spec + ) + # 2 cases: hdndle send-side and recv-only validation + # - when `raise_on_started_mte == True`, send validate + # - else, parent-recv-side only validation + mte: MsgTypeError|None = None try: - with msgops.limit_plds( - spec=maybe_msg_spec, - ) as pldec: - # sanity on `MsgDec` state - assert rx.pld_dec is pldec - assert pldec.spec is maybe_msg_spec + await ctx.started( + value=started_value, + validate_pld_spec=validate_pld_spec, + ) - # 2 cases: hdndle send-side and recv-only validation - # - when `raise_on_started_mte == True`, send validate - # - else, parent-recv-side only validation - mte: MsgTypeError|None = None - try: - await ctx.started( - value=started_value, - validate_pld_spec=validate_pld_spec, - ) - - except MsgTypeError as _mte: - mte = _mte - log.exception('started()` raised an MTE!\n') - if not expect_started_mte: - raise RuntimeError( - 'Child-ctx-task SHOULD NOT HAVE raised an MTE for\n\n' - f'{started_value!r}\n' - ) - - boxed_div: str = '------ - ------' - assert boxed_div not in mte._message - assert boxed_div not in mte.tb_str - assert boxed_div not in repr(mte) - assert boxed_div not in str(mte) - mte_repr: str = repr(mte) - for line in mte.message.splitlines(): - assert line in mte_repr - - # since this is a *local error* there should be no - # boxed traceback content! - assert not mte.tb_str - - # propagate to parent? - if raise_on_started_mte: - raise - - # no-send-side-error fallthrough - if ( - validate_pld_spec - and - expect_started_mte - ): - raise RuntimeError( - 'Child-ctx-task SHOULD HAVE raised an MTE for\n\n' - f'{started_value!r}\n' - ) - - assert ( - not expect_started_mte - or - not validate_pld_spec + except MsgTypeError as _mte: + mte = _mte + log.exception('started()` raised an MTE!\n') + if not expect_started_mte: + raise RuntimeError( + 'Child-ctx-task SHOULD NOT HAVE raised an MTE for\n\n' + f'{started_value!r}\n' ) - # if wait_for_parent_to_cancel: - # ... - # - # ^-TODO-^ logic for diff validation policies on each side: - # - # -[ ] ensure that if we don't validate on the send - # side, that we are eventually error-cancelled by our - # parent due to the bad `Started` payload! - # -[ ] the boxed error should be srced from the parent's - # runtime NOT ours! - # -[ ] we should still error on bad `return_value`s - # despite the parent not yet error-cancelling us? - # |_ how do we want the parent side to look in that - # case? - # -[ ] maybe the equiv of "during handling of the - # above error another occurred" for the case where - # the parent sends a MTE to this child and while - # waiting for the child to terminate it gets back - # the MTE for this case? 
- # + boxed_div: str = '------ - ------' + assert boxed_div not in mte._message + assert boxed_div not in mte.tb_str + assert boxed_div not in repr(mte) + assert boxed_div not in str(mte) + mte_repr: str = repr(mte) + for line in mte.message.splitlines(): + assert line in mte_repr - # XXX should always fail on recv side since we can't - # really do much else beside terminate and relay the - # msg-type-error from this RPC task ;) - return return_value + # since this is a *local error* there should be no + # boxed traceback content! + assert not mte.tb_str - finally: - # sanity on `limit_plds()` reversion - assert ( - rx.pld_dec - is - msgops._def_any_pldec - ) - log.runtime( - 'Reverted to previous pld-spec\n\n' - f'{orig_pldec}\n' + # propagate to parent? + if raise_on_started_mte: + raise + + # no-send-side-error fallthrough + if ( + validate_pld_spec + and + expect_started_mte + ): + raise RuntimeError( + 'Child-ctx-task SHOULD HAVE raised an MTE for\n\n' + f'{started_value!r}\n' ) + assert ( + not expect_started_mte + or + not validate_pld_spec + ) + + # if wait_for_parent_to_cancel: + # ... + # + # ^-TODO-^ logic for diff validation policies on each side: + # + # -[ ] ensure that if we don't validate on the send + # side, that we are eventually error-cancelled by our + # parent due to the bad `Started` payload! + # -[ ] the boxed error should be srced from the parent's + # runtime NOT ours! + # -[ ] we should still error on bad `return_value`s + # despite the parent not yet error-cancelling us? + # |_ how do we want the parent side to look in that + # case? + # -[ ] maybe the equiv of "during handling of the + # above error another occurred" for the case where + # the parent sends a MTE to this child and while + # waiting for the child to terminate it gets back + # the MTE for this case? + # + + # XXX should always fail on recv side since we can't + # really do much else beside terminate and relay the + # msg-type-error from this RPC task ;) + return return_value + @pytest.mark.parametrize( 'return_value', @@ -321,7 +317,6 @@ def test_basic_payload_spec( child, return_value=return_value, started_value=started_value, - pld_spec=maybe_msg_spec, validate_pld_spec=pld_check_started_value, ) as (ctx, first), ): -- 2.34.1 From d3680bfe6a7a62fe2ee2e49a5566af890d01a36f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 17 Jun 2024 10:32:38 -0400 Subject: [PATCH 161/305] Add note about using `@acm` as decorator in 3.10 --- tractor/_context.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tractor/_context.py b/tractor/_context.py index c61af15a..686a648b 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -2399,7 +2399,11 @@ def mk_context( # |_ `return: TypeAlias`, # |_ `invalid_policy: str|Callable` ? # -[ ] prolly implement the `@acm` wrapper using -# a `contextlib.ContextDecorator`? +# a `contextlib.ContextDecorator`, i guess not if +# we don't need an `__aexit__` block right? 
+# |_ de hecho, @acm can already be used as a decorator as of 3.10
+#    but i dunno how that's gonna play with `trio.Nursery.start[_soon]()`
+# |_ https://docs.python.org/3/library/contextlib.html#using-a-context-manager-as-a-function-decorator
 #
 def context(
-- 
2.34.1


From b517dacf0ae5d016cffcc1dbf5708f608ee137d9 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Mon, 17 Jun 2024 10:32:50 -0400
Subject: [PATCH 162/305] Don't pass `ipc_msg` for send side MTEs

Just pass `_bad_msg` such that it gets injected to `.msgdata` since
with a send-side `MsgTypeError` we don't have a remote `._ipc_msg:
Error` per se to include.
---
 tractor/_exceptions.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py
index 92c3fafb..8ed46ebc 100644
--- a/tractor/_exceptions.py
+++ b/tractor/_exceptions.py
@@ -441,7 +441,8 @@ class RemoteActorError(Exception):

         for key in fields:
             if (
-                key == 'relay_uid' and not self.is_inception()
+                key == 'relay_uid'
+                and not self.is_inception()
             ):
                 continue

@@ -1291,8 +1292,7 @@ def _mk_msg_type_err(

         msgtyperr = MsgTypeError(
             message=message,
-            ipc_msg=msg,
-            bad_msg=msg,
+            _bad_msg=msg,
         )
         # ya, might be `None`
         msgtyperr.__cause__ = src_type_error
-- 
2.34.1


From 8dcc49fce2402cdba098925459c926429f7a2b2c Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Mon, 17 Jun 2024 13:12:16 -0400
Subject: [PATCH 163/305] Break `_mk_msg_type_err()` into recv/send side funcs

Name them `_mk_send_mte()`/`_mk_recv_mte()` and change the runtime to
call each appropriately depending on location/usage.

Also add some dynamic call-frame "unhide" blocks such that when we
expect a raised MTE from the above calls but we get a different
unexpected error from the runtime, we ensure the call stack downward
is shown in tbs/pdb.
|_ ideally in the longer run we come up with a fancier dynamic sys for
   this, prolly something in `.devx._frame_stack`?
---
 tractor/_context.py    |  30 ++---
 tractor/_exceptions.py | 258 ++++++++++++++++++++++-------------------
 tractor/_ipc.py        |  12 +-
 tractor/msg/_ops.py    |  36 +++---
 4 files changed, 174 insertions(+), 162 deletions(-)

diff --git a/tractor/_context.py b/tractor/_context.py
index 686a648b..be492e4e 100644
--- a/tractor/_context.py
+++ b/tractor/_context.py
@@ -49,7 +49,6 @@ from typing import (
     Any,
     AsyncGenerator,
     Callable,
-    Mapping,
     Type,
     TypeAlias,
     TYPE_CHECKING,
@@ -1484,13 +1483,21 @@ class Context:
         # __tracebackhide__: bool = hide_tb

         if validate_pld_spec:
-            msgops.validate_payload_msg(
-                pld_msg=started_msg,
-                pld_value=value,
-                ipc=self,
-                strict_pld_parity=strict_pld_parity,
-                hide_tb=hide_tb,
-            )
+            # TODO: prolly wrap this as a `show_frame_when_not()`
+            try:
+                msgops.validate_payload_msg(
+                    pld_msg=started_msg,
+                    pld_value=value,
+                    ipc=self,
+                    strict_pld_parity=strict_pld_parity,
+                    hide_tb=hide_tb,
+                )
+            except BaseException as err:
+                if not isinstance(err, MsgTypeError):
+                    __tracebackhide__: bool = False
+
+                raise
+

         # TODO: maybe a flag to by-pass encode op if already done
         # here in caller?
@@ -2185,11 +2192,6 @@ async def open_context_from_portal(
             try:
                 result_or_err: Exception|Any = await ctx.result()
             except BaseException as berr:
-                # cancelled before (or maybe during?)
final result capture - # if isinstance(trio.Cancelled, berr): - # from .devx import mk_pdb - # mk_pdb.set_trace() - # on normal teardown, if we get some error # raised in `Context.result()` we still want to # save that error on the ctx's state to @@ -2201,7 +2203,7 @@ async def open_context_from_portal( ctx._local_error: BaseException = scope_err raise - # yes! this worx Bp + # yes this worx! # from .devx import _debug # await _debug.pause() diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 8ed46ebc..7164d6ab 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -1232,14 +1232,13 @@ def _raise_from_unexpected_msg( _raise_from_no_key_in_msg = _raise_from_unexpected_msg -def _mk_msg_type_err( +def _mk_send_mte( msg: Any|bytes|MsgType, codec: MsgCodec|MsgDec, message: str|None = None, verb_header: str = '', - src_validation_error: ValidationError|None = None, src_type_error: TypeError|None = None, is_invalid_payload: bool = False, @@ -1247,131 +1246,148 @@ def _mk_msg_type_err( ) -> MsgTypeError: ''' - Compose a `MsgTypeError` from an input runtime context. + Compose a `MsgTypeError` from a `Channel.send()`-side error, + normally raised witih a runtime IPC `Context`. ''' - # `Channel.send()` case - if src_validation_error is None: + if isinstance(codec, MsgDec): + raise RuntimeError( + '`codec` must be a `MsgCodec` for send-side errors?' + ) - if isinstance(codec, MsgDec): - raise RuntimeError( - '`codec` must be a `MsgCodec` for send-side errors?' + from tractor.devx import ( + pformat_caller_frame, + ) + # no src error from `msgspec.msgpack.Decoder.decode()` so + # prolly a manual type-check on our part. + if message is None: + tb_fmt: str = pformat_caller_frame(stack_limit=3) + message: str = ( + f'invalid msg -> {msg}: {type(msg)}\n\n' + f'{tb_fmt}\n' + f'Valid IPC msgs are:\n\n' + f'{codec.msg_spec_str}\n', + ) + elif src_type_error: + src_message: str = str(src_type_error) + patt: str = 'type ' + type_idx: int = src_message.find('type ') + invalid_type: str = src_message[type_idx + len(patt):].split()[0] + + enc_hook: Callable|None = codec.enc.enc_hook + if enc_hook is None: + message += ( + '\n\n' + + f"The current IPC-msg codec can't encode type `{invalid_type}` !\n" + f'Maybe a `msgpack.Encoder.enc_hook()` extension is needed?\n\n' + + f'Check the `msgspec` docs for ad-hoc type extending:\n' + '|_ https://jcristharif.com/msgspec/extending.html\n' + '|_ https://jcristharif.com/msgspec/extending.html#defining-a-custom-extension-messagepack-only\n' ) - from tractor.devx import ( - pformat_caller_frame, + msgtyperr = MsgTypeError( + message=message, + _bad_msg=msg, + ) + # ya, might be `None` + msgtyperr.__cause__ = src_type_error + return msgtyperr + + +def _mk_recv_mte( + msg: Any|bytes|MsgType, + codec: MsgCodec|MsgDec, + + message: str|None = None, + verb_header: str = '', + + src_validation_error: ValidationError|None = None, + is_invalid_payload: bool = False, + + **mte_kwargs, + +) -> MsgTypeError: + ''' + Compose a `MsgTypeError` from a + `Channel|Context|MsgStream.receive()`-side error, + normally raised witih a runtime IPC ctx or streaming + block. 
+ + ''' + msg_dict: dict|None = None + bad_msg: PayloadMsg|None = None + + if is_invalid_payload: + msg_type: str = type(msg) + any_pld: Any = msgpack.decode(msg.pld) + message: str = ( + f'invalid `{msg_type.__qualname__}` msg payload\n\n' + f'value: `{any_pld!r}` does not match type-spec: ' + f'`{type(msg).__qualname__}.pld: {codec.pld_spec_str}`' ) - # no src error from `msgspec.msgpack.Decoder.decode()` so - # prolly a manual type-check on our part. - if message is None: - tb_fmt: str = pformat_caller_frame(stack_limit=3) - message: str = ( - f'invalid msg -> {msg}: {type(msg)}\n\n' - f'{tb_fmt}\n' - f'Valid IPC msgs are:\n\n' - f'{codec.msg_spec_str}\n', - ) - elif src_type_error: - src_message: str = str(src_type_error) - patt: str = 'type ' - type_idx: int = src_message.find('type ') - invalid_type: str = src_message[type_idx + len(patt):].split()[0] + bad_msg = msg - enc_hook: Callable|None = codec.enc.enc_hook - if enc_hook is None: - message += ( - '\n\n' - - f"The current IPC-msg codec can't encode type `{invalid_type}` !\n" - f'Maybe a `msgpack.Encoder.enc_hook()` extension is needed?\n\n' - - f'Check the `msgspec` docs for ad-hoc type extending:\n' - '|_ https://jcristharif.com/msgspec/extending.html\n' - '|_ https://jcristharif.com/msgspec/extending.html#defining-a-custom-extension-messagepack-only\n' - ) - - msgtyperr = MsgTypeError( - message=message, - _bad_msg=msg, - ) - # ya, might be `None` - msgtyperr.__cause__ = src_type_error - return msgtyperr - - # `Channel.recv()` case else: - msg_dict: dict|None = None - bad_msg: PayloadMsg|None = None - - if is_invalid_payload: - msg_type: str = type(msg) - any_pld: Any = msgpack.decode(msg.pld) - message: str = ( - f'invalid `{msg_type.__qualname__}` msg payload\n\n' - f'value: `{any_pld!r}` does not match type-spec: ' - f'`{type(msg).__qualname__}.pld: {codec.pld_spec_str}`' - ) - bad_msg = msg - - else: - # decode the msg-bytes using the std msgpack - # interchange-prot (i.e. without any `msgspec.Struct` - # handling) so that we can determine what - # `.msg.types.PayloadMsg` is the culprit by reporting the - # received value. - msg: bytes - msg_dict: dict = msgpack.decode(msg) - msg_type_name: str = msg_dict['msg_type'] - msg_type = getattr(msgtypes, msg_type_name) - message: str = ( - f'invalid `{msg_type_name}` IPC msg\n\n' - ) - # XXX be "fancy" and see if we can determine the exact - # invalid field such that we can comprehensively report - # the specific field's type problem. - msgspec_msg: str = src_validation_error.args[0].rstrip('`') - msg, _, maybe_field = msgspec_msg.rpartition('$.') - obj = object() - if (field_val := msg_dict.get(maybe_field, obj)) is not obj: - field_name_expr: str = ( - f' |_{maybe_field}: {codec.pld_spec_str} = ' - ) - fmt_val_lines: list[str] = pformat(field_val).splitlines() - fmt_val: str = ( - f'{fmt_val_lines[0]}\n' - + - textwrap.indent( - '\n'.join(fmt_val_lines[1:]), - prefix=' '*len(field_name_expr), - ) - ) - message += ( - f'{msg.rstrip("`")}\n\n' - f'<{msg_type.__qualname__}(\n' - # f'{".".join([msg_type.__module__, msg_type.__qualname__])}\n' - f'{field_name_expr}{fmt_val}\n' - f')>' - ) - - if verb_header: - message = f'{verb_header} ' + message - - msgtyperr = MsgTypeError.from_decode( - message=message, - bad_msg=bad_msg, - bad_msg_as_dict=msg_dict, - boxed_type=type(src_validation_error), - - # NOTE: for pld-spec MTEs we set the `._ipc_msg` manually: - # - for the send-side `.started()` pld-validate - # case we actually raise inline so we don't need to - # set the it at all. 
- # - for recv side we set it inside `PldRx.decode_pld()` - # after a manual call to `pack_error()` since we - # actually want to emulate the `Error` from the mte we - # build here. So by default in that case, this is left - # as `None` here. - # ipc_msg=src_err_msg, + # decode the msg-bytes using the std msgpack + # interchange-prot (i.e. without any `msgspec.Struct` + # handling) so that we can determine what + # `.msg.types.PayloadMsg` is the culprit by reporting the + # received value. + msg: bytes + msg_dict: dict = msgpack.decode(msg) + msg_type_name: str = msg_dict['msg_type'] + msg_type = getattr(msgtypes, msg_type_name) + message: str = ( + f'invalid `{msg_type_name}` IPC msg\n\n' ) - msgtyperr.__cause__ = src_validation_error - return msgtyperr + # XXX be "fancy" and see if we can determine the exact + # invalid field such that we can comprehensively report + # the specific field's type problem. + msgspec_msg: str = src_validation_error.args[0].rstrip('`') + msg, _, maybe_field = msgspec_msg.rpartition('$.') + obj = object() + if (field_val := msg_dict.get(maybe_field, obj)) is not obj: + field_name_expr: str = ( + f' |_{maybe_field}: {codec.pld_spec_str} = ' + ) + fmt_val_lines: list[str] = pformat(field_val).splitlines() + fmt_val: str = ( + f'{fmt_val_lines[0]}\n' + + + textwrap.indent( + '\n'.join(fmt_val_lines[1:]), + prefix=' '*len(field_name_expr), + ) + ) + message += ( + f'{msg.rstrip("`")}\n\n' + f'<{msg_type.__qualname__}(\n' + # f'{".".join([msg_type.__module__, msg_type.__qualname__])}\n' + f'{field_name_expr}{fmt_val}\n' + f')>' + ) + + if verb_header: + message = f'{verb_header} ' + message + + msgtyperr = MsgTypeError.from_decode( + message=message, + bad_msg=bad_msg, + bad_msg_as_dict=msg_dict, + boxed_type=type(src_validation_error), + + # NOTE: for pld-spec MTEs we set the `._ipc_msg` manually: + # - for the send-side `.started()` pld-validate + # case we actually raise inline so we don't need to + # set the it at all. + # - for recv side we set it inside `PldRx.decode_pld()` + # after a manual call to `pack_error()` since we + # actually want to emulate the `Error` from the mte we + # build here. So by default in that case, this is left + # as `None` here. + # ipc_msg=src_err_msg, + ) + msgtyperr.__cause__ = src_validation_error + return msgtyperr diff --git a/tractor/_ipc.py b/tractor/_ipc.py index ec7d348a..e5e3d10f 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -49,7 +49,8 @@ from tractor._exceptions import ( MsgTypeError, pack_from_raise, TransportClosed, - _mk_msg_type_err, + _mk_send_mte, + _mk_recv_mte, ) from tractor.msg import ( _ctxvar_MsgCodec, @@ -256,7 +257,7 @@ class MsgpackTCPStream(MsgTransport): # and always raise such that spec violations # are never allowed to be caught silently! 
except msgspec.ValidationError as verr: - msgtyperr: MsgTypeError = _mk_msg_type_err( + msgtyperr: MsgTypeError = _mk_recv_mte( msg=msg_bytes, codec=codec, src_validation_error=verr, @@ -321,7 +322,7 @@ class MsgpackTCPStream(MsgTransport): if type(msg) not in msgtypes.__msg_types__: if strict_types: - raise _mk_msg_type_err( + raise _mk_send_mte( msg, codec=codec, ) @@ -333,8 +334,9 @@ class MsgpackTCPStream(MsgTransport): try: bytes_data: bytes = codec.encode(msg) - except TypeError as typerr: - msgtyperr: MsgTypeError = _mk_msg_type_err( + except TypeError as _err: + typerr = _err + msgtyperr: MsgTypeError = _mk_send_mte( msg, codec=codec, message=( diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index 80633e7e..91c0ddea 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -47,7 +47,7 @@ from tractor._exceptions import ( InternalError, _raise_from_unexpected_msg, MsgTypeError, - _mk_msg_type_err, + _mk_recv_mte, pack_error, ) from tractor._state import current_ipc_ctx @@ -264,7 +264,7 @@ class PldRx(Struct): # pack mgterr into error-msg for # reraise below; ensure remote-actor-err # info is displayed nicely? - mte: MsgTypeError = _mk_msg_type_err( + mte: MsgTypeError = _mk_recv_mte( msg=msg, codec=self.pld_dec, src_validation_error=valerr, @@ -277,19 +277,6 @@ class PldRx(Struct): if is_started_send_side: raise mte - # XXX TODO: remove this right? - # => any bad stated/return values should - # always be treated a remote errors right? - # - # if ( - # expect_msg is Return - # or expect_msg is Started - # ): - # # set emulated remote error more-or-less as the - # # runtime would - # ctx: Context = getattr(ipc, 'ctx', ipc) - # ctx._maybe_cancel_and_set_remote_error(mte) - # NOTE: the `.message` is automatically # transferred into the message as long as we # define it as a `Error.message` field. @@ -799,13 +786,18 @@ def validate_payload_msg( # raise any msg type error NO MATTER WHAT! except ValidationError as verr: - mte: MsgTypeError = _mk_msg_type_err( - msg=roundtripped, - codec=codec, - src_validation_error=verr, - verb_header='Trying to send ', - is_invalid_payload=True, - ) + try: + mte: MsgTypeError = _mk_recv_mte( + msg=roundtripped, + codec=codec, + src_validation_error=verr, + verb_header='Trying to send ', + is_invalid_payload=True, + ) + except BaseException: + __tracebackhide__: bool = False + raise + if not raise_mte: return mte -- 2.34.1 From 682cf884c436c7c5a515527abd4404e6a562fc0d Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 18 Jun 2024 09:57:10 -0400 Subject: [PATCH 164/305] Expand `PayloadMsg` doc-str --- tractor/msg/types.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 0fc0ee96..0904411f 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -73,11 +73,22 @@ class PayloadMsg( # as_array=True, ): ''' - The "god" boxing msg type. + An abstract payload boxing/shuttling IPC msg type. - Boxes user data-msgs in a `.pld` and uses `msgspec`'s tagged - unions support to enable a spec from a common msg inheritance - tree. + Boxes data-values passed to/from user code + + (i.e. 
any values passed by `tractor` application code using any of
+
+      |_ `._streaming.MsgStream.send/receive()`
+      |_ `._context.Context.started/result()`
+      |_ `._ipc.Channel.send/recv()`
+
+         aka our "IPC primitive APIs")
+
+    as message "payloads" set to the `.pld` field and uses
+    `msgspec`'s "tagged unions" feature to support a subset of our
+    "SC-transitive shuttle protocol" specification with
+    a `msgspec.Struct` inheritance tree.

     '''
     cid: str  # call/context-id
-- 
2.34.1


From 3c1f56f8d93aecb8e66200517db65d4d026d4bcc Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Tue, 18 Jun 2024 14:40:26 -0400
Subject: [PATCH 165/305] Change `_Cache` reuse emit to `.runtime()`

---
 tractor/trionics/_mngrs.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tractor/trionics/_mngrs.py b/tractor/trionics/_mngrs.py
index f57be0a7..08e70ad2 100644
--- a/tractor/trionics/_mngrs.py
+++ b/tractor/trionics/_mngrs.py
@@ -271,8 +271,11 @@ async def maybe_open_context(
         yield False, yielded

     else:
-        log.info(f'Reusing _Cached resource for {ctx_key}')
         _Cache.users += 1
+        log.runtime(
+            f'Reusing resource for `_Cache` user {_Cache.users}\n\n'
+            f'{ctx_key!r} -> {yielded!r}\n'
+        )
         lock.release()
         yield True, yielded

-- 
2.34.1


From 4792ffcc04a0efdad9a91ab980442ccc91cfdd32 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Tue, 18 Jun 2024 14:42:25 -0400
Subject: [PATCH 166/305] Avoid actor-nursery-exit warns on registrees

Since a local-actor-nursery-parented subactor might also use the root
as its registry, we need to avoid warning when short lived IPC
`Channel` connections establish and then disconnect (quickly, bc
apparently the subactor isn't re-using an already cached
parent-peer<->child conn as you'd expect for efficiency..) since such
cases are currently considered normal operation of our super
shoddy/naive "discovery sys" XD

As such, (un)guard the whole local-actor-nursery OR channel-draining
waiting blocks with the additional `or Actor._cancel_called` branch
since really we should also be waiting on the parent nurse to exit (at
least, for sure and always) when the local `Actor` indeed has been
"globally" cancelled-called. Further add separate timeout warnings for
channel-draining vs. local-actor-nursery-exit waiting since they are
technically orthogonal cases (at least, afaik).

Also,
- adjust the `Actor._stream_handler()` connection status log-emit to
  `.runtime()`, especially to reduce noise around the aforementioned
  ephemeral registree connection-requests.
- if we do wait on a local actor-nurse to exit, report its
  `._children` table (which should help figure out going forward how
  useful the warning is, if at all).
---
 tractor/_runtime.py | 136 +++++++++++++++++++++++++++++++-------------
 1 file changed, 95 insertions(+), 41 deletions(-)

diff --git a/tractor/_runtime.py b/tractor/_runtime.py
index fe2ecc00..dfa76cd8 100644
--- a/tractor/_runtime.py
+++ b/tractor/_runtime.py
@@ -446,7 +446,7 @@ class Actor:
                 # inside ``open_root_actor()`` where there is a check for
                 # a bound listener on the "arbiter" addr.  the reset will be
                 # because the handshake was never meant took place.
-                log.warning(
+                log.runtime(
                     con_status
                     +
                     ' -> But failed to handshake? Ignoring..\n'
@@ -522,24 +522,50 @@ class Actor:
            # the peer was cancelled we try to wait for them
            # to tear down their side of the connection before
            # moving on with closing our own side.
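# ---- [editor's example, not part of the patch] ----
# Illustrating the payload "boxing" from the `PayloadMsg` doc expansion
# in patch 164 above: user values ride in the `.pld` field of
# a concrete msg subtype; the field values here are made up:
from tractor.msg.types import Started

msg = Started(
    cid='1234',           # call/context-id
    pld={'answer': 42},   # the boxed user payload being shuttled
)
assert msg.pld == {'answer': 42}
# ---- [end example] ----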
- if local_nursery: - if chan._cancel_called: - log.cancel( - 'Waiting on cancel request to peer\n' - f'`Portal.cancel_actor()` => {chan.uid}\n' - ) + if ( + local_nursery + and ( + self._cancel_called + or + chan._cancel_called + ) + # + # ^-TODO-^ along with this is there another condition + # that we should filter with to avoid entering this + # waiting block needlessly? + # -[ ] maybe `and local_nursery.cancelled` and/or + # only if the `._children` table is empty or has + # only `Portal`s with .chan._cancel_called == + # True` as per what we had below; the MAIN DIFF + # BEING that just bc one `Portal.cancel_actor()` + # was called, doesn't mean the whole actor-nurse + # is gonna exit any time soon right!? + # + # or + # all(chan._cancel_called for chan in chans) + + ): + log.cancel( + 'Waiting on cancel request to peer\n' + f'`Portal.cancel_actor()` => {chan.uid}\n' + ) # XXX: this is a soft wait on the channel (and its # underlying transport protocol) to close from the # remote peer side since we presume that any channel - # which is mapped to a sub-actor (i.e. it's managed by - # one of our local nurseries) has a message is sent to - # the peer likely by this actor (which is now in - # a cancelled condition) when the local runtime here is - # now cancelled while (presumably) in the middle of msg - # loop processing. - with trio.move_on_after(0.5) as cs: - cs.shield = True + # which is mapped to a sub-actor (i.e. it's managed + # by local actor-nursery) has a message that is sent + # to the peer likely by this actor (which may be in + # a shutdown sequence due to cancellation) when the + # local runtime here is now cancelled while + # (presumably) in the middle of msg loop processing. + chan_info: str = ( + f'{chan.uid}\n' + f'|_{chan}\n' + f' |_{chan.transport}\n\n' + ) + with trio.move_on_after(0.5) as drain_cs: + drain_cs.shield = True # attempt to wait for the far end to close the # channel and bail after timeout (a 2-generals @@ -556,10 +582,7 @@ class Actor: # TODO: factor this into a helper? log.warning( 'Draining msg from disconnected peer\n' - f'{chan.uid}\n' - f'|_{chan}\n' - f' |_{chan.transport}\n\n' - + f'{chan_info}' f'{pformat(msg)}\n' ) # cid: str|None = msg.get('cid') @@ -571,31 +594,62 @@ class Actor: cid, msg, ) - - # NOTE: when no call to `open_root_actor()` was - # made, we implicitly make that call inside - # the first `.open_nursery()`, in this case we - # can assume that we are the root actor and do - # not have to wait for the nursery-enterer to - # exit before shutting down the actor runtime. - # - # see matching note inside `._supervise.open_nursery()` - if not local_nursery._implicit_runtime_started: - log.runtime( - 'Waiting on local actor nursery to exit..\n' - f'|_{local_nursery}\n' - ) - await local_nursery.exited.wait() - - if ( - cs.cancelled_caught - and not local_nursery._implicit_runtime_started - ): + if drain_cs.cancelled_caught: log.warning( - 'Failed to exit local actor nursery?\n' + 'Timed out waiting on IPC transport channel to drain?\n' + f'{chan_info}' + ) + + # XXX NOTE XXX when no explicit call to + # `open_root_actor()` was made by the application + # (normally we implicitly make that call inside + # the first `.open_nursery()` in root-actor + # user/app code), we can assume that either we + # are NOT the root actor or are root but the + # runtime was started manually. 
and thus DO have + # to wait for the nursery-enterer to exit before + # shutting down the local runtime to avoid + # clobbering any ongoing subactor + # teardown/debugging/graceful-cancel. + # + # see matching note inside `._supervise.open_nursery()` + # + # TODO: should we have a separate cs + timeout + # block here? + if ( + # XXX SO either, + # - not root OR, + # - is root but `open_root_actor()` was + # entered manually (in which case we do + # the equiv wait there using the + # `devx._debug` sub-sys APIs). + not local_nursery._implicit_runtime_started + ): + log.runtime( + 'Waiting on local actor nursery to exit..\n' f'|_{local_nursery}\n' ) - # await _debug.pause() + with trio.move_on_after(0.5) as an_exit_cs: + an_exit_cs.shield = True + await local_nursery.exited.wait() + + # TODO: currently this is always triggering for every + # sub-daemon spawned from the `piker.services._mngr`? + # -[ ] how do we ensure that the IPC is supposed to + # be long lived and isn't just a register? + # |_ in the register case how can we signal that the + # ephemeral msg loop was intentional? + if ( + # not local_nursery._implicit_runtime_started + # and + an_exit_cs.cancelled_caught + ): + log.warning( + 'Timed out waiting on local actor-nursery to exit?\n' + f'{local_nursery}\n' + f' |_{pformat(local_nursery._children)}\n' + ) + # await _debug.pause() if disconnected: # if the transport died and this actor is still -- 2.34.1 From 778710efbb444c24bfe0c4970bae5cf106e12d28 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 18 Jun 2024 18:14:58 -0400 Subject: [PATCH 167/305] Flip `infected_asyncio` status msg to `.runtime()` --- tractor/to_asyncio.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tractor/to_asyncio.py b/tractor/to_asyncio.py index 585b0b00..d1451b4c 100644 --- a/tractor/to_asyncio.py +++ b/tractor/to_asyncio.py @@ -577,14 +577,18 @@ def run_as_asyncio_guest( log.runtime(f"trio_main finished: {main_outcome!r}") # start the infection: run trio on the asyncio loop in "guest mode" - log.info(f"Infecting asyncio process with {trio_main}") + log.runtime( + 'Infecting `asyncio`-process with a `trio` guest-run of\n\n' + f'{trio_main!r}\n\n' + f'{trio_done_callback}\n' + ) trio.lowlevel.start_guest_run( trio_main, run_sync_soon_threadsafe=loop.call_soon_threadsafe, done_callback=trio_done_callback, ) - # ``.unwrap()`` will raise here on error + # NOTE `.unwrap()` will raise on error return (await trio_done_fut).unwrap() # might as well if it's installed. -- 2.34.1 From 5b9a2642f6b932eaba1bb24f12facd8c4cda3ffc Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 26 Jun 2024 16:00:14 -0400 Subject: [PATCH 168/305] Add a `Context.portal`, more cancel tooing Might as well add a public maybe-getter for use on the "parent" side since it can be handy to check out-of-band cancellation conditions (like from `Portal.cancel_actor()`). Buncha bitty tweaks for more easily debugging cancel conditions: - add a `@.cancel_called.setter` for hooking into `.cancel_called = True` being set in hard to decipher "who cancelled us" scenarios. - use a new `self_ctxc: bool` var in `.cancel()` to capture the output state from `._is_self_cancelled(remote_error)` at call time so it can be compared against the measured value at crash-time (when REPL-ing it can often have already changed due to runtime teardown sequencing vs. the crash handler hook entry). - proxy `hide_tb` to `.drain_to_final_msg()` from `.wait_for_result()`. 
- use `remote_error.sender` attr directly instead of through `RAE.msgdata: dict` lookup. - change var name `our_uid` -> `peer_uid`; it's not "ours".. Other various docs/comment updates: - extend the main class doc to include some other name ideas. - change over all remaining `.result()` refs to `.wait_for_result()`. - doc more details on how we want `.outcome` to eventually signature. --- tractor/_context.py | 126 +++++++++++++++++++++++++++++++------------- 1 file changed, 88 insertions(+), 38 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index be492e4e..33d8597b 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -121,10 +121,19 @@ class Unresolved: @dataclass class Context: ''' - An inter-actor, SC transitive, `Task` communication context. + An inter-actor, SC transitive, `trio.Task` (pair) + communication context. - NB: This class should **never be instatiated directly**, it is allocated - by the runtime in 2 ways: + (We've also considered other names and ideas: + - "communicating tasks scope": cts + - "distributed task scope": dts + - "communicating tasks context": ctc + + **Got a better idea for naming? Make an issue dawg!** + ) + + NB: This class should **never be instatiated directly**, it is + allocated by the runtime in 2 ways: - by entering `Portal.open_context()` which is the primary public API for any "parent" task or, - by the RPC machinery's `._rpc._invoke()` as a `ctx` arg @@ -210,6 +219,16 @@ class Context: # more the the `Context` is needed? _portal: Portal | None = None + @property + def portal(self) -> Portal|None: + ''' + Return any wrapping memory-`Portal` if this is + a 'parent'-side task which called `Portal.open_context()`, + otherwise `None`. + + ''' + return self._portal + # NOTE: each side of the context has its own cancel scope # which is exactly the primitive that allows for # cross-actor-task-supervision and thus SC. @@ -299,6 +318,8 @@ class Context: # boxed exception. NOW, it's used for spawning overrun queuing # tasks when `.allow_overruns == True` !!! _scope_nursery: trio.Nursery|None = None + # ^-TODO-^ change name? + # -> `._scope_tn` "scope task nursery" # streaming overrun state tracking _in_overrun: bool = False @@ -408,10 +429,23 @@ class Context: ''' return self._cancel_called + @cancel_called.setter + def cancel_called(self, val: bool) -> None: + ''' + Set the self-cancelled request `bool` value. + + ''' + # to debug who frickin sets it.. + # if val: + # from .devx import pause_from_sync + # pause_from_sync() + + self._cancel_called = val + @property def canceller(self) -> tuple[str, str]|None: ''' - ``Actor.uid: tuple[str, str]`` of the (remote) + `Actor.uid: tuple[str, str]` of the (remote) actor-process who's task was cancelled thus causing this (side of the) context to also be cancelled. @@ -515,7 +549,7 @@ class Context: # the local scope was never cancelled # and instead likely we received a remote side - # # cancellation that was raised inside `.result()` + # # cancellation that was raised inside `.wait_for_result()` # or ( # (se := self._local_error) # and se is re @@ -585,6 +619,10 @@ class Context: self, error: BaseException, + # TODO: manual toggle for cases where we wouldn't normally + # mark ourselves cancelled but want to? 
+ # set_cancel_called: bool = False, + ) -> None: ''' (Maybe) cancel this local scope due to a received remote @@ -603,7 +641,7 @@ class Context: - `Portal.open_context()` - `Portal.result()` - `Context.open_stream()` - - `Context.result()` + - `Context.wait_for_result()` when called/closed by actor local task(s). @@ -729,7 +767,7 @@ class Context: # Cancel the local `._scope`, catch that # `._scope.cancelled_caught` and re-raise any remote error - # once exiting (or manually calling `.result()`) the + # once exiting (or manually calling `.wait_for_result()`) the # `.open_context()` block. cs: trio.CancelScope = self._scope if ( @@ -764,8 +802,9 @@ class Context: # `trio.Cancelled` subtype here ;) # https://github.com/goodboy/tractor/issues/368 message: str = 'Cancelling `Context._scope` !\n\n' + # from .devx import pause_from_sync + # pause_from_sync() self._scope.cancel() - else: message: str = 'NOT cancelling `Context._scope` !\n\n' # from .devx import mk_pdb @@ -845,15 +884,15 @@ class Context: @property def repr_api(self) -> str: + return 'Portal.open_context()' + + # TODO: use `.dev._frame_stack` scanning to find caller! # ci: CallerInfo|None = self._caller_info # if ci: # return ( # f'{ci.api_nsp}()\n' # ) - # TODO: use `.dev._frame_stack` scanning to find caller! - return 'Portal.open_context()' - async def cancel( self, timeout: float = 0.616, @@ -889,7 +928,8 @@ class Context: ''' side: str = self.side - self._cancel_called: bool = True + # XXX for debug via the `@.setter` + self.cancel_called = True header: str = ( f'Cancelling ctx with peer from {side.upper()} side\n\n' @@ -912,7 +952,7 @@ class Context: # `._scope.cancel()` since we expect the eventual # `ContextCancelled` from the other side to trigger this # when the runtime finally receives it during teardown - # (normally in `.result()` called from + # (normally in `.wait_for_result()` called from # `Portal.open_context().__aexit__()`) if side == 'parent': if not self._portal: @@ -1025,10 +1065,10 @@ class Context: ''' __tracebackhide__: bool = hide_tb - our_uid: tuple = self.chan.uid + peer_uid: tuple = self.chan.uid # XXX NOTE XXX: `ContextCancelled`/`StreamOverrun` absorption - # for "graceful cancellation" case: + # for "graceful cancellation" case(s): # # Whenever a "side" of a context (a `Task` running in # an actor) **is** the side which requested ctx @@ -1045,9 +1085,11 @@ class Context: # set to the `Actor.uid` of THIS task (i.e. the # cancellation requesting task's actor is the actor # checking whether it should absorb the ctxc). + self_ctxc: bool = self._is_self_cancelled(remote_error) if ( + self_ctxc + and not raise_ctxc_from_self_call - and self._is_self_cancelled(remote_error) # TODO: ?potentially it is useful to emit certain # warning/cancel logs for the cases where the @@ -1077,8 +1119,8 @@ class Context: and isinstance(remote_error, RemoteActorError) and remote_error.boxed_type is StreamOverrun - # and tuple(remote_error.msgdata['sender']) == our_uid - and tuple(remote_error.sender) == our_uid + # and tuple(remote_error.msgdata['sender']) == peer_uid + and tuple(remote_error.sender) == peer_uid ): # NOTE: we set the local scope error to any "self # cancellation" error-response thus "absorbing" @@ -1140,9 +1182,9 @@ class Context: of the remote cancellation. ''' - __tracebackhide__ = hide_tb + __tracebackhide__: bool = hide_tb assert self._portal, ( - "Context.result() can not be called from callee side!" + '`Context.wait_for_result()` can not be called from callee side!' 
        )
        if self._final_result_is_set():
            return self._result

@@ -1197,10 +1239,11 @@ class Context:
             # raising something we know might happen
             # during cancellation ;)
             (not self._cancel_called)
-        )
+        ),
+        hide_tb=hide_tb,
     )
     # TODO: eventually make `.outcome: Outcome` and thus return
-    # `self.outcome.unwrap()` here!
+    # `self.outcome.unwrap()` here?
     return self.outcome
 
     # TODO: switch this with above!
@@ -1284,17 +1327,24 @@ class Context:
         Any|
         RemoteActorError|
         ContextCancelled
+        # TODO: make this a `outcome.Outcome`!
     ):
         '''
-        The final "outcome" from an IPC context which can either be
-        some Value returned from the target `@context`-decorated
-        remote task-as-func, or an `Error` wrapping an exception
-        raised from an RPC task fault or cancellation.
+        Return the "final outcome" (state) of the far end peer task,
+        non-blocking. If the remote task has not completed then this
+        field always resolves to the module defined `Unresolved`
+        handle.
 
-        Note that if the remote task has not terminated then this
-        field always resolves to the module defined `Unresolved` handle.
+        ------ - ------
+        TODO->( this is doc-driven-dev content not yet actual ;P )
 
-        ------ - ------
-        TODO: implement this using `outcome.Outcome` types?
+        The final "outcome" from an IPC context which can be any of:
+        - some `outcome.Value` which boxes the returned output from
+          the peer task's `@context`-decorated remote task-as-func, or
+        - an `outcome.Error` wrapping an exception raised by that same
+          RPC task after a fault or cancellation, or
+        - an unresolved `outcome.Outcome` when the peer task is still
+          executing and has not yet completed.
 
         '''
         return (
@@ -1583,7 +1633,7 @@ class Context:
 
           - NEVER `return` early before delivering the msg!
            bc if the error is a ctxc and there is a task waiting on
-            `.result()` we need the msg to be
+            `.wait_for_result()` we need the msg to be
            `send_chan.send_nowait()`-ed over the `._rx_chan` so
            that the error is relayed to that waiter task and thus
            raised in user code!
@@ -1828,7 +1878,7 @@ async def open_context_from_portal(
     When the "callee" (side that is "called"/started by a call
     to *this* method) returns, the caller side (this) unblocks
     and any final value delivered from the other end can be
-    retrieved using the `Contex.result()` api.
+    retrieved using the `Context.wait_for_result()` api.
 
     The yielded ``Context`` instance further allows for opening
     bidirectional streams, explicit cancellation and
@@ -1965,14 +2015,14 @@ async def open_context_from_portal(
             yield ctx, first
 
             # ??TODO??: do we still want to consider this or is
-            # the `else:` block handling via a `.result()`
+            # the `else:` block handling via a `.wait_for_result()`
             # call below enough??
             #
-            # -[ ] pretty sure `.result()` internals do the
+            # -[ ] pretty sure `.wait_for_result()` internals do the
             # same as our ctxc handler below so it ended up
             # being same (repeated?) behaviour, but ideally we
             # wouldn't have that duplication either by somehow
-            # factoring the `.result()` handler impl in a way
+            # factoring the `.wait_for_result()` handler impl in a way
             # that we can re-use it around the `yield` ^ here
             # or vice versa?
             #
@@ -2110,7 +2160,7 @@ async def open_context_from_portal(
                 # AND a group-exc is only raised if there was > 1
                 # tasks started *here* in the "caller" / opener
                 # block. If any one of those tasks calls
-                # `.result()` or `MsgStream.receive()`
+                # `.wait_for_result()` or `MsgStream.receive()`
                 # `._maybe_raise_remote_err()` will be transitively
                 # called and the remote error raised causing all
                 # tasks to be cancelled.
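
(For a concrete feel of the `.outcome` doc-driven-dev block above,
a speculative sketch, NOT the current impl, of mapping onto the
`outcome` lib's types might read:

    import outcome

    def as_outcome(ctx: Context) -> outcome.Outcome|type[Unresolved]:
        # peer task still executing -> the module-defined sentinel
        if (
            ctx._remote_error is None
            and not ctx._final_result_is_set()
        ):
            return Unresolved

        # an RPC task fault or (ctx) cancellation, boxed
        if (exc := ctx._remote_error) is not None:
            return outcome.Error(exc)

        # the peer task-as-func's `return`-ed value, boxed
        return outcome.Value(ctx._result)

where `.outcome` would then simply `return as_outcome(self)`.)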
@@ -2180,7 +2230,7 @@ async def open_context_from_portal( f'|_{ctx._task}\n' ) # XXX NOTE XXX: the below call to - # `Context.result()` will ALWAYS raise + # `Context.wait_for_result()` will ALWAYS raise # a `ContextCancelled` (via an embedded call to # `Context._maybe_raise_remote_err()`) IFF # a `Context._remote_error` was set by the runtime @@ -2190,10 +2240,10 @@ async def open_context_from_portal( # ALWAYS SET any time "callee" side fails and causes "caller # side" cancellation via a `ContextCancelled` here. try: - result_or_err: Exception|Any = await ctx.result() + result_or_err: Exception|Any = await ctx.wait_for_result() except BaseException as berr: # on normal teardown, if we get some error - # raised in `Context.result()` we still want to + # raised in `Context.wait_for_result()` we still want to # save that error on the ctx's state to # determine things like `.cancelled_caught` for # cases where there was remote cancellation but -- 2.34.1 From 74048b06a709d4193f7744b8b3819ded57671f7c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 27 Jun 2024 16:25:46 -0400 Subject: [PATCH 169/305] Prep for legacy RPC API factor-n-remove This change is adding commentary about the upcoming API removal and simplification of nursery + portal internals; no actual code changes are included. The plan to (re)move the old RPC methods: - `ActorNursery.run_in_actor()` - `Portal.run()` - `Portal.run_from_ns()` and any related impl internals out of each conc-primitive and instead into something like a `.hilevel.rpc` set of APIs which then are all implemented using the newer and more lowlevel `Context`/`MsgStream` primitives instead Bo Further, - formally deprecate the `Portal.result()` meth for `.wait_for_result()`. - only `log.info()` about runtime shutdown in the implicit root case. --- tractor/_portal.py | 42 +++++++++++++++++++++++++++++++++++++---- tractor/_spawn.py | 4 ++-- tractor/_supervise.py | 44 +++++++++++++++++++++++++++++++------------ 3 files changed, 72 insertions(+), 18 deletions(-) diff --git a/tractor/_portal.py b/tractor/_portal.py index 2c676e12..0f698836 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -121,7 +121,8 @@ class Portal: ) return self.chan - # TODO: factor this out into an `ActorNursery` wrapper + # TODO: factor this out into a `.highlevel` API-wrapper that uses + # a single `.open_context()` call underneath. async def _submit_for_result( self, ns: str, @@ -141,13 +142,22 @@ class Portal: portal=self, ) + # TODO: we should deprecate this API right? since if we remove + # `.run_in_actor()` (and instead move it to a `.highlevel` + # wrapper api (around a single `.open_context()` call) we don't + # really have any notion of a "main" remote task any more? + # # @api_frame - async def result(self) -> Any: + async def wait_for_result( + self, + hide_tb: bool = True, + ) -> Any: ''' - Return the result(s) from the remote actor's "main" task. + Return the final result delivered by a `Return`-msg from the + remote peer actor's "main" task's `return` statement. ''' - __tracebackhide__ = True + __tracebackhide__: bool = hide_tb # Check for non-rpc errors slapped on the # channel for which we always raise exc = self.channel._exc @@ -182,6 +192,23 @@ class Portal: return self._final_result_pld + # TODO: factor this out into a `.highlevel` API-wrapper that uses + # a single `.open_context()` call underneath. 
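+    # e.g. one speculative shape for such a wrapper (sketch only,
+    # the helper name is made up):
+    #
+    # async def run_one(portal, func, **kwargs) -> Any:
+    #     async with portal.open_context(func, **kwargs) as (ctx, first):
+    #         return await ctx.wait_for_result()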
+ async def result( + self, + *args, + **kwargs, + ) -> Any|Exception: + typname: str = type(self).__name__ + log.warning( + f'`{typname}.result()` is DEPRECATED!\n' + f'Use `{typname}.wait_for_result()` instead!\n' + ) + return await self.wait_for_result( + *args, + **kwargs, + ) + async def _cancel_streams(self): # terminate all locally running async generator # IPC calls @@ -240,6 +267,7 @@ class Portal: f'{reminfo}' ) + # XXX the one spot we set it? self.channel._cancel_called: bool = True try: # send cancel cmd - might not get response @@ -279,6 +307,8 @@ class Portal: ) return False + # TODO: do we still need this for low level `Actor`-runtime + # method calls or can we also remove it? async def run_from_ns( self, namespace_path: str, @@ -316,6 +346,8 @@ class Portal: expect_msg=Return, ) + # TODO: factor this out into a `.highlevel` API-wrapper that uses + # a single `.open_context()` call underneath. async def run( self, func: str, @@ -370,6 +402,8 @@ class Portal: expect_msg=Return, ) + # TODO: factor this out into a `.highlevel` API-wrapper that uses + # a single `.open_context()` call underneath. @acm async def open_stream_from( self, diff --git a/tractor/_spawn.py b/tractor/_spawn.py index 481e2981..986c2e29 100644 --- a/tractor/_spawn.py +++ b/tractor/_spawn.py @@ -149,7 +149,7 @@ async def exhaust_portal( # XXX: streams should never be reaped here since they should # always be established and shutdown using a context manager api - final: Any = await portal.result() + final: Any = await portal.wait_for_result() except ( Exception, @@ -223,8 +223,8 @@ async def cancel_on_completion( async def hard_kill( proc: trio.Process, - terminate_after: int = 1.6, + terminate_after: int = 1.6, # NOTE: for mucking with `.pause()`-ing inside the runtime # whilst also hacking on it XD # terminate_after: int = 99999, diff --git a/tractor/_supervise.py b/tractor/_supervise.py index 8f3574bb..fb737c12 100644 --- a/tractor/_supervise.py +++ b/tractor/_supervise.py @@ -80,6 +80,7 @@ class ActorNursery: ''' def __init__( self, + # TODO: maybe def these as fields of a struct looking type? actor: Actor, ria_nursery: trio.Nursery, da_nursery: trio.Nursery, @@ -88,8 +89,10 @@ class ActorNursery: ) -> None: # self.supervisor = supervisor # TODO self._actor: Actor = actor - self._ria_nursery = ria_nursery + + # TODO: rename to `._tn` for our conventional "task-nursery" self._da_nursery = da_nursery + self._children: dict[ tuple[str, str], tuple[ @@ -98,15 +101,13 @@ class ActorNursery: Portal | None, ] ] = {} - # portals spawned with ``run_in_actor()`` are - # cancelled when their "main" result arrives - self._cancel_after_result_on_exit: set = set() + self.cancelled: bool = False self._join_procs = trio.Event() self._at_least_one_child_in_debug: bool = False self.errors = errors - self.exited = trio.Event() self._scope_error: BaseException|None = None + self.exited = trio.Event() # NOTE: when no explicit call is made to # `.open_root_actor()` by application code, @@ -116,6 +117,13 @@ class ActorNursery: # and syncing purposes to any actor opened nurseries. self._implicit_runtime_started: bool = False + # TODO: remove the `.run_in_actor()` API and thus this 2ndary + # nursery when that API get's moved outside this primitive! 
+ self._ria_nursery = ria_nursery + # portals spawned with ``run_in_actor()`` are + # cancelled when their "main" result arrives + self._cancel_after_result_on_exit: set = set() + async def start_actor( self, name: str, @@ -126,10 +134,14 @@ class ActorNursery: rpc_module_paths: list[str]|None = None, enable_modules: list[str]|None = None, loglevel: str|None = None, # set log level per subactor - nursery: trio.Nursery|None = None, debug_mode: bool|None = None, infect_asyncio: bool = False, + # TODO: ideally we can rm this once we no longer have + # a `._ria_nursery` since the dependent APIs have been + # removed! + nursery: trio.Nursery|None = None, + ) -> Portal: ''' Start a (daemon) actor: an process that has no designated @@ -200,6 +212,7 @@ class ActorNursery: # |_ dynamic @context decoration on child side # |_ implicit `Portal.open_context() as (ctx, first):` # and `return first` on parent side. + # |_ mention how it's similar to `trio-parallel` API? # -[ ] use @api_frame on the wrapper async def run_in_actor( self, @@ -269,11 +282,14 @@ class ActorNursery: ) -> None: ''' - Cancel this nursery by instructing each subactor to cancel - itself and wait for all subactors to terminate. + Cancel this actor-nursery by instructing each subactor's + runtime to cancel and wait for all underlying sub-processes + to terminate. - If ``hard_killl`` is set to ``True`` then kill the processes - directly without any far end graceful ``trio`` cancellation. + If `hard_kill` is set then kill the processes directly using + the spawning-backend's API/OS-machinery without any attempt + at (graceful) `trio`-style cancellation using our + `Actor.cancel()`. ''' __runtimeframe__: int = 1 # noqa @@ -629,8 +645,12 @@ async def open_nursery( f'|_{an}\n' ) - # shutdown runtime if it was started if implicit_runtime: + # shutdown runtime if it was started and report noisly + # that we're did so. msg += '=> Shutting down actor runtime <=\n' + log.info(msg) - log.info(msg) + else: + # keep noise low during std operation. + log.runtime(msg) -- 2.34.1 From 6af320273be17e1b77efba1dcc948b47c682c3ae Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 28 Jun 2024 12:59:31 -0400 Subject: [PATCH 170/305] Always reset `._state._ctxvar_Context` to prior Not sure how I forgot this but, obviously it's correct context-var semantics to revert the current IPC `Context` (set in the latest `.open_context()` block) such that any prior instance is reset.. This ensures the sanity `assert`s pass inside `.msg._ops.maybe_limit_plds()` and just in general ensures for any task that the last opened `Context` is the one returned from `current_ipc_ctx()`. --- tractor/_context.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tractor/_context.py b/tractor/_context.py index 33d8597b..9b4cb6e6 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -38,6 +38,7 @@ from collections import deque from contextlib import ( asynccontextmanager as acm, ) +from contextvars import Token from dataclasses import ( dataclass, field, @@ -1943,7 +1944,7 @@ async def open_context_from_portal( ) assert ctx._remote_func_type == 'context' assert ctx._caller_info - _ctxvar_Context.set(ctx) + prior_ctx_tok: Token = _ctxvar_Context.set(ctx) # placeholder for any exception raised in the runtime # or by user tasks which cause this context's closure. 
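
The underlying stdlib `contextvars` Token pattern being applied here,
standalone (toy var and values, but the same set-then-reset flow):

    from contextvars import ContextVar, Token

    _cv: ContextVar[str] = ContextVar('cv', default='outer')

    def with_inner() -> None:
        prior_tok: Token = _cv.set('inner')
        try:
            assert _cv.get() == 'inner'
        finally:
            # revert to whatever was set prior, don't just clear!
            _cv.reset(prior_tok)

    with_inner()
    assert _cv.get() == 'outer'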
@@ -2394,6 +2395,9 @@ async def open_context_from_portal( None, ) + # XXX revert to prior IPC-task-ctx scope + _ctxvar_Context.reset(prior_ctx_tok) + def mk_context( chan: Channel, -- 2.34.1 From 9811db9ac50c9f307b6c92ddf5aee57b91a6a1a9 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 28 Jun 2024 14:25:53 -0400 Subject: [PATCH 171/305] Further formalize `greenback` integration Since we more or less require it for `tractor.pause_from_sync()` this refines enable toggles and their relay down the actor tree as well as more explicit logging around init and activation. Tweaks summary: - `.info()` report the module if discovered during root boot. - use a `._state._runtime_vars['use_greenback']: bool` activation flag inside `Actor._from_parent()` to determine if the sub should try to use it and set to `False` if mod-loading fails / not installed. - expose `maybe_init_greenback()` from `.devx` sugpkg. - comment out RTE in `._pause()` for now since we already have it in `.pause_from_sync()`. - always `.exception()` on `maybe_init_greenback()` import errors to clarify the underlying failure deats. - always explicitly report if `._state._runtime_vars['use_greenback']` was NOT set when `.pause_from_sync()` is called. Other `._runtime.async_main()` adjustments: - combine the "internal error call ur parents" message and the failed registry contact status into one new `err_report: str`. - drop the final exception handler's call to `Actor.lifetime_stack.close()` since we're already doing it in the `finally:` block and the earlier call has no currently known benefit. - only report on the `.lifetime_stack()` callbacks if any are detected as registered. --- tractor/_root.py | 27 +++++--- tractor/_runtime.py | 137 +++++++++++++++++++++++++++------------ tractor/_state.py | 2 +- tractor/devx/__init__.py | 1 + tractor/devx/_debug.py | 87 ++++++++++++++++--------- 5 files changed, 173 insertions(+), 81 deletions(-) diff --git a/tractor/_root.py b/tractor/_root.py index bbd9d93d..882285a5 100644 --- a/tractor/_root.py +++ b/tractor/_root.py @@ -21,6 +21,7 @@ Root actor runtime ignition(s). from contextlib import asynccontextmanager as acm from functools import partial import importlib +import inspect import logging import os import signal @@ -115,10 +116,16 @@ async def open_root_actor( if ( debug_mode and maybe_enable_greenback - and await _debug.maybe_init_greenback( - raise_not_found=False, + and ( + maybe_mod := await _debug.maybe_init_greenback( + raise_not_found=False, + ) ) ): + logger.info( + f'Found `greenback` installed @ {maybe_mod}\n' + 'Enabling `tractor.pause_from_sync()` support!\n' + ) os.environ['PYTHONBREAKPOINT'] = ( 'tractor.devx._debug._sync_pause_from_builtin' ) @@ -264,7 +271,9 @@ async def open_root_actor( except OSError: # TODO: make this a "discovery" log level? 
- logger.warning(f'No actor registry found @ {addr}') + logger.info( + f'No actor registry found @ {addr}\n' + ) async with trio.open_nursery() as tn: for addr in registry_addrs: @@ -278,7 +287,6 @@ async def open_root_actor( # Create a new local root-actor instance which IS NOT THE # REGISTRAR if ponged_addrs: - if ensure_registry: raise RuntimeError( f'Failed to open `{name}`@{ponged_addrs}: ' @@ -365,24 +373,25 @@ async def open_root_actor( ) try: yield actor - except ( Exception, BaseExceptionGroup, ) as err: - - import inspect + # XXX NOTE XXX see equiv note inside + # `._runtime.Actor._stream_handler()` where in the + # non-root or root-that-opened-this-mahually case we + # wait for the local actor-nursery to exit before + # exiting the transport channel handler. entered: bool = await _debug._maybe_enter_pm( err, api_frame=inspect.currentframe(), ) - if ( not entered and not is_multi_cancelled(err) ): - logger.exception('Root actor crashed:\n') + logger.exception('Root actor crashed\n') # ALWAYS re-raise any error bubbled up from the # runtime! diff --git a/tractor/_runtime.py b/tractor/_runtime.py index dfa76cd8..efc9be9c 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -1048,6 +1048,10 @@ class Actor: # TODO: another `Struct` for rtvs.. rvs: dict[str, Any] = spawnspec._runtime_vars if rvs['_debug_mode']: + from .devx import ( + enable_stack_on_sig, + maybe_init_greenback, + ) try: # TODO: maybe return some status msgs upward # to that we can emit them in `con_status` @@ -1055,13 +1059,27 @@ class Actor: log.devx( 'Enabling `stackscope` traces on SIGUSR1' ) - from .devx import enable_stack_on_sig enable_stack_on_sig() + except ImportError: log.warning( '`stackscope` not installed for use in debug mode!' ) + if rvs.get('use_greenback', False): + maybe_mod: ModuleType|None = await maybe_init_greenback() + if maybe_mod: + log.devx( + 'Activated `greenback` ' + 'for `tractor.pause_from_sync()` support!' + ) + else: + rvs['use_greenback'] = False + log.warning( + '`greenback` not installed for use in debug mode!\n' + '`tractor.pause_from_sync()` not available!' + ) + rvs['_is_root'] = False _state._runtime_vars.update(rvs) @@ -1720,8 +1738,8 @@ async def async_main( # Register with the arbiter if we're told its addr log.runtime( - f'Registering `{actor.name}` ->\n' - f'{pformat(accept_addrs)}' + f'Registering `{actor.name}` => {pformat(accept_addrs)}\n' + # ^-TODO-^ we should instead show the maddr here^^ ) # TODO: ideally we don't fan out to all registrars @@ -1779,57 +1797,90 @@ async def async_main( # Blocks here as expected until the root nursery is # killed (i.e. this actor is cancelled or signalled by the parent) - except Exception as err: - log.runtime("Closing all actor lifetime contexts") - actor.lifetime_stack.close() - + except Exception as internal_err: if not is_registered: + err_report: str = ( + '\n' + "Actor runtime (internally) failed BEFORE contacting the registry?\n" + f'registrars -> {actor.reg_addrs} ?!?!\n\n' + + '^^^ THIS IS PROBABLY AN INTERNAL `tractor` BUG! 
^^^\n\n' + '\t>> CALMLY CANCEL YOUR CHILDREN AND CALL YOUR PARENTS <<\n\n' + + '\tIf this is a sub-actor hopefully its parent will keep running ' + 'and cancel/reap this sub-process..\n' + '(well, presuming this error was propagated upward)\n\n' + + '\t---------------------------------------------\n' + '\tPLEASE REPORT THIS TRACEBACK IN A BUG REPORT @ ' # oneline + 'https://github.com/goodboy/tractor/issues\n' + '\t---------------------------------------------\n' + ) + # TODO: I guess we could try to connect back # to the parent through a channel and engage a debugger # once we have that all working with std streams locking? - log.exception( - f"Actor errored and failed to register with arbiter " - f"@ {actor.reg_addrs[0]}?") - log.error( - "\n\n\t^^^ THIS IS PROBABLY AN INTERNAL `tractor` BUG! ^^^\n\n" - "\t>> CALMLY CALL THE AUTHORITIES AND HIDE YOUR CHILDREN <<\n\n" - "\tIf this is a sub-actor hopefully its parent will keep running " - "correctly presuming this error was safely ignored..\n\n" - "\tPLEASE REPORT THIS TRACEBACK IN A BUG REPORT: " - "https://github.com/goodboy/tractor/issues\n" - ) + log.exception(err_report) if actor._parent_chan: await try_ship_error_to_remote( actor._parent_chan, - err, + internal_err, ) # always! - match err: + match internal_err: case ContextCancelled(): log.cancel( f'Actor: {actor.uid} was task-context-cancelled with,\n' - f'str(err)' + f'str(internal_err)' ) case _: - log.exception("Actor errored:") - raise + log.exception( + 'Main actor-runtime task errored\n' + f' Closing all actor lifetime contexts..' + teardown_report: str = ( + 'Main actor-runtime task completed\n' ) - # tear down all lifetime contexts if not in guest mode - # XXX: should this just be in the entrypoint? - actor.lifetime_stack.close() - # TODO: we can't actually do this bc the debugger - # uses the _service_n to spawn the lock task, BUT, - # in theory if we had the root nursery surround this finally - # block it might be actually possible to debug THIS - # machinery in the same way as user task code? + # ?TODO? should this be in `._entry`/`._root` mods instead? + # + # teardown any actor-lifetime-bound contexts + ls: ExitStack = actor.lifetime_stack + # only report if there are any registered + cbs: list[Callable] = [ + repr(tup[1].__wrapped__) + for tup in ls._exit_callbacks + ] + if cbs: + cbs_str: str = '\n'.join(cbs) + teardown_report += ( + '-> Closing actor-lifetime-bound callbacks\n\n' + f'}}>\n' + f' |_{ls}\n' + f' |_{cbs_str}\n' + ) + # XXX NOTE XXX this will cause an error which + # prevents any `infected_aio` actor from continuing + # and any callbacks in the `ls` here WILL NOT be + # called!! + # await _debug.pause(shield=True) + + ls.close() + + # XXX TODO but hard XXX + # we can't actually do this bc the debugger uses the + # _service_n to spawn the lock task, BUT, in theory if we had + # the root nursery surround this finally block it might be + # actually possible to debug THIS machinery in the same way + # as user task code? 
+ # # if actor.name == 'brokerd.ib': # with CancelScope(shield=True): # await _debug.breakpoint() @@ -1859,9 +1910,9 @@ async def async_main( failed = True if failed: - log.warning( - f'Failed to unregister {actor.name} from ' - f'registar @ {addr}' + teardown_report += ( + f'-> Failed to unregister {actor.name} from ' + f'registar @ {addr}\n' ) # Ensure all peers (actors connected to us as clients) are finished @@ -1869,13 +1920,17 @@ async def async_main( if any( chan.connected() for chan in chain(*actor._peers.values()) ): - log.runtime( - f"Waiting for remaining peers {actor._peers} to clear") + teardown_report += ( + f'-> Waiting for remaining peers {actor._peers} to clear..\n' + ) + log.runtime(teardown_report) with CancelScope(shield=True): await actor._no_more_peers.wait() - log.runtime("All peer channels are complete") - log.runtime("Runtime completed") + teardown_report += ('-> All peer channels are complete\n') + + teardown_report += ('Actor runtime exited') + log.info(teardown_report) # TODO: rename to `Registry` and move to `._discovery`! diff --git a/tractor/_state.py b/tractor/_state.py index 8c5cca14..9f896005 100644 --- a/tractor/_state.py +++ b/tractor/_state.py @@ -44,7 +44,7 @@ _runtime_vars: dict[str, Any] = { '_root_mailbox': (None, None), '_registry_addrs': [], - # for `breakpoint()` support + # for `tractor.pause_from_sync()` & `breakpoint()` support 'use_greenback': False, } diff --git a/tractor/devx/__init__.py b/tractor/devx/__init__.py index bb72d1f9..cfcff931 100644 --- a/tractor/devx/__init__.py +++ b/tractor/devx/__init__.py @@ -29,6 +29,7 @@ from ._debug import ( shield_sigint_handler as shield_sigint_handler, open_crash_handler as open_crash_handler, maybe_open_crash_handler as maybe_open_crash_handler, + maybe_init_greenback as maybe_init_greenback, post_mortem as post_mortem, mk_pdb as mk_pdb, ) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index ccf57d62..1135932c 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -69,6 +69,7 @@ from trio import ( import tractor from tractor.log import get_logger from tractor._context import Context +from tractor import _state from tractor._state import ( current_actor, is_root_process, @@ -87,9 +88,6 @@ if TYPE_CHECKING: from tractor._runtime import ( Actor, ) - from tractor.msg import ( - _codec, - ) log = get_logger(__name__) @@ -1599,12 +1597,16 @@ async def _pause( try: task: Task = current_task() except RuntimeError as rte: - log.exception('Failed to get current task?') - if actor.is_infected_aio(): - raise RuntimeError( - '`tractor.pause[_from_sync]()` not yet supported ' - 'for infected `asyncio` mode!' - ) from rte + __tracebackhide__: bool = False + log.exception( + 'Failed to get current `trio`-task?' + ) + # if actor.is_infected_aio(): + # mk_pdb().set_trace() + # raise RuntimeError( + # '`tractor.pause[_from_sync]()` not yet supported ' + # 'directly (infected) `asyncio` tasks!' 
+ # ) from rte raise @@ -2163,22 +2165,22 @@ def maybe_import_greenback( return False -async def maybe_init_greenback( - **kwargs, -) -> None|ModuleType: - - if mod := maybe_import_greenback(**kwargs): - await mod.ensure_portal() - log.devx( - '`greenback` portal opened!\n' - 'Sync debug support activated!\n' - ) - return mod +async def maybe_init_greenback(**kwargs) -> None|ModuleType: + try: + if mod := maybe_import_greenback(**kwargs): + await mod.ensure_portal() + log.devx( + '`greenback` portal opened!\n' + 'Sync debug support activated!\n' + ) + return mod + except BaseException: + log.exception('Failed to init `greenback`..') + raise return None - async def _pause_from_bg_root_thread( behalf_of_thread: Thread, repl: PdbREPL, @@ -2324,6 +2326,12 @@ def pause_from_sync( # TODO: once supported, remove this AND the one # inside `._pause()`! + # outstanding impl fixes: + # -[ ] need to make `.shield_sigint()` below work here! + # -[ ] how to handle `asyncio`'s new SIGINT-handler + # injection? + # -[ ] should `breakpoint()` work and what does it normally + # do in `asyncio` ctxs? if actor.is_infected_aio(): raise RuntimeError( '`tractor.pause[_from_sync]()` not yet supported ' @@ -2399,18 +2407,37 @@ def pause_from_sync( else: # we are presumably the `trio.run()` + main thread # raises on not-found by default greenback: ModuleType = maybe_import_greenback() + + # TODO: how to ensure this is either dynamically (if + # needed) called here (in some bg tn??) or that the + # subactor always already called it? + # greenback: ModuleType = await maybe_init_greenback() + message += f'-> imported {greenback}\n' repl_owner: Task = current_task() message += '-> calling `greenback.await_(_pause(debug_func=None))` from sync caller..\n' - out = greenback.await_( - _pause( - debug_func=None, - repl=repl, - hide_tb=hide_tb, - called_from_sync=True, - **_pause_kwargs, + try: + out = greenback.await_( + _pause( + debug_func=None, + repl=repl, + hide_tb=hide_tb, + called_from_sync=True, + **_pause_kwargs, + ) ) - ) + except RuntimeError as rte: + if not _state._runtime_vars.get( + 'use_greenback', + False, + ): + raise RuntimeError( + '`greenback` was never initialized in this actor!?\n\n' + f'{_state._runtime_vars}\n' + ) from rte + + raise + if out: bg_task, repl = out assert repl is repl @@ -2801,10 +2828,10 @@ def open_crash_handler( `trio.run()`. ''' + err: BaseException try: yield except tuple(catch) as err: - if type(err) not in ignore: pdbp.xpm() -- 2.34.1 From bd66450a7963287da518a51b3b32423a8e7436ef Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 28 Jun 2024 14:46:29 -0400 Subject: [PATCH 172/305] Update `MsgTypeError` content matching to latest --- tests/test_pldrx_limiting.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/test_pldrx_limiting.py b/tests/test_pldrx_limiting.py index e5ce691a..bb9a3ef7 100644 --- a/tests/test_pldrx_limiting.py +++ b/tests/test_pldrx_limiting.py @@ -285,14 +285,14 @@ def test_basic_payload_spec( if invalid_started: msg_type_str: str = 'Started' - bad_value_str: str = '10' + bad_value: int = 10 elif invalid_return: msg_type_str: str = 'Return' - bad_value_str: str = "'yo'" + bad_value: str = 'yo' else: # XXX but should never be used below then.. 
msg_type_str: str = '' - bad_value_str: str = '' + bad_value: str = '' maybe_mte: MsgTypeError|None = None should_raise: Exception|None = ( @@ -307,8 +307,10 @@ def test_basic_payload_spec( raises=should_raise, ensure_in_message=[ f"invalid `{msg_type_str}` msg payload", - f"value: `{bad_value_str}` does not " - f"match type-spec: `{msg_type_str}.pld: PldMsg|NoneType`", + f'{bad_value}', + f'has type {type(bad_value)!r}', + 'not match type-spec', + f'`{msg_type_str}.pld: PldMsg|NoneType`', ], # only for debug # post_mortem=True, -- 2.34.1 From 6927767d399d9266e35f7325c7feba695d24beff Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 28 Jun 2024 18:45:52 -0400 Subject: [PATCH 173/305] Update `._entry` actor status log Log-report the different types of actor exit conditions including cancel via KBI, error or normal return with varying levels depending on case. Also, start proto-ing out this weird ascii-syntax idea for describing conc system states and implement the first bit in a `nest_from_op()` log-message fmter that joins and indents an obj `repr()` with a tree-like `'>)\n|_'` header. --- tractor/_entry.py | 160 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 141 insertions(+), 19 deletions(-) diff --git a/tractor/_entry.py b/tractor/_entry.py index e22a4f1f..60050ea4 100644 --- a/tractor/_entry.py +++ b/tractor/_entry.py @@ -20,7 +20,8 @@ Sub-process entry points. """ from __future__ import annotations from functools import partial -# import textwrap +import os +import textwrap from typing import ( Any, TYPE_CHECKING, @@ -58,7 +59,7 @@ def _mp_main( ) -> None: ''' - The routine called *after fork* which invokes a fresh ``trio.run`` + The routine called *after fork* which invokes a fresh `trio.run()` ''' actor._forkserver_info = forkserver_info @@ -96,6 +97,107 @@ def _mp_main( log.info(f"Subactor {actor.uid} terminated") +# TODO: move this func to some kinda `.devx._conc_lang.py` eventually +# as we work out our multi-domain state-flow-syntax! +def nest_from_op( + input_op: str, + # + # ?TODO? an idea for a syntax to the state of concurrent systems + # as a "3-domain" (execution, scope, storage) model and using + # a minimal ascii/utf-8 operator-set. + # + # try not to take any of this seriously yet XD + # + # > is a "play operator" indicating (CPU bound) + # exec/work/ops required at the "lowest level computing" + # + # execution primititves (tasks, threads, actors..) denote their + # lifetime with '(' and ')' since parentheses normally are used + # in many langs to denote function calls. + # + # starting = ( + # >( opening/starting; beginning of the thread-of-exec (toe?) + # (> opened/started, (finished spawning toe) + # |_ repr of toe, in py these look like + # + # >) closing/exiting/stopping, + # )> closed/exited/stopped, + # |_ + # [OR <), )< ?? ] + # + # ending = ) + # >c) cancelling to close/exit + # c)> cancelled (caused close), OR? + # |_ + # OR maybe "x) erroring to eventuall exit + # x)> errored and terminated + # |_ + # + # scopes: supers/nurseries, IPC-ctxs, sessions, perms, etc. + # >{ opening + # {> opened + # }> closed + # >} closing + # + # storage: like queues, shm-buffers, files, etc.. + # >[ opening + # [> opened + # |_ + # + # >] closing + # ]> closed + + # IPC ops: channels, transports, msging + # => req msg + # <= resp msg + # <=> 2-way streaming (of msgs) + # <- recv 1 msg + # -> send 1 msg + # + # TODO: still not sure on R/L-HS approach..? + # =>( send-req to exec start (task, actor, thread..) 
+    # (<= recv-req to ^
+    #
+    # (<= recv-req ^
+    # <=( recv-resp opened remote exec primitive
+    # <=) recv-resp closed
+    #
+    # )<=c req to stop due to cancel
+    # c=>) req to stop due to cancel
+    #
+    # =>{ recv-req to open
+    # <={ send-status that it closed
+
+    tree_str: str,
+
+    # NOTE: so move back-from-the-left of the `input_op` by
+    # this amount.
+    back_from_op: int = 0,
+) -> str:
+    '''
+    Depth-increment the provided (presumably hierarchy/supervision)
+    "tree string" below the given `input_op` execution operator,
+    injecting a `"\n|_{input_op}\n"` and indenting the `tree_str` to
+    nest content aligned with the op's last char.
+
+    '''
+    return (
+        f'{input_op}\n'
+        +
+        textwrap.indent(
+            tree_str,
+            prefix=(
+                len(input_op)
+                -
+                (back_from_op + 1)
+            ) * ' ',
+        )
+    )
+
+
 def _trio_main(
     actor: Actor,
     *,
@@ -107,7 +209,6 @@ def _trio_main(
     Entry point for a `trio_run_in_process` subactor.
 
     '''
-    # __tracebackhide__: bool = True
     _debug.hide_runtime_frames()
     _state._current_actor = actor
 
@@ -119,7 +220,6 @@ def _trio_main(
     if actor.loglevel is not None:
         get_console_log(actor.loglevel)
 
-    import os
     actor_info: str = (
         f'|_{actor}\n'
         f'  uid: {actor.uid}\n'
@@ -128,13 +228,23 @@ def _trio_main(
         f'  loglevel: {actor.loglevel}\n'
     )
     log.info(
-        'Started new trio subactor:\n'
+        'Starting new `trio` subactor:\n'
         +
-        '>\n'  # like a "started/play"-icon from super perspective
-        +
-        actor_info,
+        nest_from_op(
+            input_op='>(',  # see syntax ideas above
+            tree_str=actor_info,
+            back_from_op=1,
+        )
     )
-
+    logmeth = log.info
+    exit_status: str = (
+        'Subactor exited\n'
+        +
+        nest_from_op(
+            input_op=')>',  # like a "closed-to-play"-icon from super perspective
+            tree_str=actor_info,
+        )
+    )
     try:
         if infect_asyncio:
             actor._infected_aio = True
@@ -143,16 +253,28 @@ def _trio_main(
             trio.run(trio_main)
 
     except KeyboardInterrupt:
-        log.cancel(
-            'Actor received KBI\n'
+        logmeth = log.cancel
+        exit_status: str = (
+            'Actor received KBI (aka an OS-cancel)\n'
             +
-            actor_info
+            nest_from_op(
+                input_op='c)>',  # closed due to cancel (see above)
+                tree_str=actor_info,
+            )
         )
+    except BaseException as err:
+        logmeth = log.error
+        exit_status: str = (
+            'Main actor task crashed during exit?\n'
+            +
+            nest_from_op(
+                input_op='x)>',  # closed by error
+                tree_str=actor_info,
+            )
+        )
+        # NOTE: since we `raise` below, a tb will already be shown on
+        # the console; thus we do NOT use `.exception()` above.
+        raise err
 
     finally:
-        log.info(
-            'Subactor terminated\n'
-            +
-            'x\n'  # like a "crossed-out/killed" from super perspective
-            +
-            actor_info
-        )
+        logmeth(exit_status)
-- 
2.34.1

From 521a2e353ddfeb0640e6c0aafee0daed1d6daef5 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Fri, 28 Jun 2024 19:06:17 -0400
Subject: [PATCH 174/305] Don't use pretty struct stuff in `._invoke`

It's too fragile to put inside core RPC machinery since
`msgspec.Struct` defs can fail if a field type can't be looked up at
creation time (like can easily happen if you conditionally import
using `if TYPE_CHECKING:`).

Also,
- rename `cs` to `rpc_ctx_cs: CancelScope` since it's literally the
  wrapping RPC `Context._scope`.
- report self cancellation via `explain: str` and add tail case for
  "unknown cause".
- put a ?TODO? around what to do about KBIs if a context is opened
  from an `infected_aio`-actor task.
- similar to our nursery and portal add a TODO list for moving all
  `_invoke_non_context()` content out of the RPC core and instead
  implementing it as `.hilevel` endpoint helpers (maybe as
  decorators?) which underneath define `@context`-funcs.
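
For reference, the `nest_from_op()` fmter from the previous patch
renders like so (the actor values here are made up):

    actor_info: str = (
        "|_Actor('gretchen')\n"
        "  uid: ('gretchen', 'deadbeef')\n"
    )
    print(
        'Subactor exited\n'
        +
        nest_from_op(
            input_op=')>',
            tree_str=actor_info,
        )
    )

    # Subactor exited
    # )>
    #  |_Actor('gretchen')
    #    uid: ('gretchen', 'deadbeef')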
--- tractor/_rpc.py | 70 +++++++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index 37fea079..09d203b7 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -67,7 +67,7 @@ from .msg import ( MsgCodec, PayloadT, NamespacePath, - pretty_struct, + # pretty_struct, _ops as msgops, ) from tractor.msg.types import ( @@ -88,6 +88,16 @@ if TYPE_CHECKING: log = get_logger('tractor') +# ?TODO? move to a `tractor.lowlevel._rpc` with the below +# func-type-cases implemented "on top of" `@context` defs: +# -[ ] std async func helper decorated with `@rpc_func`? +# -[ ] `Portal.open_stream_from()` with async-gens? +# |_ possibly a duplex form of this with a +# `sent_from_peer = yield send_to_peer` form, which would require +# syncing the send/recv side with possibly `.receive_nowait()` +# on each `yield`? +# -[ ] some kinda `@rpc_acm` maybe that does a fixture style with +# user only defining a single-`yield` generator-func? async def _invoke_non_context( actor: Actor, cancel_scope: CancelScope, @@ -107,8 +117,9 @@ async def _invoke_non_context( ] = trio.TASK_STATUS_IGNORED, ): __tracebackhide__: bool = True + cs: CancelScope|None = None # ref when activated - # TODO: can we unify this with the `context=True` impl below? + # ?TODO? can we unify this with the `context=True` impl below? if inspect.isasyncgen(coro): await chan.send( StartAck( @@ -159,10 +170,6 @@ async def _invoke_non_context( functype='asyncgen', ) ) - # XXX: the async-func may spawn further tasks which push - # back values like an async-generator would but must - # manualy construct the response dict-packet-responses as - # above with cancel_scope as cs: ctx._scope = cs task_status.started(ctx) @@ -174,15 +181,13 @@ async def _invoke_non_context( await chan.send( Stop(cid=cid) ) + + # simplest function/method request-response pattern + # XXX: in the most minimally used case, just a scheduled internal runtime + # call to `Actor._cancel_task()` from the ctx-peer task since we + # don't (yet) have a dedicated IPC msg. + # ------ - ------ else: - # regular async function/method - # XXX: possibly just a scheduled `Actor._cancel_task()` - # from a remote request to cancel some `Context`. - # ------ - ------ - # TODO: ideally we unify this with the above `context=True` - # block such that for any remote invocation ftype, we - # always invoke the far end RPC task scheduling the same - # way: using the linked IPC context machinery. failed_resp: bool = False try: ack = StartAck( @@ -353,8 +358,15 @@ async def _errors_relayed_via_ipc( # channel. task_status.started(err) - # always reraise KBIs so they propagate at the sys-process level. - if isinstance(err, KeyboardInterrupt): + # always propagate KBIs at the sys-process level. + if ( + isinstance(err, KeyboardInterrupt) + + # ?TODO? except when running in asyncio mode? + # |_ wut if you want to open a `@context` FROM an + # infected_aio task? + # and not actor.is_infected_aio() + ): raise # RPC task bookeeping. @@ -457,7 +469,6 @@ async def _invoke( # tb: TracebackType = None cancel_scope = CancelScope() - cs: CancelScope|None = None # ref when activated ctx = actor.get_context( chan=chan, cid=cid, @@ -606,6 +617,8 @@ async def _invoke( # `@context` marked RPC function. # - `._portal` is never set. 
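        #
        # for reference, the canonical shape of such an endpoint
        # (a sketch, not from this diff) is roughly:
        #
        # @tractor.context
        # async def ep(ctx: Context, **kwargs):
        #     await ctx.started(first_value)
        #     ...
        #     return final_value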
try: + tn: trio.Nursery + rpc_ctx_cs: CancelScope async with ( trio.open_nursery() as tn, msgops.maybe_limit_plds( @@ -615,7 +628,7 @@ async def _invoke( ), ): ctx._scope_nursery = tn - ctx._scope = tn.cancel_scope + rpc_ctx_cs = ctx._scope = tn.cancel_scope task_status.started(ctx) # TODO: better `trionics` tooling: @@ -641,7 +654,7 @@ async def _invoke( # itself calls `ctx._maybe_cancel_and_set_remote_error()` # which cancels the scope presuming the input error # is not a `.cancel_acked` pleaser. - if ctx._scope.cancelled_caught: + if rpc_ctx_cs.cancelled_caught: our_uid: tuple = actor.uid # first check for and raise any remote error @@ -651,9 +664,7 @@ async def _invoke( if re := ctx._remote_error: ctx._maybe_raise_remote_err(re) - cs: CancelScope = ctx._scope - - if cs.cancel_called: + if rpc_ctx_cs.cancel_called: canceller: tuple = ctx.canceller explain: str = f'{ctx.side!r}-side task was cancelled by ' @@ -679,9 +690,15 @@ async def _invoke( elif canceller == ctx.chan.uid: explain += f'its {ctx.peer_side!r}-side peer' - else: + elif canceller == our_uid: + explain += 'itself' + + elif canceller: explain += 'a remote peer' + else: + explain += 'an unknown cause?' + explain += ( add_div(message=explain) + @@ -910,7 +927,10 @@ async def process_messages( f'IPC msg from peer\n' f'<= {chan.uid}\n\n' - # TODO: avoid fmting depending on loglevel for perf? + # TODO: use of the pprinting of structs is + # FRAGILE and should prolly not be + # + # avoid fmting depending on loglevel for perf? # -[ ] specifically `pretty_struct.pformat()` sub-call..? # - how to only log-level-aware actually call this? # -[ ] use `.msg.pretty_struct` here now instead! @@ -1237,7 +1257,7 @@ async def process_messages( 'Exiting IPC msg loop with final msg\n\n' f'<= peer: {chan.uid}\n' f' |_{chan}\n\n' - f'{pretty_struct.pformat(msg)}' + # f'{pretty_struct.pformat(msg)}' ) log.runtime(message) -- 2.34.1 From 3eef9aeac5a88eb3298d8fe74c69b826c6c0b02d Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 28 Jun 2024 19:17:05 -0400 Subject: [PATCH 175/305] Use `msgspec.Struct.__repr__()` failover impl In case the struct doesn't import a field type (which will cause the `.pformat()` to raise) just report the issue and try to fall back to the original `repr()` version. --- tractor/msg/pretty_struct.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/tractor/msg/pretty_struct.py b/tractor/msg/pretty_struct.py index f27fb89c..15e469e2 100644 --- a/tractor/msg/pretty_struct.py +++ b/tractor/msg/pretty_struct.py @@ -34,6 +34,9 @@ from pprint import ( saferepr, ) +from tractor.log import get_logger + +log = get_logger() # TODO: auto-gen type sig for input func both for # type-msgs and logging of RPC tasks? # taken and modified from: @@ -143,7 +146,13 @@ def pformat( else: # the `pprint` recursion-safe format: # https://docs.python.org/3.11/library/pprint.html#pprint.saferepr - val_str: str = saferepr(v) + try: + val_str: str = saferepr(v) + except Exception: + log.exception( + 'Failed to `saferepr({type(struct)})` !?\n' + ) + return _Struct.__repr__(struct) # TODO: LOLOL use `textwrap.indent()` instead dawwwwwg! obj_str += (field_ws + f'{k}: {typ_name} = {val_str},\n') @@ -194,12 +203,20 @@ class Struct( return sin_props pformat = pformat + # __repr__ = pformat # __str__ = __repr__ = pformat # TODO: use a pprint.PrettyPrinter instance around ONLY rendering # inside a known tty? # def __repr__(self) -> str: # ... 
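    # e.g. (in miniature) why the failover below is needed: resolving
    # a (made-up) field annotation like,
    #
    #     if TYPE_CHECKING:
    #         from decimal import Decimal  # not imported at runtime!
    #
    #     class Order(Struct):
    #         size: Decimal
    #
    # can raise during `.pformat()`'s type lookup; hence falling back
    # to the default `msgspec.Struct.__repr__()`.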
- __repr__ = pformat + def __repr__(self) -> str: + try: + return pformat(self) + except Exception: + log.exception( + f'Failed to `pformat({type(self)})` !?\n' + ) + return _Struct.__repr__(self) def copy( self, -- 2.34.1 From d105da0fcf19fe2e1b85d2065c7ee2ecce452e6a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 28 Jun 2024 19:24:03 -0400 Subject: [PATCH 176/305] (Re)type annot some tests - For the (still not finished) `test_caps_based_msging`, switch to using the new `PayloadMsg`. - add `testdir` fixture type. --- tests/test_caps_based_msging.py | 45 ++++++++++++--------------------- tests/test_docs_examples.py | 2 +- 2 files changed, 17 insertions(+), 30 deletions(-) diff --git a/tests/test_caps_based_msging.py b/tests/test_caps_based_msging.py index 9a73ba8d..6064c2cf 100644 --- a/tests/test_caps_based_msging.py +++ b/tests/test_caps_based_msging.py @@ -11,9 +11,6 @@ from typing import ( Type, Union, ) -from contextvars import ( - Context, -) from msgspec import ( structs, @@ -27,6 +24,7 @@ import tractor from tractor import ( _state, MsgTypeError, + Context, ) from tractor.msg import ( _codec, @@ -41,7 +39,7 @@ from tractor.msg import ( from tractor.msg.types import ( _payload_msgs, log, - Msg, + PayloadMsg, Started, mk_msg_spec, ) @@ -61,7 +59,7 @@ def mk_custom_codec( uid: tuple[str, str] = tractor.current_actor().uid # XXX NOTE XXX: despite defining `NamespacePath` as a type - # field on our `Msg.pld`, we still need a enc/dec_hook() pair + # field on our `PayloadMsg.pld`, we still need a enc/dec_hook() pair # to cast to/from that type on the wire. See the docs: # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types @@ -321,12 +319,12 @@ def dec_type_union( import importlib types: list[Type] = [] for type_name in type_names: - for ns in [ + for mod in [ typing, importlib.import_module(__name__), ]: if type_ref := getattr( - ns, + mod, type_name, False, ): @@ -744,7 +742,7 @@ def chk_pld_type( # 'Error', .pld: ErrorData codec: MsgCodec = mk_codec( - # NOTE: this ONLY accepts `Msg.pld` fields of a specified + # NOTE: this ONLY accepts `PayloadMsg.pld` fields of a specified # type union. ipc_pld_spec=payload_spec, ) @@ -752,7 +750,7 @@ def chk_pld_type( # make a one-off dec to compare with our `MsgCodec` instance # which does the below `mk_msg_spec()` call internally ipc_msg_spec: Union[Type[Struct]] - msg_types: list[Msg[payload_spec]] + msg_types: list[PayloadMsg[payload_spec]] ( ipc_msg_spec, msg_types, @@ -761,7 +759,7 @@ def chk_pld_type( ) _enc = msgpack.Encoder() _dec = msgpack.Decoder( - type=ipc_msg_spec or Any, # like `Msg[Any]` + type=ipc_msg_spec or Any, # like `PayloadMsg[Any]` ) assert ( @@ -806,7 +804,7 @@ def chk_pld_type( 'cid': '666', 'pld': pld, } - enc_msg: Msg = typedef(**kwargs) + enc_msg: PayloadMsg = typedef(**kwargs) _wire_bytes: bytes = _enc.encode(enc_msg) wire_bytes: bytes = codec.enc.encode(enc_msg) @@ -883,25 +881,16 @@ def test_limit_msgspec(): debug_mode=True ): - # ensure we can round-trip a boxing `Msg` + # ensure we can round-trip a boxing `PayloadMsg` assert chk_pld_type( - # Msg, - Any, - None, + payload_spec=Any, + pld=None, expect_roundtrip=True, ) - # TODO: don't need this any more right since - # `msgspec>=0.15` has the nice generics stuff yah?? - # - # manually override the type annot of the payload - # field and ensure it propagates to all msg-subtypes. 
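    # i.e. the underlying `msgspec` behaviour relied on below, in
    # miniature (standalone & runnable):
    #
    # import msgspec
    # from msgspec import msgpack
    #
    # class PldMsg(msgspec.Struct):
    #     cid: str
    #     pld: int  # the payload "spec" under test
    #
    # enc = msgpack.Encoder()
    # dec = msgpack.Decoder(type=PldMsg)
    # assert dec.decode(enc.encode(PldMsg(cid='666', pld=10))).pld == 10
    # try:
    #     dec.decode(enc.encode({'cid': '666', 'pld': 'doggy'}))
    # except msgspec.ValidationError:
    #     pass  # the mis-typed payload is rejected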
- # Msg.__annotations__['pld'] = Any - # verify that a mis-typed payload value won't decode assert not chk_pld_type( - # Msg, - int, + payload_spec=int, pld='doggy', ) @@ -913,18 +902,16 @@ def test_limit_msgspec(): value: Any assert not chk_pld_type( - # Msg, - CustomPayload, + payload_spec=CustomPayload, pld='doggy', ) assert chk_pld_type( - # Msg, - CustomPayload, + payload_spec=CustomPayload, pld=CustomPayload(name='doggy', value='urmom') ) - # uhh bc we can `.pause_from_sync()` now! :surfer: + # yah, we can `.pause_from_sync()` now! # breakpoint() trio.run(main) diff --git a/tests/test_docs_examples.py b/tests/test_docs_examples.py index 63ad07a2..fdf54bca 100644 --- a/tests/test_docs_examples.py +++ b/tests/test_docs_examples.py @@ -19,7 +19,7 @@ from tractor._testing import ( @pytest.fixture def run_example_in_subproc( loglevel: str, - testdir, + testdir: pytest.Testdir, reg_addr: tuple[str, int], ): -- 2.34.1 From 8de79372b72a8cbc2e6e8f972cc278b420e9e7b3 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 28 Jun 2024 19:27:13 -0400 Subject: [PATCH 177/305] Woops, set `.cancel()` level in custom levels table.. --- tractor/log.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tractor/log.py b/tractor/log.py index edb058e3..47f1f259 100644 --- a/tractor/log.py +++ b/tractor/log.py @@ -54,11 +54,12 @@ LOG_FORMAT = ( DATE_FORMAT = '%b %d %H:%M:%S' # FYI, ERROR is 40 +# TODO: use a `bidict` to avoid the :155 check? CUSTOM_LEVELS: dict[str, int] = { 'TRANSPORT': 5, 'RUNTIME': 15, 'DEVX': 17, - 'CANCEL': 18, + 'CANCEL': 22, 'PDB': 500, } STD_PALETTE = { @@ -147,6 +148,8 @@ class StackLevelAdapter(LoggerAdapter): Delegate a log call to the underlying logger, after adding contextual information from this adapter instance. + NOTE: all custom level methods (above) delegate to this! + ''' if self.isEnabledFor(level): stacklevel: int = 3 -- 2.34.1 From 417f4f7255a555162fcbb970c5a5f41f710653bd Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 28 Jun 2024 19:28:12 -0400 Subject: [PATCH 178/305] Refine some `.trionics` docs and logging - allow passing and report the lib name (`trio` or `tractor`) from `maybe_open_nursery()`. - use `.runtime()` level when reporting `_Cache`-hits in `maybe_open_context()`. - tidy up some doc strings. --- tractor/trionics/_broadcast.py | 9 +++--- tractor/trionics/_mngrs.py | 53 ++++++++++++++++++++++------------ 2 files changed, 39 insertions(+), 23 deletions(-) diff --git a/tractor/trionics/_broadcast.py b/tractor/trionics/_broadcast.py index a5d31871..977b6828 100644 --- a/tractor/trionics/_broadcast.py +++ b/tractor/trionics/_broadcast.py @@ -156,11 +156,12 @@ class BroadcastState(Struct): class BroadcastReceiver(ReceiveChannel): ''' - A memory receive channel broadcaster which is non-lossy for the - fastest consumer. + A memory receive channel broadcaster which is non-lossy for + the fastest consumer. - Additional consumer tasks can receive all produced values by registering - with ``.subscribe()`` and receiving from the new instance it delivers. + Additional consumer tasks can receive all produced values by + registering with ``.subscribe()`` and receiving from the new + instance it delivers. 
''' def __init__( diff --git a/tractor/trionics/_mngrs.py b/tractor/trionics/_mngrs.py index 08e70ad2..fd224d65 100644 --- a/tractor/trionics/_mngrs.py +++ b/tractor/trionics/_mngrs.py @@ -18,8 +18,12 @@ Async context manager primitives with hard ``trio``-aware semantics ''' -from contextlib import asynccontextmanager as acm +from __future__ import annotations +from contextlib import ( + asynccontextmanager as acm, +) import inspect +from types import ModuleType from typing import ( Any, AsyncContextManager, @@ -30,13 +34,16 @@ from typing import ( Optional, Sequence, TypeVar, + TYPE_CHECKING, ) import trio - from tractor._state import current_actor from tractor.log import get_logger +if TYPE_CHECKING: + from tractor import ActorNursery + log = get_logger(__name__) @@ -46,8 +53,10 @@ T = TypeVar("T") @acm async def maybe_open_nursery( - nursery: trio.Nursery | None = None, + nursery: trio.Nursery|ActorNursery|None = None, shield: bool = False, + lib: ModuleType = trio, + ) -> AsyncGenerator[trio.Nursery, Any]: ''' Create a new nursery if None provided. @@ -58,13 +67,12 @@ async def maybe_open_nursery( if nursery is not None: yield nursery else: - async with trio.open_nursery() as nursery: + async with lib.open_nursery() as nursery: nursery.cancel_scope.shield = shield yield nursery async def _enter_and_wait( - mngr: AsyncContextManager[T], unwrapped: dict[int, T], all_entered: trio.Event, @@ -91,7 +99,6 @@ async def _enter_and_wait( @acm async def gather_contexts( - mngrs: Sequence[AsyncContextManager[T]], ) -> AsyncGenerator[ @@ -102,15 +109,17 @@ async def gather_contexts( None, ]: ''' - Concurrently enter a sequence of async context managers, each in - a separate ``trio`` task and deliver the unwrapped values in the - same order once all managers have entered. On exit all contexts are - subsequently and concurrently exited. + Concurrently enter a sequence of async context managers (acms), + each from a separate `trio` task and deliver the unwrapped + `yield`-ed values in the same order once all managers have entered. - This function is somewhat similar to common usage of - ``contextlib.AsyncExitStack.enter_async_context()`` (in a loop) in - combo with ``asyncio.gather()`` except the managers are concurrently - entered and exited, and cancellation just works. + On exit, all acms are subsequently and concurrently exited. + + This function is somewhat similar to a batch of non-blocking + calls to `contextlib.AsyncExitStack.enter_async_context()` + (inside a loop) *in combo with* a `asyncio.gather()` to get the + `.__aenter__()`-ed values, except the managers are both + concurrently entered and exited and *cancellation just works*(R). ''' seed: int = id(mngrs) @@ -210,9 +219,10 @@ async def maybe_open_context( ) -> AsyncIterator[tuple[bool, T]]: ''' - Maybe open a context manager if there is not already a _Cached - version for the provided ``key`` for *this* actor. Return the - _Cached instance on a _Cache hit. + Maybe open an async-context-manager (acm) if there is not already + a `_Cached` version for the provided (input) `key` for *this* actor. + + Return the `_Cached` instance on a _Cache hit. 
''' fid = id(acm_func) @@ -273,8 +283,13 @@ async def maybe_open_context( else: _Cache.users += 1 log.runtime( - f'Reusing resource for `_Cache` user {_Cache.users}\n\n' - f'{ctx_key!r} -> {yielded!r}\n' + f'Re-using cached resource for user {_Cache.users}\n\n' + f'{ctx_key!r} -> {type(yielded)}\n' + + # TODO: make this work with values but without + # `msgspec.Struct` causing frickin crashes on field-type + # lookups.. + # f'{ctx_key!r} -> {yielded!r}\n' ) lock.release() yield True, yielded -- 2.34.1 From ef7f34ca1cef91bc9ab399e5aeee941486222ac9 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 2 Jul 2024 12:21:26 -0400 Subject: [PATCH 179/305] More formal `TransportClosed` reporting/raising Since it was all ad-hoc defined inside `._ipc.MsgpackTCPStream._iter_pkts()` more or less, this starts formalizing a way for particular transport backends to indicate whether a disconnect condition should be re-raised in the RPC msg loop and if not what log level to report it at (if any). Based on our lone transport currently we try to suppress any logging noise from ephemeral connections expected during normal actor interaction and discovery subsys ops: - any short lived discovery related TCP connects are only logged as `.transport()` level. - both `.error()` and raise on any underlying `trio.ClosedResource` cause since that normally means some task touched transport layer internals that it shouldn't have. - do a `.warning()` on anything else unexpected. Impl deats: - extend the `._exceptions.TransportClosed` to accept an input log level, raise-on-report toggle and custom reporting & raising via a new `.report_n_maybe_raise()` method. - construct the TCs with inputs per case in (the newly named) `._iter_pkts(). - call ^ this method from the `TransportClosed` handler block inside the RPC msg loop thus delegating reporting levels and/or raising to the backend's per-case TC instantiating. Related `._ipc` changes: - mask out all the `MsgpackTCPStream._codec` debug helper stuff and drop any lingering cruft from the initial proto-ing of msg-codecs. - rename some attrs/methods: |_`MsgpackTCPStream._iter_packets()` -> `._iter_pkts()` and `._agen` -> `_aiter_pkts`. |_`Channel._aiter_recv()` -> `._aiter_msgs()` and `._agen` -> `_aiter_msgs`. - add `hide_tb: bool` support to `Channel.send()` and only show the frame on non-MTEs. --- tractor/__init__.py | 1 + tractor/_exceptions.py | 55 +++++++- tractor/_ipc.py | 284 +++++++++++++++++++++++++++-------------- tractor/_rpc.py | 20 ++- 4 files changed, 259 insertions(+), 101 deletions(-) diff --git a/tractor/__init__.py b/tractor/__init__.py index c15a391b..4f9e592d 100644 --- a/tractor/__init__.py +++ b/tractor/__init__.py @@ -50,6 +50,7 @@ from ._exceptions import ( ModuleNotExposed as ModuleNotExposed, MsgTypeError as MsgTypeError, RemoteActorError as RemoteActorError, + TransportClosed as TransportClosed, ) from .devx import ( breakpoint as breakpoint, diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 7164d6ab..108134ca 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -906,8 +906,59 @@ class StreamOverrun( ''' -class TransportClosed(trio.ClosedResourceError): - "Underlying channel transport was closed prior to use" +class TransportClosed(trio.BrokenResourceError): + ''' + IPC transport (protocol) connection was closed or broke and + indicates that the wrapping communication `Channel` can no longer + be used to send/receive msgs from the remote peer. 
+ + ''' + def __init__( + self, + message: str, + loglevel: str = 'transport', + cause: BaseException|None = None, + raise_on_report: bool = False, + + ) -> None: + self.message: str = message + self._loglevel = loglevel + super().__init__(message) + + if cause is not None: + self.__cause__ = cause + + # flag to toggle whether the msg loop should raise + # the exc in its `TransportClosed` handler block. + self._raise_on_report = raise_on_report + + def report_n_maybe_raise( + self, + message: str|None = None, + + ) -> None: + ''' + Using the init-specified log level emit a logging report + for this error. + + ''' + message: str = message or self.message + # when a cause is set, slap it onto the log emission. + if cause := self.__cause__: + cause_tb_str: str = ''.join( + traceback.format_tb(cause.__traceback__) + ) + message += ( + f'{cause_tb_str}\n' # tb + f' {cause}\n' # exc repr + ) + + getattr(log, self._loglevel)(message) + + # some errors we want to blow up from + # inside the RPC msg loop + if self._raise_on_report: + raise self from cause class NoResult(RuntimeError): diff --git a/tractor/_ipc.py b/tractor/_ipc.py index e5e3d10f..a1cb0359 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -54,7 +54,7 @@ from tractor._exceptions import ( ) from tractor.msg import ( _ctxvar_MsgCodec, - _codec, + # _codec, XXX see `self._codec` sanity/debug checks MsgCodec, types as msgtypes, pretty_struct, @@ -65,8 +65,18 @@ log = get_logger(__name__) _is_windows = platform.system() == 'Windows' -def get_stream_addrs(stream: trio.SocketStream) -> tuple: - # should both be IP sockets +def get_stream_addrs( + stream: trio.SocketStream +) -> tuple[ + tuple[str, int], # local + tuple[str, int], # remote +]: + ''' + Return the `trio` streaming transport prot's socket-addrs for + both the local and remote sides as a pair. + + ''' + # rn, should both be IP sockets lsockname = stream.socket.getsockname() rsockname = stream.socket.getpeername() return ( @@ -75,17 +85,22 @@ def get_stream_addrs(stream: trio.SocketStream) -> tuple: ) -# TODO: this should be our `Union[*msgtypes.__spec__]` now right? -MsgType = TypeVar("MsgType") - -# TODO: consider using a generic def and indexing with our eventual -# msg definition/types? -# - https://docs.python.org/3/library/typing.html#typing.Protocol -# - https://jcristharif.com/msgspec/usage.html#structs +# from tractor.msg.types import MsgType +# ?TODO? this should be our `Union[*msgtypes.__spec__]` alias now right..? +# => BLEH, except can't bc prots must inherit typevar or param-spec +# vars.. +MsgType = TypeVar('MsgType') +# TODO: break up this mod into a subpkg so we can start adding new +# backends and move this type stuff into a dedicated file.. Bo +# @runtime_checkable class MsgTransport(Protocol[MsgType]): +# +# ^-TODO-^ consider using a generic def and indexing with our +# eventual msg definition/types? +# - https://docs.python.org/3/library/typing.html#typing.Protocol stream: trio.SocketStream drained: list[MsgType] @@ -120,9 +135,9 @@ class MsgTransport(Protocol[MsgType]): ... -# TODO: not sure why we have to inherit here, but it seems to be an -# issue with ``get_msg_transport()`` returning a ``Type[Protocol]``; -# probably should make a `mypy` issue? +# TODO: typing oddity.. not sure why we have to inherit here, but it +# seems to be an issue with `get_msg_transport()` returning +# a `Type[Protocol]`; probably should make a `mypy` issue? 
class MsgpackTCPStream(MsgTransport):
     '''
     A ``trio.SocketStream`` delivering ``msgpack`` formatted data
@@ -145,7 +160,7 @@ class MsgpackTCPStream(MsgTransport):
         # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types
         #
         # TODO: define this as a `Codec` struct which can be
-        # overriden dynamically by the application/runtime.
+        # overridden dynamically by the application/runtime?
         codec: tuple[
             Callable[[Any], Any]|None,  # coder
             Callable[[type, Any], Any]|None,  # decoder
@@ -160,7 +175,7 @@ class MsgpackTCPStream(MsgTransport):
         self._laddr, self._raddr = get_stream_addrs(stream)
 
         # create read loop instance
-        self._agen = self._iter_packets()
+        self._aiter_pkts = self._iter_packets()
         self._send_lock = trio.StrictFIFOLock()
 
         # public i guess?
@@ -174,15 +189,12 @@ class MsgpackTCPStream(MsgTransport):
         # allow for custom IPC msg interchange format
         # dynamic override Bo
         self._task = trio.lowlevel.current_task()
-        self._codec: MsgCodec = (
-            codec
-            or
-            _codec._ctxvar_MsgCodec.get()
-        )
-        # TODO: mask out before release?
-        # log.runtime(
-        #     f'New {self} created with codec\n'
-        #     f'codec: {self._codec}\n'
+
+        # XXX for ctxvar debug only!
+        # self._codec: MsgCodec = (
+        #     codec
+        #     or
+        #     _codec._ctxvar_MsgCodec.get()
         # )
 
     async def _iter_packets(self) -> AsyncGenerator[dict, None]:
@@ -190,6 +202,11 @@ class MsgpackTCPStream(MsgTransport):
         Yield `bytes`-blob decoded packets from the underlying TCP
         stream using the current task's `MsgCodec`.
 
+        This is a streaming routine implemented as an async generator
+        func (which was the original design, but could be changed?)
+        and is allocated by a `.__call__()` inside `.__init__()` where
+        it is assigned to the `._aiter_pkts` attr.
+
         '''
         decodes_failed: int = 0
 
@@ -204,16 +221,82 @@ class MsgpackTCPStream(MsgTransport):
                 # seem to be getting racy failures here on
                 # arbiter/registry name subs..
                 trio.BrokenResourceError,
-            ):
-                raise TransportClosed(
-                    f'transport {self} was already closed prior ro read'
-                )
+
+            ) as trans_err:
+
+                loglevel = 'transport'
+                match trans_err:
+                    # case (
+                    #     ConnectionResetError()
+                    # ):
+                    #     loglevel = 'transport'
+
+                    # peer actor (graceful??) TCP EOF but `tricycle`
+                    # seems to raise a 0-bytes-read?
+                    case ValueError() if (
+                        'unclean EOF' in trans_err.args[0]
+                    ):
+                        pass
+
+                    # peer actor (task) prolly shutdown quickly due
+                    # to cancellation
+                    case trio.BrokenResourceError() if (
+                        'Connection reset by peer' in trans_err.args[0]
+                    ):
+                        pass
+
+                    # unless the disconnect condition falls under "a
+                    # normal operation breakage" we usually console warn
+                    # about it.
+                    case _:
+                        loglevel: str = 'warning'
+
+
+                raise TransportClosed(
+                    message=(
+                        f'IPC transport already closed by peer\n'
+                        f'x)> {type(trans_err)}\n'
+                        f' |_{self}\n'
+                    ),
+                    loglevel=loglevel,
+                ) from trans_err
+
+            # XXX definitely can happen if transport is closed
+            # manually by another `trio.lowlevel.Task` in the
+            # same actor; we use this in some simulated fault
+            # testing for ex, but generally should never happen
+            # under normal operation!
+            #
+            # NOTE: as such we always re-raise this error from the
+            # RPC msg loop!
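+            # (the `raise_on_report` input passed to the TC raised
+            # just below is what toggles that re-raise behaviour)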
+ except trio.ClosedResourceError as closure_err: + raise TransportClosed( + message=( + f'IPC transport already manually closed locally?\n' + f'x)> {type(closure_err)} \n' + f' |_{self}\n' + ), + loglevel='error', + raise_on_report=( + closure_err.args[0] == 'another task closed this fd' + or + closure_err.args[0] in ['another task closed this fd'] + ), + ) from closure_err + + # graceful TCP EOF disconnect if header == b'': raise TransportClosed( - f'transport {self} was already closed prior ro read' + message=( + f'IPC transport already gracefully closed\n' + f')>\n' + f'|_{self}\n' + ), + loglevel='transport', + # cause=??? # handy or no? ) + size: int size, = struct.unpack(" None: ''' Send a msgpack encoded py-object-blob-as-msg over TCP. @@ -304,21 +375,24 @@ class MsgpackTCPStream(MsgTransport): invalid msg type ''' - # __tracebackhide__: bool = hide_tb + __tracebackhide__: bool = hide_tb + + # XXX see `trio._sync.AsyncContextManagerMixin` for details + # on the `.acquire()`/`.release()` sequencing.. async with self._send_lock: # NOTE: lookup the `trio.Task.context`'s var for # the current `MsgCodec`. codec: MsgCodec = _ctxvar_MsgCodec.get() - # TODO: mask out before release? - if self._codec.pld_spec != codec.pld_spec: - self._codec = codec - log.runtime( - f'Using new codec in {self}.send()\n' - f'codec: {self._codec}\n\n' - f'msg: {msg}\n' - ) + # XXX for ctxvar debug only! + # if self._codec.pld_spec != codec.pld_spec: + # self._codec = codec + # log.runtime( + # f'Using new codec in {self}.send()\n' + # f'codec: {self._codec}\n\n' + # f'msg: {msg}\n' + # ) if type(msg) not in msgtypes.__msg_types__: if strict_types: @@ -352,6 +426,16 @@ class MsgpackTCPStream(MsgTransport): size: bytes = struct.pack(" + # except BaseException as _err: + # err = _err + # if not isinstance(err, MsgTypeError): + # __tracebackhide__: bool = False + # raise + @property def laddr(self) -> tuple[str, int]: return self._laddr @@ -361,7 +445,7 @@ class MsgpackTCPStream(MsgTransport): return self._raddr async def recv(self) -> Any: - return await self._agen.asend(None) + return await self._aiter_pkts.asend(None) async def drain(self) -> AsyncIterator[dict]: ''' @@ -378,7 +462,7 @@ class MsgpackTCPStream(MsgTransport): yield msg def __aiter__(self): - return self._agen + return self._aiter_pkts def connected(self) -> bool: return self.stream.socket.fileno() != -1 @@ -433,7 +517,7 @@ class Channel: # set after handshake - always uid of far end self.uid: tuple[str, str]|None = None - self._agen = self._aiter_recv() + self._aiter_msgs = self._iter_msgs() self._exc: Exception|None = None # set if far end actor errors self._closed: bool = False @@ -497,8 +581,6 @@ class Channel: ) return self._transport - # TODO: something simliar at the IPC-`Context` - # level so as to support @cm def apply_codec( self, @@ -517,6 +599,7 @@ class Channel: finally: self._transport.codec = orig + # TODO: do a .src/.dst: str for maddrs? def __repr__(self) -> str: if not self._transport: return '' @@ -560,27 +643,43 @@ class Channel: ) return transport + # TODO: something like, + # `pdbp.hideframe_on(errors=[MsgTypeError])` + # instead of the `try/except` hack we have rn.. + # seems like a pretty useful thing to have in general + # along with being able to filter certain stack frame(s / sets) + # possibly based on the current log-level? async def send( self, payload: Any, - # hide_tb: bool = False, + hide_tb: bool = False, ) -> None: ''' Send a coded msg-blob over the transport. 
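+
+        When `hide_tb` is set this frame is (normally) hidden from
+        any raised traceback but is re-shown for non-`MsgTypeError`
+        failures; see the exc handling just below.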
'''
-        # __tracebackhide__: bool = hide_tb
-        log.transport(
-            '=> send IPC msg:\n\n'
-            f'{pformat(payload)}\n'
-        )  # type: ignore
-        assert self._transport
-        await self._transport.send(
-            payload,
-            # hide_tb=hide_tb,
-        )
+        __tracebackhide__: bool = hide_tb
+        try:
+            log.transport(
+                '=> send IPC msg:\n\n'
+                f'{pformat(payload)}\n'
+            )
+            # assert self._transport # but why typing?
+            await self._transport.send(
+                payload,
+                hide_tb=hide_tb,
+            )
+        except BaseException as _err:
+            err = _err  # bind for introspection
+            if not isinstance(_err, MsgTypeError):
+                # assert err
+                __tracebackhide__: bool = False
+            else:
+                assert err.cid
+
+            raise
 
     async def recv(self) -> Any:
         assert self._transport
@@ -617,8 +716,11 @@ class Channel:
             await self.aclose(*args)
 
     def __aiter__(self):
-        return self._agen
+        return self._aiter_msgs
 
+    # ?TODO? run any reconnection sequence?
+    # -[ ] prolly should be impl-ed as deco-API?
+    #
     # async def _reconnect(self) -> None:
     #     """Handle connection failures by polling until a reconnect can be
     #     established.
@@ -636,7 +738,6 @@ class Channel:
     #             else:
    #                 log.transport("Stream connection re-established!")
 
-    #                 # TODO: run any reconnection sequence
    #                 # on_recon = self._recon_seq
    #                 # if on_recon:
    #                 #     await on_recon(self)
@@ -650,11 +751,17 @@ class Channel:
    #                     " for re-establishment")
    #                 await trio.sleep(1)
 
-    async def _aiter_recv(
+    async def _iter_msgs(
         self
     ) -> AsyncGenerator[Any, None]:
         '''
-        Async iterate items from underlying stream.
+        Yield `MsgType` IPC msgs decoded and delivered from
+        an underlying `MsgTransport` protocol.
+
+        This is a streaming routine also implemented as an async-gen
+        func (same as `MsgTransport._iter_pkts()`) and gets allocated
+        by a `.__call__()` inside `.__init__()` where it is assigned
+        to the `._aiter_msgs` attr.
 
         '''
         assert self._transport
@@ -680,15 +787,6 @@ class Channel:
                 case _:
                     yield msg
 
-            # TODO: if we were gonna do this it should be
-            # done up at the `MsgStream` layer!
-            #
-            # sent = yield item
-            # if sent is not None:
-            #     # optimization, passing None through all the
-            #     # time is pointless
-            #     await self._transport.send(sent)
-
         except trio.BrokenResourceError:
 
             # if not self._autorecon:
diff --git a/tractor/_rpc.py b/tractor/_rpc.py
index 09d203b7..a77c2af7 100644
--- a/tractor/_rpc.py
+++ b/tractor/_rpc.py
@@ -1196,7 +1196,7 @@ async def process_messages(
                     parent_chan=chan,
                 )
 
-            except TransportClosed:
+            except TransportClosed as tc:
                 # channels "breaking" (for TCP streams by EOF or 104
                 # connection-reset) is ok since we don't have a teardown
                 # handshake for them (yet) and instead we simply bail out of
                 # up..
                 #
                 # TODO: maybe add a teardown handshake? and,
-                # -[ ] don't show this msg if it's an ephemeral discovery ep call?
+                # -[x] don't show this msg if it's an ephemeral discovery ep call?
+                #  |_ see the below `.report_n_maybe_raise()` impl as well as
+                #     tc-exc input details in `MsgpackTCPStream._iter_pkts()`
+                #     for different read-failure cases.
                 # -[ ] figure out how this will break with other transports?
-                log.runtime(
-                    f'IPC channel closed abruptly\n'
-                    f'<=x peer: {chan.uid}\n'
-                    f'   |_{chan.raddr}\n'
+                tc.report_n_maybe_raise(
+                    message=(
+                        f'peer IPC channel closed abruptly?\n\n'
+                        f'<=x {chan}\n'
+                        f'  |_{chan.raddr}\n\n'
+                    )
+                    +
+                    tc.message
+
                 )
 
                 # transport **WAS** disconnected
-- 
2.34.1


From 1d9e60626c1918eaff664d33cc2411b51a13f9b0 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet 
Date: Tue, 2 Jul 2024 15:53:33 -0400
Subject: [PATCH 180/305] Add `Portal.chan` property, to wrap `._chan` attr

---
 tractor/_portal.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tractor/_portal.py b/tractor/_portal.py
index 0f698836..3dc7f3a1 100644
--- a/tractor/_portal.py
+++ b/tractor/_portal.py
@@ -97,7 +97,7 @@ class Portal:
         channel: Channel,
 
     ) -> None:
-        self.chan = channel
+        self._chan: Channel = channel
         # during the portal's lifetime
         self._final_result_pld: Any|None = None
         self._final_result_msg: PayloadMsg|None = None
@@ -109,6 +109,10 @@ class Portal:
         self._streams: set[MsgStream] = set()
         self.actor: Actor = current_actor()
 
+    @property
+    def chan(self) -> Channel:
+        return self._chan
+
     @property
     def channel(self) -> Channel:
         '''
-- 
2.34.1


From 9bc7be30bf2adaa034c2b1e63bb105b08d4f9122 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet 
Date: Tue, 2 Jul 2024 16:00:26 -0400
Subject: [PATCH 181/305] Reraise RAEs in `MsgStream.receive()`; truncate tbs

To avoid showing lowlevel details of exception handling around the
underlying call to `return await self._ctx._pld_rx.recv_pld(ipc=self)`,
any time a `RemoteActorError` is unpacked (and raised locally) we
re-raise it directly from the captured `src_err` so as to present to
the user/app caller-code an exception raised directly from the
`.receive()` frame. This simplifies traceback call-stacks for any
`log.exception()` or `pdb`-REPL output filtering out the lower `PldRx`
frames by default.

---
 tractor/_streaming.py | 109 +++++++++++++++++++-----------------------
 1 file changed, 50 insertions(+), 59 deletions(-)

diff --git a/tractor/_streaming.py b/tractor/_streaming.py
index 314a93b8..bc87164e 100644
--- a/tractor/_streaming.py
+++ b/tractor/_streaming.py
@@ -36,8 +36,8 @@ import warnings
 import trio
 
 from ._exceptions import (
-    # _raise_from_no_key_in_msg,
     ContextCancelled,
+    RemoteActorError,
 )
 from .log import get_logger
 from .trionics import (
@@ -101,7 +101,7 @@ class MsgStream(trio.abc.Channel):
     @property
     def ctx(self) -> Context:
         '''
-        This stream's IPC `Context` ref.
+        A read-only ref to this stream's inter-actor-task `Context`.
 
         '''
         return self._ctx
@@ -145,9 +145,8 @@ class MsgStream(trio.abc.Channel):
 
         '''
         __tracebackhide__: bool = hide_tb
 
-        # NOTE: `trio.ReceiveChannel` implements
-        # EOC handling as follows (aka uses it
-        # to gracefully exit async for loops):
+        # NOTE FYI: `trio.ReceiveChannel` implements EOC handling as
+        # follows (aka uses it to gracefully exit async for loops):
         #
         # async def __anext__(self) -> ReceiveType:
         #     try:
@@ -165,48 +164,29 @@ class MsgStream(trio.abc.Channel):
         src_err: Exception|None = None  # orig tb
         try:
-
             ctx: Context = self._ctx
             return await ctx._pld_rx.recv_pld(ipc=self)
 
         # XXX: the stream terminates on either of:
-        # - via `self._rx_chan.receive()` raising  after manual closure
-        #   by the rpc-runtime OR,
-        # - via a received `{'stop': ...}` msg from remote side.
-        #   |_ NOTE: previously this was triggered by calling
-        #   ``._rx_chan.aclose()`` on the send side of the channel inside
-        #   `Actor._deliver_ctx_payload()`, but now the 'stop' message handling
-        #   has been put just above inside `_raise_from_no_key_in_msg()`.
-        except (
-            trio.EndOfChannel,
-        ) as eoc:
-            src_err = eoc
+        # - `self._rx_chan.receive()` raising  after manual closure
+        #   by the rpc-runtime,
+        #   OR
+        # - via a `Stop`-msg received from remote peer task.
+        #   NOTE
+        #   |_ previously this was triggered by calling
+        #   ``._rx_chan.aclose()`` on the send side of the channel
+        #   inside `Actor._deliver_ctx_payload()`, but now the 'stop'
+        #   message handling gets delegated to `PldRx.recv_pld()`
+        #   internals.
+        except trio.EndOfChannel as eoc:
+            # a graceful stream finished signal
             self._eoc = eoc
+            src_err = eoc
 
-            # TODO: Locally, we want to close this stream gracefully, by
-            # terminating any local consumers tasks deterministically.
-            # Once we have broadcast support, we **don't** want to be
-            # closing this stream and not flushing a final value to
-            # remaining (clone) consumers who may not have been
-            # scheduled to receive it yet.
-            # try:
-            #     maybe_err_msg_or_res: dict = self._rx_chan.receive_nowait()
-            #     if maybe_err_msg_or_res:
-            #         log.warning(
-            #             'Discarding un-processed msg:\n'
-            #             f'{maybe_err_msg_or_res}'
-            #         )
-            # except trio.WouldBlock:
-            #     # no queued msgs that might be another remote
-            #     # error, so just raise the original EoC
-            #     pass
-
-            # raise eoc
-
-        # a ``ClosedResourceError`` indicates that the internal
-        # feeder memory receive channel was closed likely by the
-        # runtime after the associated transport-channel
-        # disconnected or broke.
+        # a `ClosedResourceError` indicates that the internal feeder
+        # memory receive channel was closed likely by the runtime
+        # after the associated transport-channel disconnected or
+        # broke.
         except trio.ClosedResourceError as cre:  # by self._rx_chan.receive()
             src_err = cre
             log.warning(
@@ -218,14 +198,15 @@ class MsgStream(trio.abc.Channel):
             # terminated and signal this local iterator to stop
             drained: list[Exception|dict] = await self.aclose()
             if drained:
+                # ?TODO? pass these to the `._ctx._drained_msgs: deque`
+                # and then iterate them as part of any `.wait_for_result()` call?
+                #
                 # from .devx import pause
                 # await pause()
                 log.warning(
-                    'Drained context msgs during closure:\n'
+                    'Drained context msgs during closure\n\n'
                     f'{drained}'
                 )
-            # TODO: pass these to the `._ctx._drained_msgs: deque`
-            # and then iterate them as part of any `.result()` call?
 
             # NOTE XXX: if the context was cancelled or remote-errored
             # but we received the stream close msg first, we
@@ -238,28 +219,36 @@ class MsgStream(trio.abc.Channel):
                 from_src_exc=src_err,
             )
 
-        # propagate any error but hide low-level frame details
-        # from the caller by default for debug noise reduction.
+        # propagate any error but hide low-level frame details from
+        # the caller by default for console/debug-REPL noise
+        # reduction.
         if (
             hide_tb
+            and (
 
-            # XXX NOTE XXX don't reraise on certain
-            # stream-specific internal error types like,
-            #
-            # - `trio.EoC` since we want to use the exact instance
-            #   to ensure that it is the error that bubbles upward
-            #   for silent absorption by `Context.open_stream()`.
- and not self._eoc + # XXX NOTE special conditions: don't reraise on + # certain stream-specific internal error types like, + # + # - `trio.EoC` since we want to use the exact instance + # to ensure that it is the error that bubbles upward + # for silent absorption by `Context.open_stream()`. + not self._eoc - # - `RemoteActorError` (or `ContextCancelled`) if it gets - # raised from `_raise_from_no_key_in_msg()` since we - # want the same (as the above bullet) for any - # `.open_context()` block bubbled error raised by - # any nearby ctx API remote-failures. - # and not isinstance(src_err, RemoteActorError) + # - `RemoteActorError` (or subtypes like ctxc) + # since we want to present the error as though it is + # "sourced" directly from this `.receive()` call and + # generally NOT include the stack frames raised from + # inside the `PldRx` and/or the transport stack + # layers. + or isinstance(src_err, RemoteActorError) + ) ): raise type(src_err)(*src_err.args) from src_err else: + # for any non-graceful-EOC we want to NOT hide this frame + if not self._eoc: + __tracebackhide__: bool = False + raise src_err async def aclose(self) -> list[Exception|dict]: @@ -385,6 +374,8 @@ class MsgStream(trio.abc.Channel): if not self._eoc: message: str = ( f'Stream self-closed by {self._ctx.side!r}-side before EoC\n' + # } bc a stream is a "scope"/msging-phase inside an IPC + f'x}}>\n' f'|_{self}\n' ) log.cancel(message) -- 2.34.1 From b2087404e3d20f3e8bb4105f368fc0c69dde41ff Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 2 Jul 2024 16:31:58 -0400 Subject: [PATCH 182/305] Use `._entry` proto-ed "lifetime ops" in logging As per a WIP scribbled out TODO in `._entry.nest_from_op()`, change a bunch of "supervisor/lifetime mgmt ops" related log messages to contain some supervisor-annotation "headers" in an effort to give a terser "visual indication" of how some execution/scope/storage primitive entity (like an actor/task/ctx/connection) is being operated on (like, opening/started/closed/cancelled/erroring) from a "supervisor action" POV. Also tweak a bunch more emissions to lower levels to reduce noise around normal inter-actor operations like process and IPC ctx supervision. --- tractor/_context.py | 26 ++++++++++++++++++++------ tractor/_entry.py | 2 +- tractor/_runtime.py | 42 +++++++++++++++++++++++++----------------- tractor/_spawn.py | 25 ++++++++++++++++--------- 4 files changed, 62 insertions(+), 33 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 9b4cb6e6..7b1ac296 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -933,13 +933,14 @@ class Context: self.cancel_called = True header: str = ( - f'Cancelling ctx with peer from {side.upper()} side\n\n' + f'Cancelling ctx from {side.upper()}-side\n' ) reminfo: str = ( # ' =>\n' - f'Context.cancel() => {self.chan.uid}\n' + # f'Context.cancel() => {self.chan.uid}\n' + f'c)=> {self.chan.uid}\n' # f'{self.chan.uid}\n' - f' |_ @{self.dst_maddr}\n' + f' |_ @{self.dst_maddr}\n' f' >> {self.repr_rpc}\n' # f' >> {self._nsf}() -> {codec}[dict]:\n\n' # TODO: pull msg-type from spec re #320 @@ -1267,6 +1268,12 @@ class Context: @property def maybe_error(self) -> BaseException|None: + ''' + Return the (remote) error as outcome or `None`. + + Remote errors take precedence over local ones. + + ''' le: BaseException|None = self._local_error re: RemoteActorError|ContextCancelled|None = self._remote_error @@ -2182,9 +2189,16 @@ async def open_context_from_portal( # handled in the block above ^^^ !! 
# await _debug.pause()
         # log.cancel(
-        log.exception(
-            f'{ctx.side}-side of `Context` terminated with '
-            f'.outcome => {ctx.repr_outcome()}\n'
+        match scope_err:
+            case trio.Cancelled():
+                logmeth = log.cancel
+
+            # XXX explicitly report on any non-graceful-taskc cases
+            case _:
+                logmeth = log.exception
+
+        logmeth(
+            f'ctx {ctx.side!r}-side exited with {ctx.repr_outcome()}\n'
         )
 
         if debug_mode():
diff --git a/tractor/_entry.py b/tractor/_entry.py
index 60050ea4..3f174528 100644
--- a/tractor/_entry.py
+++ b/tractor/_entry.py
@@ -265,7 +265,7 @@ def _trio_main(
     except BaseException as err:
         logmeth = log.error
         exit_status: str = (
-            'Main actor task crashed during exit?\n'
+            'Main actor task exited due to crash?\n'
             +
             nest_from_op(
                 input_op='x)>',  # closed by error
diff --git a/tractor/_runtime.py b/tractor/_runtime.py
index efc9be9c..8bdcf6a5 100644
--- a/tractor/_runtime.py
+++ b/tractor/_runtime.py
@@ -66,10 +66,11 @@ from trio import (
 )
 
 from tractor.msg import (
-    pretty_struct,
-    NamespacePath,
-    types as msgtypes,
     MsgType,
+    NamespacePath,
+    Stop,
+    pretty_struct,
+    types as msgtypes,
 )
 from ._ipc import Channel
 from ._context import (
@@ -547,7 +548,8 @@ class Actor:
         ):
             log.cancel(
                 'Waiting on cancel request to peer\n'
-                f'`Portal.cancel_actor()` => {chan.uid}\n'
+                f'c)=>\n'
+                f' |_{chan.uid}\n'
             )
 
             # XXX: this is a soft wait on the channel (and its
@@ -644,12 +646,14 @@ class Actor:
 
                 # and an_exit_cs.cancelled_caught
             ):
-                log.warning(
+                report: str = (
                     'Timed out waiting on local actor-nursery to exit?\n'
                     f'{local_nursery}\n'
-                    f' |_{pformat(local_nursery._children)}\n'
                 )
-                # await _debug.pause()
+                if children := local_nursery._children:
+                    report += f' |_{pformat(children)}\n'
+
+                log.warning(report)
 
             if disconnected:
                 # if the transport died and this actor is still
@@ -821,14 +825,17 @@ class Actor:
                 # side,
             )]
         except KeyError:
-            log.warning(
+            report: str = (
                 'Ignoring invalid IPC ctx msg!\n\n'
-                f'<= sender: {uid}\n\n'
-                # XXX don't need right since it's always in msg?
-                # f'=> cid: {cid}\n\n'
-
-                f'{pretty_struct.pformat(msg)}\n'
+                f'<=? {uid}\n\n'
+                f'  |_{pretty_struct.pformat(msg)}\n'
             )
+            match msg:
+                case Stop():
+                    log.runtime(report)
+                case _:
+                    log.warning(report)
+
             return
 
         # if isinstance(msg, MsgTypeError):
@@ -1340,10 +1347,11 @@ class Actor:
             return True
 
         log.cancel(
-            'Cancel request for RPC task\n\n'
-            f'<= Actor._cancel_task(): {requesting_uid}\n\n'
-            f'=> {ctx._task}\n'
-            f'  |_ >> {ctx.repr_rpc}\n'
+            'Rxed cancel request for RPC task\n'
+            f'<=c) {requesting_uid}\n'
+            f'  |_{ctx._task}\n'
+            f'    >> {ctx.repr_rpc}\n'
+
             # f'=> {ctx._task}\n'
             #  f' >> Actor._cancel_task() => {ctx._task}\n'
             # f'  |_ {ctx._task}\n\n'
 
diff --git a/tractor/_spawn.py b/tractor/_spawn.py
index 986c2e29..562c7e5b 100644
--- a/tractor/_spawn.py
+++ b/tractor/_spawn.py
@@ -250,8 +250,9 @@ async def hard_kill(
 
     '''
     log.cancel(
-        'Terminating sub-proc:\n'
-        f'|_{proc}\n'
+        'Terminating sub-proc\n'
+        f'>x)\n'
+        f' |_{proc}\n'
     )
     # NOTE: this timeout used to do nothing since we were shielding
     # the ``.wait()`` inside ``new_proc()`` which will pretty much
@@ -297,8 +298,8 @@ async def hard_kill(
         log.critical(
             # 'Well, the #ZOMBIE_LORD_IS_HERE# to collect\n'
             '#T-800 deployed to collect zombie B0\n'
-            f'|\n'
-            f'|_{proc}\n'
+            f'>x)\n'
+            f' |_{proc}\n'
         )
         proc.kill()
 
@@ -326,8 +327,9 @@ async def soft_kill(
     uid: tuple[str, str] = portal.channel.uid
     try:
         log.cancel(
-            'Soft killing sub-actor via `Portal.cancel_actor()`\n'
-            f'|_{proc}\n'
+            'Soft killing sub-actor via portal request\n'
+            f'c)> {portal.chan.uid}\n'
+            f' |_{proc}\n'
         )
         # wait on sub-proc to signal termination
         await wait_func(proc)
@@ -556,8 +558,9 @@ async def trio_proc(
                 # cancel result waiter that may have been spawned in
                 # tandem if not done already
                 log.cancel(
-                    'Cancelling existing result waiter task for '
-                    f'{subactor.uid}'
+                    'Cancelling portal result reaper task\n'
+                    f'>c)\n'
+                    f' |_{subactor.uid}\n'
                 )
                 nursery.cancel_scope.cancel()
 
@@ -566,7 +569,11 @@ async def trio_proc(
         # allowed! Do this **after** cancellation/teardown to avoid
         # killing the process too early.
         if proc:
-            log.cancel(f'Hard reap sequence starting for {subactor.uid}')
+            log.cancel(
+                f'Hard reap sequence starting for subactor\n'
+                f'>x)\n'
+                f' |_{subactor}@{subactor.uid}\n'
+            )
 
             with trio.CancelScope(shield=True):
                 # don't clobber an ongoing pdb
-- 
2.34.1


From 276f88fd0c87a12038bf5bb2963408c4b42b09c5 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet 
Date: Wed, 3 Jul 2024 17:01:37 -0400
Subject: [PATCH 183/305] Quieter `Stop` handling on ctx result capture

In the `drain_to_final_msg()` impl, since a stream terminating
gracefully requires this msg, there's really no reason to
`log.cancel()` about it; go `.runtime()` level instead since we're
trying to de-noise under "normal operation".

Also,
- passthrough `hide_tb` to taskc-handler's `ctx.maybe_raise()` call.
- raise `MessagingError` for the `MsgType` unmatched `case _:`.
- detail the doc string motivation a little more.
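
As a rough sketch of the call-shape this drain-ep serves (not
verbatim, full sig elided here):

    return_msg, pre_result_drained = await drain_to_final_msg(
        ctx=ctx,  # the active IPC `Context`
    )

where the pair is the final `Return` + any priorly drained
`MsgType`s, unless an `Error` was rxed and raised instead.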
---
 tractor/msg/_ops.py | 55 +++++++++++++++++++++++++++++----------------
 1 file changed, 36 insertions(+), 19 deletions(-)

diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py
index 91c0ddea..f0f3b6b2 100644
--- a/tractor/msg/_ops.py
+++ b/tractor/msg/_ops.py
@@ -374,7 +374,7 @@ class PldRx(Struct):
 
             case _:
                 src_err = InternalError(
-                    'Unknown IPC msg ??\n\n'
+                    'Invalid IPC msg ??\n\n'
                     f'{msg}\n'
                 )
 
@@ -499,7 +499,7 @@ async def maybe_limit_plds(
         yield None
         return
 
-    # sanity on scoping
+    # sanity check on IPC scoping
     curr_ctx: Context = current_ipc_ctx()
     assert ctx is curr_ctx
 
@@ -510,6 +510,8 @@ async def maybe_limit_plds(
     ) as msgdec:
         yield msgdec
 
+    # when the applied spec is unwound/removed, the same IPC-ctx
+    # should still be in scope.
     curr_ctx: Context = current_ipc_ctx()
     assert ctx is curr_ctx
 
@@ -525,16 +527,26 @@ async def drain_to_final_msg(
     list[MsgType]
 ]:
     '''
-    Drain IPC msgs delivered to the underlying IPC primitive's
-    rx-mem-chan (eg. `Context._rx_chan`) from the runtime in
-    search for a final result or error.
+    Drain IPC msgs delivered to the underlying IPC context's
+    rx-mem-chan (i.e. from `Context._rx_chan`) in search for a final
+    `Return` or `Error` msg.
 
-    The motivation here is to ideally capture errors during ctxc
-    conditions where a canc-request/or local error is sent but the
-    local task also excepts and enters the
-    `Portal.open_context().__aexit__()` block wherein we prefer to
-    capture and raise any remote error or ctxc-ack as part of the
-    `ctx.result()` cleanup and teardown sequence.
+    Deliver the `Return` + preceding drained msgs (`list[MsgType]`)
+    as a pair unless an `Error` is found, in which case unpack and
+    raise it.
+
+    The motivation here is to always capture any remote error relayed
+    by the remote peer task during a ctxc condition.
+
+    For eg. a ctxc-request may be sent to the peer as part of the
+    local task's (request for) cancellation but then that same task
+    **also errors** before executing the teardown in the
+    `Portal.open_context().__aexit__()` block. In such error-on-exit
+    cases we want to always capture and raise any delivered remote
+    error (like an expected ctxc-ACK) as part of the final
+    `ctx.wait_for_result()` teardown sequence such that the
+    `Context.outcome` related state always reflects what transpired
+    even after ctx closure and the `.open_context()` block exit.
 
     '''
     __tracebackhide__: bool = hide_tb
@@ -572,7 +584,6 @@ async def drain_to_final_msg(
             # |_from tractor.devx._debug import pause
             #   await pause()
 
-
             # NOTE: we get here if the far end was
             # `ContextCancelled` in 2 cases:
             # 1. we requested the cancellation and thus
             # 2. WE DID NOT REQUEST that cancel and thus
             # SHOULD RAISE HERE!
             except trio.Cancelled as taskc:
-
                 # CASE 2: mask the local cancelled-error(s)
                 # only when we are sure the remote error is
                 # the source cause of this local task's
                 # cancellation.
                 ctx.maybe_raise(
-                    # TODO: when use this/
+                    hide_tb=hide_tb,
+                    # TODO: when use this?
                     # from_src_exc=taskc,
                 )
 
            # CASE 1: we DID request the cancel we simply
@@ -659,7 +670,7 @@ async def drain_to_final_msg(
             # Stop()
             case Stop():
                 pre_result_drained.append(msg)
-                log.cancel(
+                log.runtime(  # normal/expected shutdown transaction
                     'Remote stream terminated due to "stop" msg:\n\n'
                     f'{pretty_struct.pformat(msg)}\n'
                 )
@@ -719,13 +730,19 @@ async def drain_to_final_msg(
                 pre_result_drained.append(msg)
                 # It's definitely an internal error if any other
                 # msg type without a`'cid'` field arrives here!
+ report: str = ( + f'Invalid or unknown msg type {type(msg)!r}!?\n' + ) if not msg.cid: - raise InternalError( - 'Unexpected cid-missing msg?\n\n' - f'{msg}\n' + report += ( + '\nWhich also has no `.cid` field?\n' ) - raise RuntimeError('Unknown msg type: {msg}') + raise MessagingError( + report + + + f'\n{msg}\n' + ) else: log.cancel( -- 2.34.1 From dba2d87bafaccece42be4a0793c02f2596ed1dd2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 4 Jul 2024 15:06:15 -0400 Subject: [PATCH 184/305] More-n-more scops annots in logging --- tractor/_context.py | 11 ++++++---- tractor/_entry.py | 1 + tractor/_portal.py | 6 +++--- tractor/_runtime.py | 50 ++++++++++++++++++++++++++++----------------- 4 files changed, 42 insertions(+), 26 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 7b1ac296..e52ee6b7 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -2376,8 +2376,9 @@ async def open_context_from_portal( and ctx.cancel_acked ): log.cancel( - f'Context cancelled by {ctx.side!r}-side task\n' - f'|_{ctx._task}\n\n' + f'Context cancelled by local {ctx.side!r}-side task\n' + f'c)>\n' + f' |_{ctx._task}\n\n' f'{repr(scope_err)}\n' ) @@ -2393,8 +2394,10 @@ async def open_context_from_portal( # type_only=True, ) log.cancel( - f'Context terminated due to local {ctx.side!r}-side error:\n\n' - f'{ctx.chan.uid} => {outcome_str}\n' + f'Context terminated due to {ctx.side!r}-side\n\n' + # TODO: do an x)> on err and c)> only for ctxc? + f'c)> {outcome_str}\n' + f' |_{ctx.repr_rpc}\n' ) # FINALLY, remove the context from runtime tracking and diff --git a/tractor/_entry.py b/tractor/_entry.py index 3f174528..a072706c 100644 --- a/tractor/_entry.py +++ b/tractor/_entry.py @@ -243,6 +243,7 @@ def _trio_main( nest_from_op( input_op=')>', # like a "closed-to-play"-icon from super perspective tree_str=actor_info, + back_from_op=1, ) ) try: diff --git a/tractor/_portal.py b/tractor/_portal.py index 3dc7f3a1..f5a66836 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -263,11 +263,11 @@ class Portal: return False reminfo: str = ( - f'Portal.cancel_actor() => {self.channel.uid}\n' - f'|_{chan}\n' + f'c)=> {self.channel.uid}\n' + f' |_{chan}\n' ) log.cancel( - f'Requesting runtime cancel for peer\n\n' + f'Requesting actor-runtime cancel for peer\n\n' f'{reminfo}' ) diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 8bdcf6a5..46d12854 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -441,10 +441,10 @@ class Actor: TransportClosed, ): - # XXX: This may propagate up from ``Channel._aiter_recv()`` - # and ``MsgpackStream._inter_packets()`` on a read from the + # XXX: This may propagate up from `Channel._aiter_recv()` + # and `MsgpackStream._inter_packets()` on a read from the # stream particularly when the runtime is first starting up - # inside ``open_root_actor()`` where there is a check for + # inside `open_root_actor()` where there is a check for # a bound listener on the "arbiter" addr. the reset will be # because the handshake was never meant took place. 
log.runtime( @@ -509,8 +509,9 @@ class Actor: ) except trio.Cancelled: log.cancel( - 'IPC transport msg loop was cancelled for \n' - f'|_{chan}\n' + 'IPC transport msg loop was cancelled\n' + f'c)>\n' + f' |_{chan}\n' ) raise @@ -547,9 +548,9 @@ class Actor: ): log.cancel( - 'Waiting on cancel request to peer\n' + 'Waiting on cancel request to peer..\n' f'c)=>\n' - f' |_{chan.uid}\n' + f' |_{chan.uid}\n' ) # XXX: this is a soft wait on the channel (and its @@ -648,10 +649,14 @@ class Actor: ): report: str = ( 'Timed out waiting on local actor-nursery to exit?\n' - f'{local_nursery}\n' + f'c)>\n' + f' |_{local_nursery}\n' ) if children := local_nursery._children: - report += f' |_{pformat(children)}\n' + # indent from above local-nurse repr + report += ( + f' |_{pformat(children)}\n' + ) log.warning(report) @@ -1238,8 +1243,9 @@ class Actor: # TODO: just use the new `Context.repr_rpc: str` (and # other) repr fields instead of doing this all manual.. msg: str = ( - f'Runtime cancel request from {requester_type}:\n\n' - f'<= .cancel(): {requesting_uid}\n\n' + f'Actor-runtime cancel request from {requester_type}\n\n' + f'<=c) {requesting_uid}\n' + f' |_{self}\n' ) # TODO: what happens here when we self-cancel tho? @@ -1349,7 +1355,7 @@ class Actor: log.cancel( 'Rxed cancel request for RPC task\n' f'<=c) {requesting_uid}\n' - f' |_{ctx._task}\n' + f' |_{ctx._task}\n' f' >> {ctx.repr_rpc}\n' # f'=> {ctx._task}\n' # f' >> Actor._cancel_task() => {ctx._task}\n' @@ -1467,17 +1473,17 @@ class Actor: "IPC channel's " ) rent_chan_repr: str = ( - f' |_{parent_chan}\n\n' + f' |_{parent_chan}\n\n' if parent_chan else '' ) log.cancel( f'Cancelling {descr} RPC tasks\n\n' - f'<= canceller: {req_uid}\n' + f'<=c) {req_uid} [canceller]\n' f'{rent_chan_repr}' - f'=> cancellee: {self.uid}\n' - f' |_{self}.cancel_rpc_tasks()\n' - f' |_tasks: {len(tasks)}\n' + f'c)=> {self.uid} [cancellee]\n' + f' |_{self} [with {len(tasks)} tasks]\n' + # f' |_tasks: {len(tasks)}\n' # f'{tasks_str}' ) for ( @@ -1935,9 +1941,15 @@ async def async_main( with CancelScope(shield=True): await actor._no_more_peers.wait() - teardown_report += ('-> All peer channels are complete\n') + teardown_report += ( + '-> All peer channels are complete\n' + ) - teardown_report += ('Actor runtime exited') + teardown_report += ( + 'Actor runtime exiting\n' + f'>)\n' + f'|_{actor}\n' + ) log.info(teardown_report) -- 2.34.1 From 2df7ffd7021dbc23ae8ac3fe98cfd4381a393ed5 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 4 Jul 2024 19:40:11 -0400 Subject: [PATCH 185/305] Finally implement peer-lookup optimization.. There's a been a todo for soo long for this XD Since all `Actor`'s store a set of `._peers` we can try a lookup on that table as a shortcut before pinging the registry Bo Impl deats: - add a new `._discovery.get_peer_by_name()` routine which attempts the `._peers` lookup by combining a copy of that `dict` + an entry added for `Actor._parent_chan` (since all subs have a parent and often the desired contact is just that connection). - change `.find_actor()` (for the `only_first == True` case), `.query_actor()` and `.wait_for_actor()` to call the new helper and deliver appropriate outputs if possible. Other, - deprecate `get_arbiter()` def and all usage in tests and examples. - drop lingering use of `arbiter_sockaddr` arg to various routines. - tweak the `Actor` doc str as well as some code fmting and a tweak to the `._stream_handler()`'s initial `con_status: str` logging value since the way it was could never be reached.. 
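
(Rough sketch of the new shortcut; the real `get_peer_by_name()` impl
below also folds in a `._parent_chan` entry and warns when a matched
peer has no connected chans:

    for (_, peer_name), chans in current_actor()._peers.items():
        if name == peer_name and chans:
            return chans  # skip the registrar round-trip
)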
oh and `.warning()` on any new connections which already have a `_pre_chan: Channel` entry in `._peers` so we can start minimizing IPC duplications. --- examples/service_discovery.py | 2 +- tests/test_discovery.py | 6 +- tests/test_local.py | 2 +- tests/test_multi_program.py | 4 +- tractor/__init__.py | 2 +- tractor/_discovery.py | 170 +++++++++++++++++++--------------- tractor/_runtime.py | 96 +++++++++---------- 7 files changed, 151 insertions(+), 131 deletions(-) diff --git a/examples/service_discovery.py b/examples/service_discovery.py index 858f7f12..a0f37b88 100644 --- a/examples/service_discovery.py +++ b/examples/service_discovery.py @@ -9,7 +9,7 @@ async def main(service_name): async with tractor.open_nursery() as an: await an.start_actor(service_name) - async with tractor.get_arbiter('127.0.0.1', 1616) as portal: + async with tractor.get_registry('127.0.0.1', 1616) as portal: print(f"Arbiter is listening on {portal.channel}") async with tractor.wait_for_actor(service_name) as sockaddr: diff --git a/tests/test_discovery.py b/tests/test_discovery.py index cd9dc022..508fdbe1 100644 --- a/tests/test_discovery.py +++ b/tests/test_discovery.py @@ -26,7 +26,7 @@ async def test_reg_then_unreg(reg_addr): portal = await n.start_actor('actor', enable_modules=[__name__]) uid = portal.channel.uid - async with tractor.get_arbiter(*reg_addr) as aportal: + async with tractor.get_registry(*reg_addr) as aportal: # this local actor should be the arbiter assert actor is aportal.actor @@ -160,7 +160,7 @@ async def spawn_and_check_registry( async with tractor.open_root_actor( registry_addrs=[reg_addr], ): - async with tractor.get_arbiter(*reg_addr) as portal: + async with tractor.get_registry(*reg_addr) as portal: # runtime needs to be up to call this actor = tractor.current_actor() @@ -298,7 +298,7 @@ async def close_chans_before_nursery( async with tractor.open_root_actor( registry_addrs=[reg_addr], ): - async with tractor.get_arbiter(*reg_addr) as aportal: + async with tractor.get_registry(*reg_addr) as aportal: try: get_reg = partial(unpack_reg, aportal) diff --git a/tests/test_local.py b/tests/test_local.py index a019d771..ecdad5fe 100644 --- a/tests/test_local.py +++ b/tests/test_local.py @@ -38,7 +38,7 @@ async def test_self_is_registered_localportal(reg_addr): "Verify waiting on the arbiter to register itself using a local portal." 
actor = tractor.current_actor()
     assert actor.is_arbiter
 
-    async with tractor.get_arbiter(*reg_addr) as portal:
+    async with tractor.get_registry(*reg_addr) as portal:
         assert isinstance(portal, tractor._portal.LocalPortal)
 
         with trio.fail_after(0.2):
diff --git a/tests/test_multi_program.py b/tests/test_multi_program.py
index 92f4c52d..860eeebb 100644
--- a/tests/test_multi_program.py
+++ b/tests/test_multi_program.py
@@ -32,7 +32,7 @@ def test_abort_on_sigint(daemon):
 @tractor_test
 async def test_cancel_remote_arbiter(daemon, reg_addr):
     assert not tractor.current_actor().is_arbiter
-    async with tractor.get_arbiter(*reg_addr) as portal:
+    async with tractor.get_registry(*reg_addr) as portal:
         await portal.cancel_actor()
 
     time.sleep(0.1)
@@ -41,7 +41,7 @@ async def test_cancel_remote_arbiter(daemon, reg_addr):
 
     # no arbiter socket should exist
     with pytest.raises(OSError):
-        async with tractor.get_arbiter(*reg_addr) as portal:
+        async with tractor.get_registry(*reg_addr) as portal:
             pass
 
diff --git a/tractor/__init__.py b/tractor/__init__.py
index 4f9e592d..a27a3b59 100644
--- a/tractor/__init__.py
+++ b/tractor/__init__.py
@@ -31,7 +31,7 @@ from ._streaming import (
     stream as stream,
 )
 from ._discovery import (
-    get_arbiter as get_arbiter,
+    get_registry as get_registry,
     find_actor as find_actor,
     wait_for_actor as wait_for_actor,
     query_actor as query_actor,
diff --git a/tractor/_discovery.py b/tractor/_discovery.py
index 99a4dd68..a681c63b 100644
--- a/tractor/_discovery.py
+++ b/tractor/_discovery.py
@@ -26,8 +26,8 @@ from typing import (
     TYPE_CHECKING,
 )
 from contextlib import asynccontextmanager as acm
-import warnings
 
+from tractor.log import get_logger
 from .trionics import gather_contexts
 from ._ipc import _connect_chan, Channel
 from ._portal import (
@@ -40,11 +40,13 @@ from ._state import (
     _runtime_vars,
 )
 
-
 if TYPE_CHECKING:
     from ._runtime import Actor
 
 
+log = get_logger(__name__)
+
+
 @acm
 async def get_registry(
     host: str,
@@ -56,14 +58,12 @@ async def get_registry(
 ]:
     '''
     Return a portal instance connected to a local or remote
-    arbiter.
+    registry-service actor; if a connection already exists re-use it
+    (presumably to call a `.register_actor()` registry runtime RPC
+    ep).
 
     '''
-    actor = current_actor()
-
-    if not actor:
-        raise RuntimeError("No actor instance has been defined yet?")
-
+    actor: Actor = current_actor()
     if actor.is_registrar:
         # we're already the arbiter
         # (likely a re-entrant call from the arbiter actor)
@@ -72,6 +72,8 @@ async def get_registry(
             Channel((host, port))
         )
     else:
+        # TODO: try to look pre-existing connection from
+        # `Actor._peers` and use it instead?
         async with (
             _connect_chan(host, port) as chan,
             open_portal(chan) as regstr_ptl,
@@ -80,19 +82,6 @@ async def get_registry(
 
 
-# TODO: deprecate and this remove _arbiter form!
-@acm
-async def get_arbiter(*args, **kwargs):
-    warnings.warn(
-        '`tractor.get_arbiter()` is now deprecated!\n'
-        'Use `.get_registry()` instead!',
-        DeprecationWarning,
-        stacklevel=2,
-    )
-    async with get_registry(*args, **kwargs) as to_yield:
-        yield to_yield
-
-
 @acm
 async def get_root(
     **kwargs,
@@ -110,22 +99,53 @@ async def get_root(
         yield portal
 
 
+def get_peer_by_name(
+    name: str,
+    # uuid: str|None = None,
+
+) -> list[Channel]|None:  # at least 1
+    '''
+    Scan for an existing connection (set) to a named actor
+    and return any channels from `Actor._peers`.
+
+    This is an optimization method over querying the registrar for
+    the same info.
+
+    '''
+    actor: Actor = current_actor()
+    to_scan: dict[tuple, list[Channel]] = actor._peers.copy()
+    pchan: Channel|None = actor._parent_chan
+    if pchan:
+        to_scan[pchan.uid].append(pchan)
+
+    for aid, chans in to_scan.items():
+        _, peer_name = aid
+        if name == peer_name:
+            if not chans:
+                log.warning(
+                    f'No IPC chans for matching peer {peer_name}\n'
+                )
+                continue
+            return chans
+
+    return None
+
+
 @acm
 async def query_actor(
     name: str,
-    arbiter_sockaddr: tuple[str, int] | None = None,
-    regaddr: tuple[str, int] | None = None,
+    regaddr: tuple[str, int]|None = None,
 
 ) -> AsyncGenerator[
-    tuple[str, int] | None,
+    tuple[str, int]|None,
     None,
 ]:
     '''
-    Make a transport address lookup for an actor name to a specific
-    registrar.
+    Lookup a transport address (by actor name) via querying a registrar
+    listening @ `regaddr`.
 
-    Returns the (socket) address or ``None`` if no entry under that
-    name exists for the given registrar listening @ `regaddr`.
+    Returns the transport protocol (socket) address or `None` if no
+    entry under that name exists.
 
     '''
     actor: Actor = current_actor()
@@ -137,14 +157,10 @@ async def query_actor(
             'The current actor IS the registry!?'
         )
 
-    if arbiter_sockaddr is not None:
-        warnings.warn(
-            '`tractor.query_actor(regaddr=)` is deprecated.\n'
-            'Use `registry_addrs: list[tuple]` instead!',
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        regaddr: list[tuple[str, int]] = arbiter_sockaddr
+    maybe_peers: list[Channel]|None = get_peer_by_name(name)
+    if maybe_peers:
+        yield maybe_peers[0].raddr
+        return
 
     reg_portal: Portal
     regaddr: tuple[str, int] = regaddr or actor.reg_addrs[0]
@@ -159,10 +175,28 @@ async def query_actor(
         yield sockaddr
 
 
+@acm
+async def maybe_open_portal(
+    addr: tuple[str, int],
+    name: str,
+):
+    async with query_actor(
+        name=name,
+        regaddr=addr,
+    ) as sockaddr:
+        pass
+
+    if sockaddr:
+        async with _connect_chan(*sockaddr) as chan:
+            async with open_portal(chan) as portal:
+                yield portal
+    else:
+        yield None
+
+
 @acm
 async def find_actor(
     name: str,
-    arbiter_sockaddr: tuple[str, int]|None = None,
     registry_addrs: list[tuple[str, int]]|None = None,
 
     only_first: bool = True,
@@ -179,29 +213,12 @@ async def find_actor(
     known to the arbiter.
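+
+    (When a live connection to the named peer already exists, and
+    `only_first == True`, it is re-used instead of hitting the
+    registrar; see the new `get_peer_by_name()` above.)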
''' - if arbiter_sockaddr is not None: - warnings.warn( - '`tractor.find_actor(arbiter_sockaddr=)` is deprecated.\n' - 'Use `registry_addrs: list[tuple]` instead!', - DeprecationWarning, - stacklevel=2, - ) - registry_addrs: list[tuple[str, int]] = [arbiter_sockaddr] - - @acm - async def maybe_open_portal_from_reg_addr( - addr: tuple[str, int], - ): - async with query_actor( - name=name, - regaddr=addr, - ) as sockaddr: - if sockaddr: - async with _connect_chan(*sockaddr) as chan: - async with open_portal(chan) as portal: - yield portal - else: - yield None + # optimization path, use any pre-existing peer channel + maybe_peers: list[Channel]|None = get_peer_by_name(name) + if maybe_peers and only_first: + async with open_portal(maybe_peers[0]) as peer_portal: + yield peer_portal + return if not registry_addrs: # XXX NOTE: make sure to dynamically read the value on @@ -217,10 +234,13 @@ async def find_actor( maybe_portals: list[ AsyncContextManager[tuple[str, int]] ] = list( - maybe_open_portal_from_reg_addr(addr) + maybe_open_portal( + addr=addr, + name=name, + ) for addr in registry_addrs ) - + portals: list[Portal] async with gather_contexts( mngrs=maybe_portals, ) as portals: @@ -254,31 +274,31 @@ async def find_actor( @acm async def wait_for_actor( name: str, - arbiter_sockaddr: tuple[str, int] | None = None, registry_addr: tuple[str, int] | None = None, ) -> AsyncGenerator[Portal, None]: ''' - Wait on an actor to register with the arbiter. - - A portal to the first registered actor is returned. + Wait on at least one peer actor to register `name` with the + registrar, yield a `Portal to the first registree. ''' actor: Actor = current_actor() - if arbiter_sockaddr is not None: - warnings.warn( - '`tractor.wait_for_actor(arbiter_sockaddr=)` is deprecated.\n' - 'Use `registry_addr: tuple` instead!', - DeprecationWarning, - stacklevel=2, - ) - registry_addr: tuple[str, int] = arbiter_sockaddr + # optimization path, use any pre-existing peer channel + maybe_peers: list[Channel]|None = get_peer_by_name(name) + if maybe_peers: + async with open_portal(maybe_peers[0]) as peer_portal: + yield peer_portal + return + regaddr: tuple[str, int] = ( + registry_addr + or + actor.reg_addrs[0] + ) # TODO: use `.trionics.gather_contexts()` like # above in `find_actor()` as well? reg_portal: Portal - regaddr: tuple[str, int] = registry_addr or actor.reg_addrs[0] async with get_registry(*regaddr) as reg_portal: sockaddrs = await reg_portal.run_from_ns( 'self', diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 46d12854..63f04837 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -111,25 +111,26 @@ class Actor: ''' The fundamental "runtime" concurrency primitive. - An *actor* is the combination of a regular Python process executing - a ``trio`` task tree, communicating with other actors through - "memory boundary portals" - which provide a native async API around - IPC transport "channels" which themselves encapsulate various - (swappable) network protocols. + An "actor" is the combination of a regular Python process + executing a `trio.run()` task tree, communicating with other + "actors" through "memory boundary portals": `Portal`, which + provide a high-level async API around IPC "channels" (`Channel`) + which themselves encapsulate various (swappable) network + transport protocols for sending msgs between said memory domains + (processes, hosts, non-GIL threads). - - Each "actor" is ``trio.run()`` scheduled "runtime" composed of - many concurrent tasks in a single thread. 
The "runtime" tasks - conduct a slew of low(er) level functions to make it possible - for message passing between actors as well as the ability to - create new actors (aka new "runtimes" in new processes which - are supervised via a nursery construct). Each task which sends - messages to a task in a "peer" (not necessarily a parent-child, + Each "actor" is `trio.run()` scheduled "runtime" composed of many + concurrent tasks in a single thread. The "runtime" tasks conduct + a slew of low(er) level functions to make it possible for message + passing between actors as well as the ability to create new + actors (aka new "runtimes" in new processes which are supervised + via an "actor-nursery" construct). Each task which sends messages + to a task in a "peer" actor (not necessarily a parent-child, depth hierarchy) is able to do so via an "address", which maps IPC connections across memory boundaries, and a task request id - which allows for per-actor tasks to send and receive messages - to specific peer-actor tasks with which there is an ongoing - RPC/IPC dialog. + which allows for per-actor tasks to send and receive messages to + specific peer-actor tasks with which there is an ongoing RPC/IPC + dialog. ''' # ugh, we need to get rid of this and replace with a "registry" sys @@ -226,17 +227,20 @@ class Actor: # by the user (currently called the "arbiter") self._spawn_method: str = spawn_method - self._peers: defaultdict = defaultdict(list) + self._peers: defaultdict[ + str, # uaid + list[Channel], # IPC conns from peer + ] = defaultdict(list) self._peer_connected: dict[tuple[str, str], trio.Event] = {} self._no_more_peers = trio.Event() self._no_more_peers.set() + + # RPC state self._ongoing_rpc_tasks = trio.Event() self._ongoing_rpc_tasks.set() - - # (chan, cid) -> (cancel_scope, func) self._rpc_tasks: dict[ - tuple[Channel, str], - tuple[Context, Callable, trio.Event] + tuple[Channel, str], # (chan, cid) + tuple[Context, Callable, trio.Event] # (ctx=>, fn(), done?) ] = {} # map {actor uids -> Context} @@ -313,7 +317,10 @@ class Actor: event = self._peer_connected.setdefault(uid, trio.Event()) await event.wait() log.debug(f'{uid!r} successfully connected back to us') - return event, self._peers[uid][-1] + return ( + event, + self._peers[uid][-1], + ) def load_modules( self, @@ -404,32 +411,11 @@ class Actor: ''' self._no_more_peers = trio.Event() # unset by making new chan = Channel.from_stream(stream) - their_uid: tuple[str, str]|None = chan.uid - if their_uid: - log.warning( - f'Re-connection from already known {their_uid}' - ) - else: - log.runtime(f'New connection to us @{chan.raddr}') - - con_status: str = '' - - # TODO: remove this branch since can never happen? - # NOTE: `.uid` is only set after first contact - if their_uid: - con_status = ( - 'IPC Re-connection from already known peer?\n' - ) - else: - con_status = ( - 'New inbound IPC connection <=\n' - ) - - con_status += ( + con_status: str = ( + 'New inbound IPC connection <=\n' f'|_{chan}\n' - # f' |_@{chan.raddr}\n\n' - # ^-TODO-^ remove since alfready in chan.__repr__()? 
) + # send/receive initial handshake response try: uid: tuple|None = await self._do_handshake(chan) @@ -454,9 +440,22 @@ class Actor: ) return + familiar: str = 'new-peer' + if _pre_chan := self._peers.get(uid): + familiar: str = 'pre-existing-peer' + uid_short: str = f'{uid[0]}[{uid[1][-6:]}]' con_status += ( - f' -> Handshake with actor `{uid[0]}[{uid[1][-6:]}]` complete\n' + f' -> Handshake with {familiar} `{uid_short}` complete\n' ) + + if _pre_chan: + log.warning( + # con_status += ( + # ^TODO^ swap once we minimize conn duplication + f' -> Wait, we already have IPC with `{uid_short}`??\n' + f' |_{_pre_chan}\n' + ) + # IPC connection tracking for both peers and new children: # - if this is a new channel to a locally spawned # sub-actor there will be a spawn wait even registered @@ -1552,7 +1551,7 @@ class Actor: def accept_addr(self) -> tuple[str, int]: ''' Primary address to which the IPC transport server is - bound. + bound and listening for new connections. ''' # throws OSError on failure @@ -1569,6 +1568,7 @@ class Actor: def get_chans( self, uid: tuple[str, str], + ) -> list[Channel]: ''' Return all IPC channels to the actor with provided `uid`. -- 2.34.1 From aa7448793a63307a586af0d1fd540da02ec6a45c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 5 Jul 2024 13:31:29 -0400 Subject: [PATCH 186/305] Adjusts advanced fault tests to match new `TransportClosed` semantics --- tests/test_advanced_faults.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/tests/test_advanced_faults.py b/tests/test_advanced_faults.py index dfaeb68e..a4d17791 100644 --- a/tests/test_advanced_faults.py +++ b/tests/test_advanced_faults.py @@ -91,7 +91,8 @@ def test_ipc_channel_break_during_stream( # non-`trio` spawners should never hit the hang condition that # requires the user to do ctl-c to cancel the actor tree. 
- expect_final_exc = trio.ClosedResourceError + # expect_final_exc = trio.ClosedResourceError + expect_final_exc = tractor.TransportClosed mod: ModuleType = import_path( examples_dir() / 'advanced_faults' @@ -157,7 +158,7 @@ def test_ipc_channel_break_during_stream( if pre_aclose_msgstream: expect_final_exc = KeyboardInterrupt - # NOTE when the parent IPC side dies (even if the child's does as well + # NOTE when the parent IPC side dies (even if the child does as well # but the child fails BEFORE the parent) we always expect the # IPC layer to raise a closed-resource, NEVER do we expect # a stop msg since the parent-side ctx apis will error out @@ -169,7 +170,8 @@ def test_ipc_channel_break_during_stream( and ipc_break['break_child_ipc_after'] is False ): - expect_final_exc = trio.ClosedResourceError + # expect_final_exc = trio.ClosedResourceError + expect_final_exc = tractor.TransportClosed # BOTH but, PARENT breaks FIRST elif ( @@ -180,7 +182,8 @@ def test_ipc_channel_break_during_stream( ipc_break['break_parent_ipc_after'] ) ): - expect_final_exc = trio.ClosedResourceError + # expect_final_exc = trio.ClosedResourceError + expect_final_exc = tractor.TransportClosed with pytest.raises( expected_exception=( @@ -199,8 +202,8 @@ def test_ipc_channel_break_during_stream( **ipc_break, ) ) - except KeyboardInterrupt as kbi: - _err = kbi + except KeyboardInterrupt as _kbi: + kbi = _kbi if expect_final_exc is not KeyboardInterrupt: pytest.fail( 'Rxed unexpected KBI !?\n' @@ -209,6 +212,21 @@ def test_ipc_channel_break_during_stream( raise + except tractor.TransportClosed as _tc: + tc = _tc + if expect_final_exc is KeyboardInterrupt: + pytest.fail( + 'Unexpected transport failure !?\n' + f'{repr(tc)}' + ) + cause: Exception = tc.__cause__ + assert ( + type(cause) is trio.ClosedResourceError + and + cause.args[0] == 'another task closed this fd' + ) + raise + # get raw instance from pytest wrapper value = excinfo.value if isinstance(value, ExceptionGroup): -- 2.34.1 From ae163689493803b237d0ca90cb88fa56d07ff653 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 22 Aug 2024 17:22:53 -0400 Subject: [PATCH 187/305] Add `tb_hide: bool` ctl flag to `_open_and_supervise_one_cancels_all_nursery()` --- tractor/_supervise.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tractor/_supervise.py b/tractor/_supervise.py index fb737c12..4a538e9f 100644 --- a/tractor/_supervise.py +++ b/tractor/_supervise.py @@ -373,11 +373,12 @@ class ActorNursery: @acm async def _open_and_supervise_one_cancels_all_nursery( actor: Actor, + tb_hide: bool = False, ) -> typing.AsyncGenerator[ActorNursery, None]: # normally don't need to show user by default - __tracebackhide__: bool = True + __tracebackhide__: bool = tb_hide outer_err: BaseException|None = None inner_err: BaseException|None = None -- 2.34.1 From 49cd00232ee771b7c6bd5aea6558d049fac88dba Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 22 Aug 2024 20:19:55 -0400 Subject: [PATCH 188/305] Add `indent: str` suport to `Context.pformat()` using `textwrap` --- tractor/_context.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index e52ee6b7..31db2bad 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -46,6 +46,7 @@ from dataclasses import ( from functools import partial import inspect from pprint import pformat +import textwrap from typing import ( Any, AsyncGenerator, @@ -335,6 +336,7 @@ class Context: extra_fields: dict[str, Any]|None = None, # 
^-TODO-^ some built-in extra state fields # we'll want in some devx specific cases? + indent: str|None = None, ) -> str: ds: str = '=' @@ -354,7 +356,6 @@ class Context: show_error_fields=True ) fmtstr: str = ( - f'\n' + f'{indent})>\n' ) + # NOTE: making this return a value that can be passed to # `eval()` is entirely **optional** dawggg B) # https://docs.python.org/3/library/functions.html#repr -- 2.34.1 From f14fb539581bd0dccc8c92f7d23bb6a4235b3edd Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 26 Aug 2024 14:29:09 -0400 Subject: [PATCH 189/305] Report any external-rent-task-canceller during msg-drain As in whenever `Context.cancel()` is not (runtime internally) called (i.e. `._cancel_called` is not set), we can attempt to detect the parent `trio` nursery/cancel-scope that is the source. Emit the report with a `.cancel()` level and attempt to repr in "sclang" form as well as unhide the stack frame for debug/traceback-in. --- tractor/msg/_ops.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index f0f3b6b2..2faadb9f 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -590,15 +590,36 @@ async def drain_to_final_msg( # SHOULD NOT raise that far end error, # 2. WE DID NOT REQUEST that cancel and thus # SHOULD RAISE HERE! - except trio.Cancelled as taskc: + except trio.Cancelled as _taskc: + taskc: trio.Cancelled = _taskc + + # report when the cancellation wasn't (ostensibly) due to + # RPC operation, some surrounding parent cancel-scope. + if not ctx._scope.cancel_called: + task: trio.lowlevel.Task = trio.lowlevel.current_task() + rent_n: trio.Nursery = task.parent_nursery + if ( + (local_cs := rent_n.cancel_scope).cancel_called + ): + log.cancel( + 'RPC-ctx cancelled by local-parent scope during drain!\n\n' + f'c}}>\n' + f' |_{rent_n}\n' + f' |_.cancel_scope = {local_cs}\n' + f' |_>c}}\n' + f' |_{ctx.pformat(indent=" "*9)}' + # ^TODO, some (other) simpler repr here? + ) + __tracebackhide__: bool = False + # CASE 2: mask the local cancelled-error(s) # only when we are sure the remote error is # the source cause of this local task's # cancellation. ctx.maybe_raise( hide_tb=hide_tb, - # TODO: when use this? - # from_src_exc=taskc, + from_src_exc=taskc, + # ?TODO? when *should* we use this? ) # CASE 1: we DID request the cancel we simply -- 2.34.1 From 904d8ce8ff4d6d6835109d712aa0c02ec608b69c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 9 Dec 2024 17:36:52 -0500 Subject: [PATCH 190/305] Denoise duplicate chan logging for now --- tractor/_runtime.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 63f04837..662dd67a 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -449,11 +449,14 @@ class Actor: ) if _pre_chan: - log.warning( # con_status += ( # ^TODO^ swap once we minimize conn duplication - f' -> Wait, we already have IPC with `{uid_short}`??\n' - f' |_{_pre_chan}\n' + # -[ ] last thing might be reg/unreg runtime reqs? + # log.warning( + log.debug( + f'?Wait?\n' + f'We already have IPC with peer {uid_short!r}\n' + f'|_{_pre_chan}\n' ) # IPC connection tracking for both peers and new children: -- 2.34.1 From eaa5d2354340c6e3ff1fa487f49ef83fb468ff10 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 24 Jun 2024 16:10:23 -0400 Subject: [PATCH 191/305] Hack `asyncio` to not abandon a guest-mode run? 
Took me a while to figure out what the heck was going on but, turns out `asyncio` changed their SIGINT handling in 3.11 as per: https://docs.python.org/3/library/asyncio-runner.html#handling-keyboard-interruption I'm not entirely sure if it's the 3.11 changes or possibly wtv further updates were made in 3.12 but more or less due to the way our current main task was written the `trio` guest-run was getting abandoned on SIGINTs sent from the OS to the infected child proc.. Note that much of the bug and soln cases are layed out in very detailed comment-notes both in the new test and `run_as_asyncio_guest()`, right above the final "fix" lines. Add new `test_infected_aio.test_sigint_closes_lifetime_stack()` test suite which reliably triggers all abandonment issues with multiple cases of different parent behaviour post-sending-SIGINT-to-child: 1. briefly sleep then raise a KBI in the parent which was originally demonstrating the file leak not being cleaned up by `Actor.lifetime_stack.close()` and simulates a ctl-c from the console (relayed in tandem by the OS to the parent and child processes). 2. do `Context.wait_for_result()` on the child context which would hang and timeout since the actor runtime would never complete and thus never relay a `ContextCancelled`. 3. both with and without running a `asyncio` task in the `manage_file` child actor; originally it seemed that with an aio task scheduled in the child actor the guest-run abandonment always was the "loud" case where there seemed to be some actor teardown but with tbs from python failing to gracefully exit the `trio` runtime.. The (seemingly working) "fix" required 2 lines of code to be run inside a `asyncio.CancelledError` handler around the call to `await trio_done_fut`: - `Actor.cancel_soon()` which schedules the actor runtime to cancel on the next `trio` runner cycle and results in a "self cancellation" of the actor. - "pumping the `asyncio` event loop" with a non-0 `.sleep(0.1)` XD |_ seems that a "shielded" pump with some actual `delay: float >= 0` did the trick to get `asyncio` to allow the `trio` runner/loop to fully complete its guest-run without abandonment. Other supporting changes: - move `._exceptions.AsyncioCancelled`, our renamed `asyncio.CancelledError` error-sub-type-wrapper, to `.to_asyncio` and make it derive from `CancelledError` so as to be sure when raised by our `asyncio` x-> `trio` exception relay machinery that `asyncio` is getting the specific type it expects during cancellation. - do "summary status" style logging in `run_as_asyncio_guest()` wherein we compile the eventual `startup_msg: str` emitted just before waiting on the `trio_done_fut`. - shield-wait with `out: Outcome = await asyncio.shield(trio_done_fut)` even though it seems to do nothing in the SIGINT handling case..(I presume it might help avoid abandonment in a `asyncio.Task.cancel()` case maybe?) --- tests/test_infected_asyncio.py | 225 +++++++++++++++++++++++++++++++-- tractor/_exceptions.py | 13 +- tractor/to_asyncio.py | 153 ++++++++++++++++++---- 3 files changed, 344 insertions(+), 47 deletions(-) diff --git a/tests/test_infected_asyncio.py b/tests/test_infected_asyncio.py index 45722a63..8d4697fd 100644 --- a/tests/test_infected_asyncio.py +++ b/tests/test_infected_asyncio.py @@ -2,16 +2,25 @@ The hipster way to force SC onto the stdlib's "async": 'infection mode'. 
''' -from typing import Optional, Iterable, Union import asyncio import builtins +from contextlib import ExitStack import itertools import importlib +import os +from pathlib import Path +import signal +from typing import ( + Callable, + Iterable, + Union, +) import pytest import trio import tractor from tractor import ( + current_actor, to_asyncio, RemoteActorError, ContextCancelled, @@ -25,8 +34,8 @@ async def sleep_and_err( # just signature placeholders for compat with # ``to_asyncio.open_channel_from()`` - to_trio: Optional[trio.MemorySendChannel] = None, - from_trio: Optional[asyncio.Queue] = None, + to_trio: trio.MemorySendChannel|None = None, + from_trio: asyncio.Queue|None = None, ): if to_trio: @@ -36,7 +45,7 @@ async def sleep_and_err( assert 0 -async def sleep_forever(): +async def aio_sleep_forever(): await asyncio.sleep(float('inf')) @@ -44,7 +53,7 @@ async def trio_cancels_single_aio_task(): # spawn an ``asyncio`` task to run a func and return result with trio.move_on_after(.2): - await tractor.to_asyncio.run_task(sleep_forever) + await tractor.to_asyncio.run_task(aio_sleep_forever) def test_trio_cancels_aio_on_actor_side(reg_addr): @@ -66,14 +75,13 @@ def test_trio_cancels_aio_on_actor_side(reg_addr): async def asyncio_actor( - target: str, expect_err: Exception|None = None ) -> None: assert tractor.current_actor().is_infected_aio() - target = globals()[target] + target: Callable = globals()[target] if '.' in expect_err: modpath, _, name = expect_err.rpartition('.') @@ -140,7 +148,7 @@ def test_tractor_cancels_aio(reg_addr): async with tractor.open_nursery() as n: portal = await n.run_in_actor( asyncio_actor, - target='sleep_forever', + target='aio_sleep_forever', expect_err='trio.Cancelled', infect_asyncio=True, ) @@ -164,7 +172,7 @@ def test_trio_cancels_aio(reg_addr): async with tractor.open_nursery() as n: await n.run_in_actor( asyncio_actor, - target='sleep_forever', + target='aio_sleep_forever', expect_err='trio.Cancelled', infect_asyncio=True, ) @@ -195,7 +203,7 @@ async def trio_ctx( # spawn another asyncio task for the cuck of it. n.start_soon( tractor.to_asyncio.run_task, - sleep_forever, + aio_sleep_forever, ) await trio.sleep_forever() @@ -285,7 +293,7 @@ async def aio_cancel(): # cancel and enter sleep task.cancel() - await sleep_forever() + await aio_sleep_forever() def test_aio_cancelled_from_aio_causes_trio_cancelled(reg_addr): @@ -355,7 +363,6 @@ async def push_from_aio_task( async def stream_from_aio( - exit_early: bool = False, raise_err: bool = False, aio_raise_err: bool = False, @@ -618,6 +625,200 @@ def test_echoserver_detailed_mechanics( trio.run(main) + +@tractor.context +async def manage_file( + ctx: tractor.Context, + tmp_path_str: str, + bg_aio_task: bool = False, +): + ''' + Start an `asyncio` task that just sleeps after registering a context + with `Actor.lifetime_stack`. Trigger a SIGINT to kill the actor tree + and ensure the stack is closed in the infected mode child. + + To verify the teardown state just write a tmpfile to the `testdir` + and delete it on actor close. + + ''' + + tmp_path: Path = Path(tmp_path_str) + tmp_file: Path = tmp_path / f'{" ".join(ctx._actor.uid)}.file' + + # create a the tmp file and tell the parent where it's at + assert not tmp_file.is_file() + tmp_file.touch() + + stack: ExitStack = current_actor().lifetime_stack + stack.callback(tmp_file.unlink) + + await ctx.started(( + str(tmp_file), + os.getpid(), + )) + + # expect to be cancelled from here! 
+ try: + + # NOTE: turns out you don't even need to sched an aio task + # since the original issue, even though seemingly was due to + # the guest-run being abandoned + a `._debug.pause()` inside + # `._runtime._async_main()` (which was originally trying to + # debug the `.lifetime_stack` not closing), IS NOT actually + # the core issue? + # + # further notes: + # + # - `trio` only issues the " RuntimeWarning: Trio guest run + # got abandoned without properly finishing... weird stuff + # might happen" IFF you DO run a asyncio task here, BUT + # - the original issue of the `.lifetime_stack` not closing + # will still happen even if you don't run an `asyncio` task + # here even though the "abandon" messgage won't be shown.. + # + # => ????? honestly i'm lost but it seems to be some issue + # with `asyncio` and SIGINT.. + # + # XXX NOTE XXX SO, if this LINE IS UNCOMMENTED and + # `run_as_asyncio_guest()` is written WITHOUT THE + # `.cancel_soon()` soln, both of these tests will pass ?? + # so maybe it has something to do with `asyncio` loop init + # state?!? + # honestly, this REALLY reminds me why i haven't used + # `asyncio` by choice in years.. XD + # + # await tractor.to_asyncio.run_task(aio_sleep_forever) + if bg_aio_task: + async with trio.open_nursery() as tn: + tn.start_soon( + tractor.to_asyncio.run_task, + aio_sleep_forever, + ) + + await trio.sleep_forever() + + # signalled manually at the OS level (aka KBI) by the parent actor. + except KeyboardInterrupt: + print('child raised KBI..') + assert tmp_file.exists() + raise + else: + raise RuntimeError('shoulda received a KBI?') + + +@pytest.mark.parametrize( + 'bg_aio_task', + [ + False, + + # NOTE: (and see notes in `manage_file()` above as well) if + # we FOR SURE SPAWN AN AIO TASK in the child it seems the + # "silent-abandon" case (as is described in detail in + # `to_asyncio.run_as_asyncio_guest()`) does not happen and + # `asyncio`'s loop will at least abandon the `trio` side + # loudly? .. prolly the state-spot to start looking for + # a soln that results in NO ABANDONMENT.. XD + True, + ], + ids=[ + 'bg_aio_task', + 'just_trio_slee', + ], +) +@pytest.mark.parametrize( + 'wait_for_ctx', + [ + False, + True, + ], + ids=[ + 'raise_KBI_in_rent', + 'wait_for_ctx', + ], +) +def test_sigint_closes_lifetime_stack( + tmp_path: Path, + wait_for_ctx: bool, + bg_aio_task: bool, +): + ''' + Ensure that an infected child can use the `Actor.lifetime_stack` + to make a file on boot and it's automatically cleaned up by the + actor-lifetime-linked exit stack closure. + + ''' + async def main(): + try: + async with tractor.open_nursery() as n: + p = await n.start_actor( + 'file_mngr', + enable_modules=[__name__], + infect_asyncio=True, + ) + async with p.open_context( + manage_file, + tmp_path_str=str(tmp_path), + bg_aio_task=bg_aio_task, + ) as (ctx, first): + + path_str, cpid = first + tmp_file: Path = Path(path_str) + assert tmp_file.exists() + + # XXX originally to simulate what (hopefully) + # the below now triggers.. had to manually + # trigger a SIGINT from a ctl-c in the root. + # await trio.sleep_forever() + + # XXX NOTE XXX signal infected-`asyncio` child to + # OS-cancel with SIGINT; this should trigger the + # bad `asyncio` cancel behaviour that can cause + # a guest-run abandon as was seen causing + # shm-buffer leaks in `piker`'s live quote stream + # susbys! 
+ # + # await trio.sleep(.5) + await trio.sleep(.2) + os.kill( + cpid, + signal.SIGINT, + ) + + # XXX CASE 1: without the bug fixed, in + # the non-KBI-raised-in-parent case, this + # timeout should trigger! + if wait_for_ctx: + print('waiting for ctx outcome in parent..') + try: + with trio.fail_after(.7): + await ctx.wait_for_result() + except tractor.ContextCancelled as ctxc: + assert ctxc.canceller == ctx.chan.uid + raise + + # XXX CASE 2: this seems to be the source of the + # original issue which exhibited BEFORE we put + # a `Actor.cancel_soon()` inside + # `run_as_asyncio_guest()`.. + else: + raise KeyboardInterrupt + + pytest.fail('should have raised some kinda error?!?') + + except ( + KeyboardInterrupt, + ContextCancelled, + ): + # XXX CASE 2: without the bug fixed, in the + # KBI-raised-in-parent case, the actor teardown should + # never get run (silently abaondoned by `asyncio`..) and + # thus the file should leak! + assert not tmp_file.exists() + assert ctx.maybe_error + + trio.run(main) + + # TODO: debug_mode tests once we get support for `asyncio`! # # -[ ] need tests to wrap both scripts: diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 108134ca..b85a1b83 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -973,15 +973,6 @@ class NoRuntime(RuntimeError): "The root actor has not been initialized yet" - -class AsyncioCancelled(Exception): - ''' - Asyncio cancelled translation (non-base) error - for use with the ``to_asyncio`` module - to be raised in the ``trio`` side task - - ''' - class MessagingError(Exception): ''' IPC related msg (typing), transaction (ordering) or dialog @@ -1375,7 +1366,9 @@ def _mk_recv_mte( any_pld: Any = msgpack.decode(msg.pld) message: str = ( f'invalid `{msg_type.__qualname__}` msg payload\n\n' - f'value: `{any_pld!r}` does not match type-spec: ' + f'{any_pld!r}\n\n' + f'has type {type(any_pld)!r}\n\n' + f'and does not match type-spec ' f'`{type(msg).__qualname__}.pld: {codec.pld_spec_str}`' ) bad_msg = msg diff --git a/tractor/to_asyncio.py b/tractor/to_asyncio.py index d1451b4c..e041721f 100644 --- a/tractor/to_asyncio.py +++ b/tractor/to_asyncio.py @@ -18,11 +18,13 @@ Infection apis for ``asyncio`` loops running ``trio`` using guest mode. ''' +from __future__ import annotations import asyncio from asyncio.exceptions import CancelledError from contextlib import asynccontextmanager as acm from dataclasses import dataclass import inspect +import traceback from typing import ( Any, Callable, @@ -30,20 +32,21 @@ from typing import ( Awaitable, ) -import trio -from outcome import Error - -from tractor.log import get_logger +import tractor from tractor._state import ( - current_actor, debug_mode, ) +from tractor.log import get_logger from tractor.devx import _debug -from tractor._exceptions import AsyncioCancelled from tractor.trionics._broadcast import ( broadcast_receiver, BroadcastReceiver, ) +import trio +from outcome import ( + Error, + Outcome, +) log = get_logger(__name__) @@ -161,7 +164,7 @@ def _run_asyncio_task( ''' __tracebackhide__ = True - if not current_actor().is_infected_aio(): + if not tractor.current_actor().is_infected_aio(): raise RuntimeError( "`infect_asyncio` mode is not enabled!?" 
) @@ -172,7 +175,6 @@ def _run_asyncio_task( to_trio, from_aio = trio.open_memory_channel(qsize) # type: ignore args = tuple(inspect.getfullargspec(func).args) - if getattr(func, '_tractor_steam_function', None): # the assumption is that the target async routine accepts the # send channel then it intends to yield more then one return @@ -346,13 +348,22 @@ def _run_asyncio_task( # on a checkpoint. cancel_scope.cancel() - # raise any ``asyncio`` side error. + # raise any `asyncio` side error. raise aio_err task.add_done_callback(cancel_trio) return chan +class AsyncioCancelled(CancelledError): + ''' + Asyncio cancelled translation (non-base) error + for use with the ``to_asyncio`` module + to be raised in the ``trio`` side task + + ''' + + @acm async def translate_aio_errors( @@ -516,7 +527,6 @@ async def open_channel_from( def run_as_asyncio_guest( - trio_main: Callable, ) -> None: @@ -548,6 +558,11 @@ def run_as_asyncio_guest( loop = asyncio.get_running_loop() trio_done_fut = asyncio.Future() + startup_msg: str = ( + 'Starting `asyncio` guest-loop-run\n' + '-> got running loop\n' + '-> built a `trio`-done future\n' + ) if debug_mode(): # XXX make it obvi we know this isn't supported yet! @@ -562,34 +577,120 @@ def run_as_asyncio_guest( def trio_done_callback(main_outcome): if isinstance(main_outcome, Error): - error = main_outcome.error + error: BaseException = main_outcome.error + + # show an dedicated `asyncio`-side tb from the error + tb_str: str = ''.join(traceback.format_exception(error)) + log.exception( + 'Guest-run errored!?\n\n' + f'{main_outcome}\n' + f'{error}\n\n' + f'{tb_str}\n' + ) trio_done_fut.set_exception(error) - # TODO: explicit asyncio tb? - # traceback.print_exception(error) - - # XXX: do we need this? - # actor.cancel_soon() - + # raise inline main_outcome.unwrap() + else: trio_done_fut.set_result(main_outcome) - log.runtime(f"trio_main finished: {main_outcome!r}") + log.runtime(f'trio_main finished: {main_outcome!r}') + + startup_msg += ( + f'-> created {trio_done_callback!r}\n' + f'-> scheduling `trio_main`: {trio_main!r}\n' + ) # start the infection: run trio on the asyncio loop in "guest mode" log.runtime( - 'Infecting `asyncio`-process with a `trio` guest-run of\n\n' - f'{trio_main!r}\n\n' - - f'{trio_done_callback}\n' + f'{startup_msg}\n\n' + + + 'Infecting `asyncio`-process with a `trio` guest-run!\n' ) + trio.lowlevel.start_guest_run( trio_main, run_sync_soon_threadsafe=loop.call_soon_threadsafe, done_callback=trio_done_callback, ) - # NOTE `.unwrap()` will raise on error - return (await trio_done_fut).unwrap() + try: + # TODO: better SIGINT handling since shielding seems to + # make NO DIFFERENCE XD + # -[ ] maybe this is due to 3.11's recent SIGINT handling + # changes and we can better work with/around it? + # https://docs.python.org/3/library/asyncio-runner.html#handling-keyboard-interruption + out: Outcome = await asyncio.shield(trio_done_fut) + # NOTE `Error.unwrap()` will raise + return out.unwrap() + + except asyncio.CancelledError: + actor: tractor.Actor = tractor.current_actor() + log.exception( + '`asyncio`-side main task was cancelled!\n' + 'Cancelling actor-runtime..\n' + f'c)>\n' + f' |_{actor}.cancel_soon()\n' + + ) + + # XXX NOTE XXX the next LOC is super important!!! + # => without it, we can get a guest-run abandonment case + # where asyncio will not trigger `trio` in a final event + # loop cycle! 
+ # + # our test, + # `test_infected_asyncio.test_sigint_closes_lifetime_stack()` + # demonstrates how if when we raise a SIGINT-signal in an infected + # child we get a variable race condition outcome where + # either of the following can indeterminately happen, + # + # - "silent-abandon": `asyncio` abandons the `trio` + # guest-run task silently and no `trio`-guest-run or + # `tractor`-actor-runtime teardown happens whatsoever.. + # this is the WORST (race) case outcome. + # + # - OR, "loud-abandon": the guest run get's abaondoned "loudly" with + # `trio` reporting a console traceback and further tbs of all + # the failed shutdown routines also show on console.. + # + # our test can thus fail and (has been parametrized for) + # the 2 cases: + # + # - when the parent raises a KBI just after + # signalling the child, + # |_silent-abandon => the `Actor.lifetime_stack` will + # never be closed thus leaking a resource! + # -> FAIL! + # |_loud-abandon => despite the abandonment at least the + # stack will be closed out.. + # -> PASS + # + # - when the parent instead simply waits on `ctx.wait_for_result()` + # (i.e. DOES not raise a KBI itself), + # |_silent-abandon => test will just hang and thus the ctx + # and actor will never be closed/cancelled/shutdown + # resulting in leaking a (file) resource since the + # `trio`/`tractor` runtime never relays a ctxc back to + # the parent; the test's timeout will trigger.. + # -> FAIL! + # |_loud-abandon => this case seems to never happen?? + # + # XXX FIRST PART XXX, SO, this is a fix to the + # "silent-abandon" case, NOT the `trio`-guest-run + # abandonment issue in general, for which the NEXT LOC + # is apparently a working fix! + actor.cancel_soon() + + # XXX NOTE XXX PUMP the asyncio event loop to allow `trio`-side to + # `trio`-guest-run to complete and teardown !! + # + # XXX WITHOUT THIS the guest-run gets race-conditionally + # abandoned by `asyncio`!! + # XD XD XD + await asyncio.shield( + asyncio.sleep(.1) # NOPE! it can't be 0 either XD + ) + raise # might as well if it's installed. try: @@ -599,4 +700,6 @@ def run_as_asyncio_guest( except ImportError: pass - return asyncio.run(aio_main(trio_main)) + return asyncio.run( + aio_main(trio_main), + ) -- 2.34.1 From 10558b0986af01bddebb738781c6c18e9c6cf056 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 25 Jun 2024 19:36:31 -0400 Subject: [PATCH 192/305] Lel, revert `AsyncioCancelled` inherit, module.. Turns out it somehow breaks our `to_asyncio` error relay since obvi `asyncio`'s runtime seems to specially handle it (prolly via `isinstance()` ?) and it caused our `test_aio_cancelled_from_aio_causes_trio_cancelled()` to hang.. Further, obvi `unpack_error()` won't be able to find the type def if not kept inside `._exceptions`.. So given all that, revert the change/move as well as: - tweak the aio-from-aio cancel test to timeout. - do `trio.sleep()` conc with any bg aio task by moving out nursery block. - add a `send_sigint_to: str` parameter to `test_sigint_closes_lifetime_stack()` such that we test the SIGINT being relayed to just the parent or the child. 
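
As a hypothetical, standalone sketch (NOT part of this patch, names
defined locally to mirror the wrapper type here) of why the
subclassing breaks the relay: raising any `asyncio.CancelledError`
subclass inside a task marks that task as *cancelled*, so upstream
`except CancelledError` handling (including `asyncio`'s own) can't
tell it apart from a "real" cancel, whereas a plain `Exception`
wrapper propagates through `await` like any other error:

    import asyncio

    class SubclassedCancelled(asyncio.CancelledError):
        ...

    class AsyncioCancelled(Exception):
        ...

    async def raiser(exc_type: type[BaseException]):
        raise exc_type('relayed from the aio side')

    async def main():
        try:
            await asyncio.ensure_future(raiser(SubclassedCancelled))
        except asyncio.CancelledError:
            # indistinguishable from a plain task-cancellation!
            print('swallowed as a cancel..')

        try:
            await asyncio.ensure_future(raiser(AsyncioCancelled))
        except AsyncioCancelled as err:
            # a non-cancel `Exception` wrapper relays just fine
            print(f'relayed as a normal error: {err!r}')

    asyncio.run(main())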
--- tests/test_infected_asyncio.py | 69 ++++++++++++++++++++++++++-------- tractor/_exceptions.py | 11 ++++++ 2 files changed, 64 insertions(+), 16 deletions(-) diff --git a/tests/test_infected_asyncio.py b/tests/test_infected_asyncio.py index 8d4697fd..645dc4b6 100644 --- a/tests/test_infected_asyncio.py +++ b/tests/test_infected_asyncio.py @@ -289,23 +289,35 @@ async def aio_cancel(): ''' await asyncio.sleep(0.5) - task = asyncio.current_task() # cancel and enter sleep + task = asyncio.current_task() task.cancel() await aio_sleep_forever() def test_aio_cancelled_from_aio_causes_trio_cancelled(reg_addr): + ''' + When the `asyncio.Task` cancels itself the `trio` side cshould + also cancel and teardown and relay the cancellation cross-process + to the caller (parent). + ''' async def main(): - async with tractor.open_nursery() as n: - await n.run_in_actor( + + an: tractor.ActorNursery + async with tractor.open_nursery() as an: + p: tractor.Portal = await an.run_in_actor( asyncio_actor, target='aio_cancel', expect_err='tractor.to_asyncio.AsyncioCancelled', infect_asyncio=True, ) + # NOTE: normally the `an.__aexit__()` waits on the + # portal's result but we do it explicitly here + # to avoid indent levels. + with trio.fail_after(1): + await p.wait_for_result() with pytest.raises( expected_exception=(RemoteActorError, ExceptionGroup), @@ -313,7 +325,7 @@ def test_aio_cancelled_from_aio_causes_trio_cancelled(reg_addr): trio.run(main) # might get multiple `trio.Cancelled`s as well inside an inception - err = excinfo.value + err: RemoteActorError|ExceptionGroup = excinfo.value if isinstance(err, ExceptionGroup): err = next(itertools.dropwhile( lambda exc: not isinstance(exc, tractor.RemoteActorError), @@ -321,7 +333,8 @@ def test_aio_cancelled_from_aio_causes_trio_cancelled(reg_addr): )) assert err - # ensure boxed error is correct + # relayed boxed error should be our `trio`-task's + # cancel-signal-proxy-equivalent of `asyncio.CancelledError`. assert err.boxed_type == to_asyncio.AsyncioCancelled @@ -630,6 +643,7 @@ def test_echoserver_detailed_mechanics( async def manage_file( ctx: tractor.Context, tmp_path_str: str, + send_sigint_to: str, bg_aio_task: bool = False, ): ''' @@ -687,25 +701,39 @@ async def manage_file( # honestly, this REALLY reminds me why i haven't used # `asyncio` by choice in years.. XD # - # await tractor.to_asyncio.run_task(aio_sleep_forever) - if bg_aio_task: - async with trio.open_nursery() as tn: + async with trio.open_nursery() as tn: + if bg_aio_task: tn.start_soon( tractor.to_asyncio.run_task, aio_sleep_forever, ) - await trio.sleep_forever() + # XXX don't-need/doesn't-make-a-diff right + # since we're already doing it from parent? + # if send_sigint_to == 'child': + # os.kill( + # os.getpid(), + # signal.SIGINT, + # ) + await trio.sleep_forever() # signalled manually at the OS level (aka KBI) by the parent actor. 
except KeyboardInterrupt: print('child raised KBI..') assert tmp_file.exists() raise - else: - raise RuntimeError('shoulda received a KBI?') + + raise RuntimeError('shoulda received a KBI?') +@pytest.mark.parametrize( + 'send_sigint_to', + [ + 'child', + 'parent', + ], + ids='send_SIGINT_to={}'.format, +) @pytest.mark.parametrize( 'bg_aio_task', [ @@ -740,6 +768,8 @@ def test_sigint_closes_lifetime_stack( tmp_path: Path, wait_for_ctx: bool, bg_aio_task: bool, + debug_mode: bool, + send_sigint_to: str, ): ''' Ensure that an infected child can use the `Actor.lifetime_stack` @@ -749,8 +779,11 @@ def test_sigint_closes_lifetime_stack( ''' async def main(): try: - async with tractor.open_nursery() as n: - p = await n.start_actor( + an: tractor.ActorNursery + async with tractor.open_nursery( + debug_mode=debug_mode, + ) as an: + p: tractor.Portal = await an.start_actor( 'file_mngr', enable_modules=[__name__], infect_asyncio=True, @@ -758,6 +791,7 @@ def test_sigint_closes_lifetime_stack( async with p.open_context( manage_file, tmp_path_str=str(tmp_path), + send_sigint_to=send_sigint_to, bg_aio_task=bg_aio_task, ) as (ctx, first): @@ -777,10 +811,13 @@ def test_sigint_closes_lifetime_stack( # shm-buffer leaks in `piker`'s live quote stream # susbys! # - # await trio.sleep(.5) await trio.sleep(.2) + pid: int = ( + cpid if send_sigint_to == 'child' + else os.getpid() + ) os.kill( - cpid, + pid, signal.SIGINT, ) @@ -790,7 +827,7 @@ def test_sigint_closes_lifetime_stack( if wait_for_ctx: print('waiting for ctx outcome in parent..') try: - with trio.fail_after(.7): + with trio.fail_after(1): await ctx.wait_for_result() except tractor.ContextCancelled as ctxc: assert ctxc.canceller == ctx.chan.uid diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index b85a1b83..a0b6ff3f 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -980,6 +980,17 @@ class MessagingError(Exception): ''' +class AsyncioCancelled(Exception): + ''' + Asyncio cancelled translation (non-base) error + for use with the ``to_asyncio`` module + to be raised in the ``trio`` side task + + NOTE: this should NOT inherit from `asyncio.CancelledError` or + tests should break! + + ''' + def pack_error( exc: BaseException|RemoteActorError, -- 2.34.1 From 199247309ef0a02ff6f3f903e70585359c3dcce0 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 26 Jun 2024 11:44:31 -0400 Subject: [PATCH 193/305] Demo-abandonment on shielded `trio`-side work Finally this reproduces the issue as it (originally?) exhibited inside `piker` where the `Actor.lifetime_stack` wasn't closed in cases where during `infected_aio`-actor cancellation/shutdown `trio` side tasks which are doing shielded (teardown) work are NOT being watched/waited on from the `aio_main()` task-closure inside `run_as_asyncio_guest()`! This is then the root cause of the guest-run being abandoned since if our `aio_main()` task-closure doesn't know it should allow the run to finish, it's going to call `loop.close()` eventually resulting in the `GeneratorExit` thrown into `trio._core._run.unrolled_run()`.. So, this extends the `test_sigint_closes_lifetime_stack()` suite to include cases for such shielded `trio`-task ops: - add a new `trio_side_is_shielded: bool` which will toggle whether to add a shielded 0.5s `trio.sleep()` loop to `manage_file()` which should outlive the `asyncio` event-loop shutdown sequence and result in an abandoned guest-run and thus a leaked file. 
- parametrize the existing suite with this case resulting in a total 16 test set B) This patch demonstrates the problem with our `aio_main()` task-closure impl via the now 4 failing tests, a fix is coming in a follow up commit! --- tests/test_infected_asyncio.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/tests/test_infected_asyncio.py b/tests/test_infected_asyncio.py index 645dc4b6..42eb35b7 100644 --- a/tests/test_infected_asyncio.py +++ b/tests/test_infected_asyncio.py @@ -644,6 +644,7 @@ async def manage_file( ctx: tractor.Context, tmp_path_str: str, send_sigint_to: str, + trio_side_is_shielded: bool = True, bg_aio_task: bool = False, ): ''' @@ -693,11 +694,6 @@ async def manage_file( # => ????? honestly i'm lost but it seems to be some issue # with `asyncio` and SIGINT.. # - # XXX NOTE XXX SO, if this LINE IS UNCOMMENTED and - # `run_as_asyncio_guest()` is written WITHOUT THE - # `.cancel_soon()` soln, both of these tests will pass ?? - # so maybe it has something to do with `asyncio` loop init - # state?!? # honestly, this REALLY reminds me why i haven't used # `asyncio` by choice in years.. XD # @@ -715,6 +711,15 @@ async def manage_file( # os.getpid(), # signal.SIGINT, # ) + + # XXX spend a half sec doing shielded checkpointing to + # ensure that despite the `trio`-side task ignoring the + # SIGINT, the `asyncio` side won't abandon the guest-run! + if trio_side_is_shielded: + with trio.CancelScope(shield=True): + for i in range(5): + await trio.sleep(0.1) + await trio.sleep_forever() # signalled manually at the OS level (aka KBI) by the parent actor. @@ -726,6 +731,17 @@ async def manage_file( raise RuntimeError('shoulda received a KBI?') +@pytest.mark.parametrize( + 'trio_side_is_shielded', + [ + False, + True, + ], + ids=[ + 'trio_side_no_shielding', + 'trio_side_does_shielded_work', + ], +) @pytest.mark.parametrize( 'send_sigint_to', [ @@ -768,6 +784,7 @@ def test_sigint_closes_lifetime_stack( tmp_path: Path, wait_for_ctx: bool, bg_aio_task: bool, + trio_side_is_shielded: bool, debug_mode: bool, send_sigint_to: str, ): @@ -793,6 +810,7 @@ def test_sigint_closes_lifetime_stack( tmp_path_str=str(tmp_path), send_sigint_to=send_sigint_to, bg_aio_task=bg_aio_task, + trio_side_is_shielded=trio_side_is_shielded, ) as (ctx, first): path_str, cpid = first -- 2.34.1 From ddbda17338774ada4a016f6e33c10fa7b810771a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 26 Jun 2024 13:48:36 -0400 Subject: [PATCH 194/305] Solve our abandonment issues.. To make the recent set of tests pass this (hopefully) finally solves all `asyncio` embedded `trio` guest-run abandonment by ensuring we "pump the event loop" until the guest-run future is fully complete. Accomplished via simple poll loop of the form `while not trio_done_fut.done(): await asyncio.sleep(.1)` in the `aio_main()` task's exception teardown sequence. The loop does a naive 10ms "pump-via-sleep & poll" for the `trio` side to complete before finally exiting (and presumably raising) from the SIGINT cancellation. Other related cleanups and refinements: - use `asyncio.Task.result()` inside `cancel_trio()` since it also inline-raises any exception outcome and we can also log-report the result in non-error cases. - comment out buncha not-sure-we-need-it stuff in `cancel_trio()`. - remove the botched `AsyncioCancelled(CancelledError):` idea obvi XD - comment `greenback` init for now in `aio_main()` since (pretty sure) we don't ever want to actually REPL in that specific func-as-task? 
- always capture any `fute_err: BaseException` from the `main_outcome: Outcome` delivered by the `trio` side guest-run task. - add and raise a new super noisy `AsyncioRuntimeTranslationError` whenever we detect that the guest-run `trio_done_fut` has not completed before task exit; should avoid abandonment issues ever happening again without knowing! --- tractor/to_asyncio.py | 246 ++++++++++++++++++++++++++++++------------ 1 file changed, 176 insertions(+), 70 deletions(-) diff --git a/tractor/to_asyncio.py b/tractor/to_asyncio.py index e041721f..fb18ba88 100644 --- a/tractor/to_asyncio.py +++ b/tractor/to_asyncio.py @@ -33,11 +33,12 @@ from typing import ( ) import tractor +from tractor._exceptions import AsyncioCancelled from tractor._state import ( debug_mode, ) -from tractor.log import get_logger from tractor.devx import _debug +from tractor.log import get_logger from tractor.trionics._broadcast import ( broadcast_receiver, BroadcastReceiver, @@ -51,7 +52,10 @@ from outcome import ( log = get_logger(__name__) -__all__ = ['run_task', 'run_as_asyncio_guest'] +__all__ = [ + 'run_task', + 'run_as_asyncio_guest', +] @dataclass @@ -155,15 +159,16 @@ def _run_asyncio_task( *, qsize: int = 1, provide_channels: bool = False, + hide_tb: bool = False, **kwargs, ) -> LinkedTaskChannel: ''' Run an ``asyncio`` async function or generator in a task, return - or stream the result back to ``trio``. + or stream the result back to the caller `trio.lowleve.Task`. ''' - __tracebackhide__ = True + __tracebackhide__: bool = hide_tb if not tractor.current_actor().is_infected_aio(): raise RuntimeError( "`infect_asyncio` mode is not enabled!?" @@ -224,6 +229,7 @@ def _run_asyncio_task( try: result = await coro except BaseException as aio_err: + chan._aio_err = aio_err if isinstance(aio_err, CancelledError): log.runtime( '`asyncio` task was cancelled..\n' @@ -232,7 +238,6 @@ def _run_asyncio_task( log.exception( '`asyncio` task errored\n' ) - chan._aio_err = aio_err raise else: @@ -268,7 +273,7 @@ def _run_asyncio_task( aio_task_complete ) ) - chan._aio_task = task + chan._aio_task: asyncio.Task = task # XXX TODO XXX get this actually workin.. XD # maybe setup `greenback` for `asyncio`-side task REPLing @@ -284,19 +289,19 @@ def _run_asyncio_task( def cancel_trio(task: asyncio.Task) -> None: ''' - Cancel the calling ``trio`` task on error. + Cancel the calling `trio` task on error. ''' nonlocal chan - aio_err = chan._aio_err + aio_err: BaseException|None = chan._aio_err task_err: BaseException|None = None - # only to avoid ``asyncio`` complaining about uncaptured + # only to avoid `asyncio` complaining about uncaptured # task exceptions try: - task.exception() + res: Any = task.result() except BaseException as terr: - task_err = terr + task_err: BaseException = terr msg: str = ( 'Infected `asyncio` task {etype_str}\n' @@ -328,42 +333,49 @@ def _run_asyncio_task( if task_err is None: assert aio_err - aio_err.with_traceback(aio_err.__traceback__) - # log.error( - # 'infected task errorred' - # ) + # wait, wut? + # aio_err.with_traceback(aio_err.__traceback__) - # TODO: show that the cancellation originated - # from the ``trio`` side? right? - # elif type(aio_err) is CancelledError: + # TODO: show when cancellation originated + # from each side more pedantically? + # elif ( + # type(aio_err) is CancelledError + # and # trio was the cause? 
+ # cancel_scope.cancel_called + # ): # log.cancel( - # 'infected task was cancelled' + # 'infected task was cancelled by `trio`-side' # ) + # raise aio_err from task_err - # if cancel_scope.cancelled: - # raise aio_err from err - - # XXX: alway cancel the scope on error - # in case the trio task is blocking - # on a checkpoint. + # XXX: if not already, alway cancel the scope + # on a task error in case the trio task is blocking on + # a checkpoint. cancel_scope.cancel() + if ( + task_err + and + aio_err is not task_err + ): + raise aio_err from task_err + # raise any `asyncio` side error. raise aio_err + log.info( + '`trio` received final result from {task}\n' + f'|_{res}\n' + ) + # TODO: do we need this? + # if task_err: + # cancel_scope.cancel() + # raise task_err + task.add_done_callback(cancel_trio) return chan -class AsyncioCancelled(CancelledError): - ''' - Asyncio cancelled translation (non-base) error - for use with the ``to_asyncio`` module - to be raised in the ``trio`` side task - - ''' - - @acm async def translate_aio_errors( @@ -386,7 +398,9 @@ async def translate_aio_errors( ) -> None: aio_err = chan._aio_err if ( - aio_err is not None and + aio_err is not None + and + # not isinstance(aio_err, CancelledError) type(aio_err) != CancelledError ): # always raise from any captured asyncio error @@ -418,13 +432,17 @@ async def translate_aio_errors( ): aio_err = chan._aio_err if ( - task.cancelled() and + task.cancelled() + and type(aio_err) is CancelledError ): - # if an underlying ``asyncio.CancelledError`` triggered this + # if an underlying `asyncio.CancelledError` triggered this # channel close, raise our (non-``BaseException``) wrapper # error: ``AsyncioCancelled`` from that source error. - raise AsyncioCancelled from aio_err + raise AsyncioCancelled( + f'Task cancelled\n' + f'|_{task}\n' + ) from aio_err else: raise @@ -467,8 +485,8 @@ async def run_task( ) -> Any: ''' - Run an ``asyncio`` async function or generator in a task, return - or stream the result back to ``trio``. + Run an `asyncio` async function or generator in a task, return + or stream the result back to `trio`. ''' # simple async func @@ -526,10 +544,27 @@ async def open_channel_from( chan._to_trio.close() +class AsyncioRuntimeTranslationError(RuntimeError): + ''' + We failed to correctly relay runtime semantics and/or maintain SC + supervision rules cross-event-loop. + + ''' + + def run_as_asyncio_guest( trio_main: Callable, + # ^-NOTE-^ when spawned with `infected_aio=True` this func is + # normally `Actor._async_main()` as is passed by some boostrap + # entrypoint like `._entry._trio_main()`. ) -> None: +# ^-TODO-^ technically whatever `trio_main` returns.. we should +# try to use func-typevar-params at leaast by 3.13! +# -[ ] https://typing.readthedocs.io/en/latest/spec/callables.html#callback-protocols +# -[ ] https://peps.python.org/pep-0646/#using-type-variable-tuples-in-functions +# -[ ] https://typing.readthedocs.io/en/latest/spec/callables.html#unpack-for-keyword-arguments +# -[ ] https://peps.python.org/pep-0718/ ''' Entry for an "infected ``asyncio`` actor". @@ -555,7 +590,13 @@ def run_as_asyncio_guest( # :) async def aio_main(trio_main): + ''' + Main `asyncio.Task` which calls + `trio.lowlevel.start_guest_run()` to "infect" the `asyncio` + event-loop by embedding the `trio` scheduler allowing us to + boot the `tractor` runtime and connect back to our parent. 
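+
+        The guest-run's final `outcome.Outcome` is relayed back to
+        this task via `trio_done_callback()` setting it on the
+        `trio_done_fut: asyncio.Future` awaited below.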
+ ''' loop = asyncio.get_running_loop() trio_done_fut = asyncio.Future() startup_msg: str = ( @@ -564,17 +605,22 @@ def run_as_asyncio_guest( '-> built a `trio`-done future\n' ) - if debug_mode(): - # XXX make it obvi we know this isn't supported yet! - log.error( - 'Attempting to enter unsupported `greenback` init ' - 'from `asyncio` task..' - ) - await _debug.maybe_init_greenback( - force_reload=True, - ) + # TODO: shoudn't this be done in the guest-run trio task? + # if debug_mode(): + # # XXX make it obvi we know this isn't supported yet! + # log.error( + # 'Attempting to enter unsupported `greenback` init ' + # 'from `asyncio` task..' + # ) + # await _debug.maybe_init_greenback( + # force_reload=True, + # ) def trio_done_callback(main_outcome): + log.info( + f'trio_main finished with\n' + f'|_{main_outcome!r}' + ) if isinstance(main_outcome, Error): error: BaseException = main_outcome.error @@ -594,7 +640,6 @@ def run_as_asyncio_guest( else: trio_done_fut.set_result(main_outcome) - log.runtime(f'trio_main finished: {main_outcome!r}') startup_msg += ( f'-> created {trio_done_callback!r}\n' @@ -613,26 +658,48 @@ def run_as_asyncio_guest( run_sync_soon_threadsafe=loop.call_soon_threadsafe, done_callback=trio_done_callback, ) + fute_err: BaseException|None = None try: - # TODO: better SIGINT handling since shielding seems to - # make NO DIFFERENCE XD - # -[ ] maybe this is due to 3.11's recent SIGINT handling - # changes and we can better work with/around it? - # https://docs.python.org/3/library/asyncio-runner.html#handling-keyboard-interruption out: Outcome = await asyncio.shield(trio_done_fut) - # NOTE `Error.unwrap()` will raise + + # NOTE will raise (via `Error.unwrap()`) from any + # exception packed into the guest-run's `main_outcome`. return out.unwrap() - except asyncio.CancelledError: + except ( + # XXX special SIGINT-handling is required since + # `asyncio.shield()`-ing seems to NOT handle that case as + # per recent changes in 3.11: + # https://docs.python.org/3/library/asyncio-runner.html#handling-keyboard-interruption + # + # NOTE: further, apparently ONLY need to handle this + # special SIGINT case since all other `asyncio`-side + # errors can be processed via our `chan._aio_err` + # relaying (right?); SIGINT seems to be totally diff + # error path in `asyncio`'s runtime..? + asyncio.CancelledError, + + ) as fute_err: + err_message: str = ( + 'main `asyncio` task ' + ) + if isinstance(fute_err, asyncio.CancelledError): + err_message += 'was cancelled!\n' + else: + err_message += f'errored with {out.error!r}\n' + actor: tractor.Actor = tractor.current_actor() log.exception( - '`asyncio`-side main task was cancelled!\n' - 'Cancelling actor-runtime..\n' + err_message + + + 'Cancelling `trio`-side `tractor`-runtime..\n' f'c)>\n' f' |_{actor}.cancel_soon()\n' - ) + # TODO: reduce this comment bloc since abandon issues are + # now solved? + # # XXX NOTE XXX the next LOC is super important!!! # => without it, we can get a guest-run abandonment case # where asyncio will not trigger `trio` in a final event @@ -681,16 +748,55 @@ def run_as_asyncio_guest( # is apparently a working fix! actor.cancel_soon() - # XXX NOTE XXX PUMP the asyncio event loop to allow `trio`-side to - # `trio`-guest-run to complete and teardown !! + # XXX NOTE XXX pump the `asyncio` event-loop to allow + # `trio`-side to `trio`-guest-run to complete and + # teardown !! # - # XXX WITHOUT THIS the guest-run gets race-conditionally - # abandoned by `asyncio`!! 
- # XD XD XD - await asyncio.shield( - asyncio.sleep(.1) # NOPE! it can't be 0 either XD - ) - raise + # *WITHOUT THIS* the guest-run can get race-conditionally abandoned!! + # XD + # + await asyncio.sleep(.1) # `delay` can't be 0 either XD + while not trio_done_fut.done(): + log.runtime( + 'Waiting on main guest-run `asyncio` task to complete..\n' + f'|_trio_done_fut: {trio_done_fut}\n' + ) + await asyncio.sleep(.1) + + # XXX: don't actually need the shield.. seems to + # make no difference (??) and we know it spawns an + # internal task.. + # await asyncio.shield(asyncio.sleep(.1)) + + # XXX alt approach but can block indefinitely.. + # so don't use? + # loop._run_once() + + try: + return trio_done_fut.result() + except asyncio.exceptions.InvalidStateError as state_err: + + # XXX be super dupere noisy about abandonment issues! + aio_task: asyncio.Task = asyncio.current_task() + message: str = ( + 'The `asyncio`-side task likely exited before the ' + '`trio`-side guest-run completed!\n\n' + ) + if fute_err: + message += ( + f'The main {aio_task}\n' + f'STOPPED due to {type(fute_err)}\n\n' + ) + + message += ( + f'Likely something inside our guest-run-as-task impl is ' + f'not effectively waiting on the `trio`-side to complete ?!\n' + f'This code -> {aio_main!r}\n\n' + + 'Below you will likely see a ' + '"RuntimeWarning: Trio guest run got abandoned.." !!\n' + ) + raise AsyncioRuntimeTranslationError(message) from state_err # might as well if it's installed. try: @@ -698,7 +804,7 @@ def run_as_asyncio_guest( loop = uvloop.new_event_loop() asyncio.set_event_loop(loop) except ImportError: - pass + log.runtime('`uvloop` not available..') return asyncio.run( aio_main(trio_main), -- 2.34.1 From 50ba23e602a535083a579ec900eb8175d2bebff8 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 27 Jun 2024 19:27:59 -0400 Subject: [PATCH 195/305] Use `delay=0` in pump loop.. Turns out it does work XD Prior presumption was from before I had the fute poll-loop so makes sense we needed more then one sched-tick's worth of context switch vs. now we can just keep looping-n-pumping as fast possible until the guest-run's main task completes. Also, - minimize the preface commentary (as per todo) now that we have tests codifying all the edge cases :finger_crossed: - parameter-ize the pump-loop-cycle delay and default it to 0. --- tractor/to_asyncio.py | 116 ++++++++++++++++++++---------------------- 1 file changed, 55 insertions(+), 61 deletions(-) diff --git a/tractor/to_asyncio.py b/tractor/to_asyncio.py index fb18ba88..5564d0e1 100644 --- a/tractor/to_asyncio.py +++ b/tractor/to_asyncio.py @@ -558,6 +558,8 @@ def run_as_asyncio_guest( # normally `Actor._async_main()` as is passed by some boostrap # entrypoint like `._entry._trio_main()`. + _sigint_loop_pump_delay: float = 0, + ) -> None: # ^-TODO-^ technically whatever `trio_main` returns.. we should # try to use func-typevar-params at leaast by 3.13! 
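
Aside: a minimal, hypothetical sketch (NOT part of this patch, names
invented) of the host-side pattern these hunks refine - an `asyncio`
task starts the `trio` guest-run and then "pumps" its own event loop,
0-delay sleeps included, until the done-future resolves:

    import asyncio
    import trio

    async def trio_main() -> str:
        await trio.sleep(0.1)
        return 'trio-done'

    async def aio_host() -> str:
        loop = asyncio.get_running_loop()
        done_fut: asyncio.Future = loop.create_future()
        trio.lowlevel.start_guest_run(
            trio_main,
            run_sync_soon_threadsafe=loop.call_soon_threadsafe,
            done_callback=done_fut.set_result,
        )
        # "pump" the host loop until the guest-run's outcome
        # arrives; even `sleep(0)` keeps the loop cycling so the
        # `trio`-scheduled callbacks (and done-callback) get run.
        while not done_fut.done():
            await asyncio.sleep(0)
        return done_fut.result().unwrap()  # value, or raises

    print(asyncio.run(aio_host()))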
@@ -598,7 +600,7 @@ def run_as_asyncio_guest( ''' loop = asyncio.get_running_loop() - trio_done_fut = asyncio.Future() + trio_done_fute = asyncio.Future() startup_msg: str = ( 'Starting `asyncio` guest-loop-run\n' '-> got running loop\n' @@ -633,13 +635,13 @@ def run_as_asyncio_guest( f'{error}\n\n' f'{tb_str}\n' ) - trio_done_fut.set_exception(error) + trio_done_fute.set_exception(error) # raise inline main_outcome.unwrap() else: - trio_done_fut.set_result(main_outcome) + trio_done_fute.set_result(main_outcome) startup_msg += ( f'-> created {trio_done_callback!r}\n' @@ -660,7 +662,7 @@ def run_as_asyncio_guest( ) fute_err: BaseException|None = None try: - out: Outcome = await asyncio.shield(trio_done_fut) + out: Outcome = await asyncio.shield(trio_done_fute) # NOTE will raise (via `Error.unwrap()`) from any # exception packed into the guest-run's `main_outcome`. @@ -697,83 +699,75 @@ def run_as_asyncio_guest( f' |_{actor}.cancel_soon()\n' ) - # TODO: reduce this comment bloc since abandon issues are - # now solved? + # XXX WARNING XXX the next LOCs are super important, since + # without them, we can get guest-run abandonment cases + # where `asyncio` will not schedule or wait on the `trio` + # guest-run task before final shutdown! This is + # particularly true if the `trio` side has tasks doing + # shielded work when a SIGINT condition occurs. # - # XXX NOTE XXX the next LOC is super important!!! - # => without it, we can get a guest-run abandonment case - # where asyncio will not trigger `trio` in a final event - # loop cycle! + # We now have the + # `test_infected_asyncio.test_sigint_closes_lifetime_stack()` + # suite to ensure we do not suffer this issues + # (hopefully) ever again. # - # our test, - # `test_infected_asyncio.test_sigint_closes_lifetime_stack()` - # demonstrates how if when we raise a SIGINT-signal in an infected - # child we get a variable race condition outcome where - # either of the following can indeterminately happen, + # The original abandonment issue surfaced as 2 different + # race-condition dependent types scenarios all to do with + # `asyncio` handling SIGINT from the system: # - # - "silent-abandon": `asyncio` abandons the `trio` - # guest-run task silently and no `trio`-guest-run or - # `tractor`-actor-runtime teardown happens whatsoever.. - # this is the WORST (race) case outcome. + # - "silent-abandon" (WORST CASE): + # `asyncio` abandons the `trio` guest-run task silently + # and no `trio`-guest-run or `tractor`-actor-runtime + # teardown happens whatsoever.. # - # - OR, "loud-abandon": the guest run get's abaondoned "loudly" with - # `trio` reporting a console traceback and further tbs of all - # the failed shutdown routines also show on console.. + # - "loud-abandon" (BEST-ish CASE): + # the guest run get's abaondoned "loudly" with `trio` + # reporting a console traceback and further tbs of all + # the (failed) GC-triggered shutdown routines which + # thankfully does get dumped to console.. # - # our test can thus fail and (has been parametrized for) - # the 2 cases: + # The abandonment is most easily reproduced if the `trio` + # side has tasks doing shielded work where those tasks + # ignore the normal `Cancelled` condition and continue to + # run, but obviously `asyncio` isn't aware of this and at + # some point bails on the guest-run unless we take manual + # intervention.. # - # - when the parent raises a KBI just after - # signalling the child, - # |_silent-abandon => the `Actor.lifetime_stack` will - # never be closed thus leaking a resource! 
- # -> FAIL! - # |_loud-abandon => despite the abandonment at least the - # stack will be closed out.. - # -> PASS + # To repeat, *WITHOUT THIS* stuff below the guest-run can + # get race-conditionally abandoned!! # - # - when the parent instead simply waits on `ctx.wait_for_result()` - # (i.e. DOES not raise a KBI itself), - # |_silent-abandon => test will just hang and thus the ctx - # and actor will never be closed/cancelled/shutdown - # resulting in leaking a (file) resource since the - # `trio`/`tractor` runtime never relays a ctxc back to - # the parent; the test's timeout will trigger.. - # -> FAIL! - # |_loud-abandon => this case seems to never happen?? + # XXX SOLUTION XXX + # ------ - ------ + # XXX FIRST PART: + # ------ - ------ + # the obvious fix to the "silent-abandon" case is to + # explicitly cancel the actor runtime such that no + # runtime tasks are even left unaware that the guest-run + # should be terminated due to OS cancellation. # - # XXX FIRST PART XXX, SO, this is a fix to the - # "silent-abandon" case, NOT the `trio`-guest-run - # abandonment issue in general, for which the NEXT LOC - # is apparently a working fix! actor.cancel_soon() - # XXX NOTE XXX pump the `asyncio` event-loop to allow + # ------ - ------ + # XXX SECOND PART: + # ------ - ------ + # Pump the `asyncio` event-loop to allow # `trio`-side to `trio`-guest-run to complete and # teardown !! # - # *WITHOUT THIS* the guest-run can get race-conditionally abandoned!! - # XD - # - await asyncio.sleep(.1) # `delay` can't be 0 either XD - while not trio_done_fut.done(): + # oh `asyncio`, how i don't miss you at all XD + while not trio_done_fute.done(): log.runtime( 'Waiting on main guest-run `asyncio` task to complete..\n' - f'|_trio_done_fut: {trio_done_fut}\n' + f'|_trio_done_fut: {trio_done_fute}\n' ) - await asyncio.sleep(.1) + await asyncio.sleep(_sigint_loop_pump_delay) - # XXX: don't actually need the shield.. seems to - # make no difference (??) and we know it spawns an - # internal task.. - # await asyncio.shield(asyncio.sleep(.1)) - - # XXX alt approach but can block indefinitely.. - # so don't use? + # XXX is there any alt API/approach like the internal + # call below but that doesn't block indefinitely..? # loop._run_once() try: - return trio_done_fut.result() + return trio_done_fute.result() except asyncio.exceptions.InvalidStateError as state_err: # XXX be super dupere noisy about abandonment issues! -- 2.34.1 From 32e12c8b034f1043e1859209bac311c513f9c2fc Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 28 Jun 2024 19:26:35 -0400 Subject: [PATCH 196/305] Todo a test for sync-pausing from non-main-root-tasks --- tests/test_debugger.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/test_debugger.py b/tests/test_debugger.py index e4f28548..a32eb2cf 100644 --- a/tests/test_debugger.py +++ b/tests/test_debugger.py @@ -1336,6 +1336,23 @@ def test_shield_pause( child.expect(pexpect.EOF) +# TODO: better error for "non-ideal" usage from the root actor. +# -[ ] if called from an async scope emit a message that suggests +# using `await tractor.pause()` instead since it's less overhead +# (in terms of `greenback` and/or extra threads) and if it's from +# a sync scope suggest that usage must first call +# `ensure_portal()` in the (eventual parent) async calling scope? 
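+#
+# rough (hypothetical) usage sketch of that eventual async-scope
+# requirement (assumes `greenback` + `tractor` imported):
+#
+#     async def parent_task():
+#         await greenback.ensure_portal()
+#         ...
+#         # sync code called from this task can then use:
+#         tractor.pause_from_sync()
+#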
+def test_sync_pause_from_bg_task_in_root_actor_(): + ''' + When used from the root actor, normally we can only implicitly + support `.pause_from_sync()` from the main-parent-task (that + opens the runtime via `open_root_actor()`) since `greenback` + requires a `.ensure_portal()` call per `trio.Task` where it is + used. + + ''' + ... + # TODO: needs ANSI code stripping tho, see `assert_before()` # above! def test_correct_frames_below_hidden(): ''' -- 2.34.1 From 060ee1457eca5c9702b96d8afb444fa78280476d Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 2 Jul 2024 17:06:50 -0400 Subject: [PATCH 197/305] More failed REPL-lock-request refinements In `lock_stdio_for_peer()` better internal-error handling/reporting: - only `Lock._blocked.remove(ctx.cid)` if that same cid was added on entry to avoid needless key-errors. - drop all `Lock.release(force: bool)` usage remnants. - if `req_ctx.cancel()` fails mention it with `ctx_err.add_note()`. - add more explicit internal-failed-request log messaging via a new `fail_reason: str`. - use and use new `x)<=\n|_` annots in any failure logging. Other cleanups/niceties: - drop `force: bool` flag entirely from the `Lock.release()`. - use more supervisor-op-annots in `.pdb()` logging with both `_pause/crash_msg: str` instead of double '|' lines when `.pdb()`-reported from `._set_trace()`/`._post_mortem()`. --- tractor/devx/_debug.py | 96 +++++++++++++++++++++++------------------- 1 file changed, 53 insertions(+), 43 deletions(-) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 1135932c..113371d8 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -299,7 +299,6 @@ class Lock: @pdbp.hideframe def release( cls, - force: bool = False, raise_on_thread: bool = True, ) -> bool: @@ -347,12 +346,9 @@ class Lock: lock: trio.StrictFIFOLock = cls._debug_lock owner: Task = lock.statistics().owner if ( - (lock.locked() or force) - # ^-TODO-NOTE-^ should we just remove this, since the - # RTE case above will always happen when you force - # from the wrong task? - - and (owner is task) + lock.locked() + and + (owner is task) # ^-NOTE-^ if we do NOT ensure this, `trio` will # raise a RTE when a non-owner tries to releasee the # lock. @@ -553,6 +549,7 @@ async def lock_stdio_for_peer( # can try to avoid clobbering any connection from a child # that's currently relying on it. 
we_finished = Lock.req_handler_finished = trio.Event() + lock_blocked: bool = False try: if ctx.cid in Lock._blocked: raise RuntimeError( @@ -565,7 +562,8 @@ async def lock_stdio_for_peer( 'Consider that an internal bug exists given the TTY ' '`Lock`ing IPC dialog..\n' ) - + Lock._blocked.add(ctx.cid) + lock_blocked = True root_task_name: str = current_task().name if tuple(subactor_uid) in Lock._blocked: log.warning( @@ -575,7 +573,11 @@ async def lock_stdio_for_peer( ) ctx._enter_debugger_on_cancel: bool = False message: str = ( - f'Debug lock blocked for {subactor_uid}\n' + f'Debug lock blocked for subactor\n\n' + f'x)<= {subactor_uid}\n\n' + + f'Likely because the root actor already started shutdown and is ' + 'closing IPC connections for this child!\n\n' 'Cancelling debug request!\n' ) log.cancel(message) @@ -589,7 +591,6 @@ async def lock_stdio_for_peer( f'remote task: {subactor_task_uid}\n' ) DebugStatus.shield_sigint() - Lock._blocked.add(ctx.cid) # NOTE: we use the IPC ctx's cancel scope directly in order to # ensure that on any transport failure, or cancellation request @@ -648,31 +649,34 @@ async def lock_stdio_for_peer( ) except BaseException as req_err: - message: str = ( - f'On behalf of remote peer {subactor_task_uid!r}@{ctx.chan.uid!r}\n\n' - 'Forcing `Lock.release()` for req-ctx since likely an ' - 'internal error!\n\n' - f'{ctx}' + fail_reason: str = ( + f'on behalf of peer\n\n' + f'x)<=\n' + f' |_{subactor_task_uid!r}@{ctx.chan.uid!r}\n\n' + + 'Forcing `Lock.release()` due to acquire failure!\n\n' + f'x)=> {ctx}\n' ) if isinstance(req_err, trio.Cancelled): - message = ( - 'Cancelled during root TTY-lock dialog\n' + fail_reason = ( + 'Cancelled during stdio-mutex request ' + - message + fail_reason ) else: - message = ( - 'Errored during root TTY-lock dialog\n' + fail_reason = ( + 'Failed to deliver stdio-mutex request ' + - message + fail_reason ) - log.exception(message) - Lock.release() #force=True) + log.exception(fail_reason) + Lock.release() raise finally: - Lock._blocked.remove(ctx.cid) + if lock_blocked: + Lock._blocked.remove(ctx.cid) # wakeup any waiters since the lock was (presumably) # released, possibly only temporarily. @@ -1167,7 +1171,7 @@ async def request_root_stdio_lock( ): log.cancel( 'Debug lock request was CANCELLED?\n\n' - f'{req_ctx}\n' + f'<=c) {req_ctx}\n' # f'{pformat_cs(req_cs, var_name="req_cs")}\n\n' # f'{pformat_cs(req_ctx._scope, var_name="req_ctx._scope")}\n\n' ) @@ -1179,22 +1183,26 @@ async def request_root_stdio_lock( message: str = ( 'Failed during debug request dialog with root actor?\n\n' ) - - if req_ctx: + if (req_ctx := DebugStatus.req_ctx): message += ( - f'{req_ctx}\n' + f'<=x) {req_ctx}\n\n' f'Cancelling IPC ctx!\n' ) - await req_ctx.cancel() + try: + await req_ctx.cancel() + except trio.ClosedResourceError as terr: + ctx_err.add_note( + # f'Failed with {type(terr)!r} x)> `req_ctx.cancel()` ' + f'Failed with `req_ctx.cancel()` (\n' + f' |_ {task} @ {actor.uid}\n' + # ^-TODO-^ more compact pformating? # -[ ] make an `Actor.__repr()__` # -[ ] should we use `log.pformat_task_uid()`? - f'|_ {task} @ {actor.uid}\n' ) # presuming the caller passed in the "api frame" # (the last frame before user code - like `.pause()`) @@ -2541,9 +2551,9 @@ def _post_mortem( # here! 
Bo log.pdb( f'{_crash_msg}\n' - '|\n' - # f'|_ {current_task()}\n' - f'|_ {current_task()} @ {actor.uid}\n' + # '|\n' + f'x>(\n' + f' |_ {current_task()} @ {actor.uid}\n' # f'|_ @{actor.uid}\n' # TODO: make an `Actor.__repr()__` -- 2.34.1 From 701dd135eb45c040a2d69989cb6f732d1f80c1a7 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 5 Jul 2024 13:32:03 -0400 Subject: [PATCH 198/305] Another tweak to REPL entry `.pdb()` headers --- tractor/devx/_debug.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 113371d8..ae1c46db 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -1552,7 +1552,7 @@ def shield_sigint_handler( log.devx('exiting SIGINT') -_pause_msg: str = 'Attaching to pdb REPL in actor' +_pause_msg: str = 'Opening a pdb REPL in paused actor' class DebugRequestError(RuntimeError): @@ -2050,9 +2050,8 @@ def _set_trace( # root here? Bo log.pdb( f'{_pause_msg}\n' - # '|\n' f'>(\n' - f' |_ {task} @ {actor.uid}\n' + f'|_ {task} @ {actor.uid}\n' # ^-TODO-^ more compact pformating? # -[ ] make an `Actor.__repr()__` # -[ ] should we use `log.pformat_task_uid()`? @@ -2523,7 +2522,7 @@ async def breakpoint( _crash_msg: str = ( - 'Attaching to pdb REPL in crashed actor' + 'Opening a pdb REPL in crashed actor' ) @@ -2551,11 +2550,9 @@ def _post_mortem( # here! Bo log.pdb( f'{_crash_msg}\n' - # '|\n' f'x>(\n' - f' |_ {current_task()} @ {actor.uid}\n' + f' |_ {current_task()} @ {actor.uid}\n' - # f'|_ @{actor.uid}\n' # TODO: make an `Actor.__repr()__` # f'|_ {current_task()} @ {actor.name}\n' ) -- 2.34.1 From 5cdd012417ebbb520bff326f0f7a2de7453151cd Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 8 Jul 2024 20:57:41 -0400 Subject: [PATCH 199/305] Get multi-threaded sync-pausing fully workin! The final issue was making sure we do the same thing on ctl-c/SIGINT from the user. That is, if there's already a bg-thread in REPL, we `log.pdb()` about SIGINT shielding and re-draw the prompt; the same UX as normal actor-runtime-task behaviour. Reasons this wasn't workin.. and the fix: - `.pause_from_sync()` was overriding the local `repl` var with `None` delivered by (transitive) calls to `_pause(debug_func=None)`.. so remove all that and only assign it OAOO prior to thread-type case branching. - always call `DebugStatus.shield_sigint()` as needed from all requesting threads/tasks: - in `_pause_from_bg_root_thread()` BEFORE calling `._pause()` AND BEFORE yielding back to the bg-thread via `.started(out)` to ensure we're definitely overriding the handler in the `trio`-main-thread task before unblocking the requesting bg-thread. - from any requesting bg-thread in the root actor such that both its main-`trio`-thread scheduled task (as per above bullet) AND it are SIGINT shielded. - always call `.shield_sigint()` BEFORE any `greenback._await()` case don't entirely grok why yet, but it works)? - for `greenback._await()` case always set `bg_task` to the current one.. - tweaks to the `SIGINT` handler, now renamed `sigint_shield()` so as not to name-collide with the methods when editor-searching: - always try to `repr()` the REPL thread/task "owner" as well as the active `PdbREPL` instance. - add `.devx()` notes around the prompt flushing deats and comments for any root-actor-bg-thread edge cases. Related/supporting refinements: - add `get_lock()`/`get_debug_req()` factory funcs since the plan is to eventually implement both as `@singleton` instances per actor. 
- fix `acquire_debug_lock()`'s call-sig-bug for scheduling `request_root_stdio_lock()`.. - in `._pause()` only call `mk_pdb()` when `debug_func != None`. - add some todo/warning notes around the `cls.repl = None` in `DebugStatus.release()` `test_pause_from_sync()` tweaks: - don't use a `attach_patts.copy()`, since we always `break` on match. - do `pytest.fail()` on that ^ loop's fallthrough.. - pass `do_ctlc(child, patt=attach_key)` such that we always match the the current thread's name with the ctl-c triggered `.pdb()` emission. - oh yeah, return the last `before: str` from `do_ctlc()`. - in the script, flip `abandon_on_cancel=True` since when `False` it seems to cause `trio.run()` to hang on exit from the last bg-thread case?!? --- examples/debugging/sync_bp.py | 24 ++++- tests/test_debugger.py | 57 +++++++++-- tractor/devx/__init__.py | 2 +- tractor/devx/_debug.py | 184 ++++++++++++++++++++++++++-------- 4 files changed, 211 insertions(+), 56 deletions(-) diff --git a/examples/debugging/sync_bp.py b/examples/debugging/sync_bp.py index 137710fc..95472c93 100644 --- a/examples/debugging/sync_bp.py +++ b/examples/debugging/sync_bp.py @@ -4,6 +4,13 @@ import time import trio import tractor +# TODO: only import these when not running from test harness? +# can we detect `pexpect` usage maybe? +# from tractor.devx._debug import ( +# get_lock, +# get_debug_req, +# ) + def sync_pause( use_builtin: bool = False, @@ -18,7 +25,13 @@ def sync_pause( breakpoint(hide_tb=hide_tb) else: + # TODO: maybe for testing some kind of cm style interface + # where the `._set_trace()` call doesn't happen until block + # exit? + # assert get_lock().ctx_in_debug is None + # assert get_debug_req().repl is None tractor.pause_from_sync() + # assert get_debug_req().repl is None if error: raise RuntimeError('yoyo sync code error') @@ -41,10 +54,11 @@ async def start_n_sync_pause( async def main() -> None: async with ( tractor.open_nursery( - # NOTE: required for pausing from sync funcs - maybe_enable_greenback=True, debug_mode=True, - # loglevel='cancel', + maybe_enable_greenback=True, + enable_stack_on_sig=True, + # loglevel='warning', + # loglevel='devx', ) as an, trio.open_nursery() as tn, ): @@ -138,7 +152,9 @@ async def main() -> None: # the case 2. from above still exists! use_builtin=True, ), - abandon_on_cancel=False, + # TODO: with this `False` we can hang!??! + # abandon_on_cancel=False, + abandon_on_cancel=True, thread_name='inline_root_bg_thread', ) diff --git a/tests/test_debugger.py b/tests/test_debugger.py index a32eb2cf..347a6849 100644 --- a/tests/test_debugger.py +++ b/tests/test_debugger.py @@ -299,7 +299,9 @@ def do_ctlc( # needs some further investigation potentially... expect_prompt: bool = not _ci_env, -) -> None: +) -> str|None: + + before: str|None = None # make sure ctl-c sends don't do anything but repeat output for _ in range(count): @@ -309,15 +311,18 @@ def do_ctlc( # TODO: figure out why this makes CI fail.. # if you run this test manually it works just fine.. if expect_prompt: - before = str(child.before.decode()) time.sleep(delay) child.expect(PROMPT) + before = str(child.before.decode()) time.sleep(delay) if patt: # should see the last line on console assert patt in before + # return the console content up to the final prompt + return before + def test_root_actor_bp_forever( spawn, @@ -1085,10 +1090,10 @@ def test_pause_from_sync( ) if ctlc: do_ctlc(child) + # ^NOTE^ subactor not spawned yet; don't need extra delay. 
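+    # (fyi 'c' is `pdb`'s continue cmd; each `.sendline('c')`
+    #  resumes the child til its next REPL entry which we then
+    #  `.expect(PROMPT)` on.)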
child.sendline('c') - # first `await tractor.pause()` inside `p.open_context()` body child.expect(PROMPT) @@ -1109,7 +1114,27 @@ def test_pause_from_sync( ) if ctlc: - do_ctlc(child) + do_ctlc( + child, + # NOTE: setting this to 0 (or some other sufficient + # small val) can cause the test to fail since the + # `subactor` suffers a race where the root/parent + # sends an actor-cancel prior to it hitting its pause + # point; by def the value is 0.1 + delay=0.3, + ) + + # XXX, fwiw without a brief sleep here the SIGINT might actually + # trigger "subactor" cancellation by its parent before the + # shield-handler is engaged. + # + # => similar to the `delay` input to `do_ctlc()` below, setting + # this too low can cause the test to fail since the `subactor` + # suffers a race where the root/parent sends an actor-cancel + # prior to the context task hitting its pause point (and thus + # engaging the `sigint_shield()` handler in time); this value + # seems be good enuf? + time.sleep(0.6) # one of the bg thread or subactor should have # `Lock.acquire()`-ed @@ -1128,29 +1153,45 @@ def test_pause_from_sync( "('root'", ], } + conts: int = 0 # for debugging below matching logic on failure while attach_patts: child.sendline('c') + conts += 1 child.expect(PROMPT) before = str(child.before.decode()) - for key in attach_patts.copy(): + for key in attach_patts: if key in before: + attach_key: str = key expected_patts: str = attach_patts.pop(key) assert_before( child, - [_pause_msg] + expected_patts + [_pause_msg] + + + expected_patts ) break + else: + pytest.fail( + f'No keys found?\n\n' + f'{attach_patts.keys()}\n\n' + f'{before}\n' + ) # ensure no other task/threads engaged a REPL # at the same time as the one that was detected above. - for key, other_patts in attach_patts.items(): + for key, other_patts in attach_patts.copy().items(): assert not in_prompt_msg( before, other_patts, ) if ctlc: - do_ctlc(child) + do_ctlc( + child, + patt=attach_key, + # NOTE same as comment above + delay=0.3, + ) child.sendline('c') child.expect(pexpect.EOF) diff --git a/tractor/devx/__init__.py b/tractor/devx/__init__.py index cfcff931..7047dbdb 100644 --- a/tractor/devx/__init__.py +++ b/tractor/devx/__init__.py @@ -26,7 +26,7 @@ from ._debug import ( breakpoint as breakpoint, pause as pause, pause_from_sync as pause_from_sync, - shield_sigint_handler as shield_sigint_handler, + sigint_shield as sigint_shield, open_crash_handler as open_crash_handler, maybe_open_crash_handler as maybe_open_crash_handler, maybe_init_greenback as maybe_init_greenback, diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index ae1c46db..02551fa1 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -409,9 +409,9 @@ class Lock: repl_task ) message += ( - f'\nA non-caller task still owns this lock on behalf of ' - f'{behalf_of_task}\n' - f'|_{lock_stats.owner}\n' + f'A non-caller task still owns this lock on behalf of ' + f'`{behalf_of_task}`\n' + f'lock owner task: {lock_stats.owner}\n' ) if ( @@ -523,6 +523,10 @@ class Lock: ) +def get_lock() -> Lock: + return Lock + + @tractor.context( # enable the locking msgspec pld_spec=__pld_spec__, @@ -788,13 +792,13 @@ class DebugStatus: cls._orig_sigint_handler: Callable = trio.from_thread.run_sync( signal.signal, signal.SIGINT, - shield_sigint_handler, + sigint_shield, ) else: cls._orig_sigint_handler = signal.signal( signal.SIGINT, - shield_sigint_handler, + sigint_shield, ) @classmethod @@ -900,12 +904,30 @@ class DebugStatus: # actor-local state, irrelevant for non-root. 
cls.repl_task = None + + # XXX WARNING needs very special caughtion, and we should + # prolly make a more explicit `@property` API? + # + # - if unset in root multi-threaded case can cause + # issues with detecting that some root thread is + # using a REPL, + # + # - what benefit is there to unsetting, it's always + # set again for the next task in some actor.. + # only thing would be to avoid in the sigint-handler + # logging when we don't need to? cls.repl = None # restore original sigint handler cls.unshield_sigint() + +# TODO: use the new `@lowlevel.singleton` for this! +def get_debug_req() -> DebugStatus|None: + return DebugStatus + + class TractorConfig(pdbp.DefaultConfig): ''' Custom `pdbp` config which tries to use the best tradeoff @@ -1311,7 +1333,7 @@ def any_connected_locker_child() -> bool: return False -def shield_sigint_handler( +def sigint_shield( signum: int, frame: 'frame', # type: ignore # noqa *args, @@ -1351,13 +1373,17 @@ def shield_sigint_handler( # root actor branch that reports whether or not a child # has locked debugger. if is_root_process(): + # log.warning( + log.devx( + 'Handling SIGINT in root actor\n' + f'{Lock.repr()}' + f'{DebugStatus.repr()}\n' + ) # try to see if the supposed (sub)actor in debug still # has an active connection to *this* actor, and if not # it's likely they aren't using the TTY lock / debugger # and we should propagate SIGINT normally. any_connected: bool = any_connected_locker_child() - # if not any_connected: - # return do_cancel() problem = ( f'root {actor.uid} handling SIGINT\n' @@ -1406,19 +1432,25 @@ def shield_sigint_handler( # an actor using the `Lock` (a bug state) ?? # => so immediately cancel any stale lock cs and revert # the handler! - if not repl: + if not DebugStatus.repl: # TODO: WHEN should we revert back to ``trio`` # handler if this one is stale? # -[ ] maybe after a counts work of ctl-c mashes? # -[ ] use a state var like `stale_handler: bool`? problem += ( - '\n' 'No subactor is using a `pdb` REPL according `Lock.ctx_in_debug`?\n' - 'BUT, the root should be using it, WHY this handler ??\n' + 'BUT, the root should be using it, WHY this handler ??\n\n' + 'So either..\n' + '- some root-thread is using it but has no `.repl` set?, OR\n' + '- something else weird is going on outside the runtime!?\n' ) else: + # NOTE: since we emit this msg on ctl-c, we should + # also always re-print the prompt the tail block! log.pdb( 'Ignoring SIGINT while pdb REPL in use by root actor..\n' + f'{DebugStatus.repl_task}\n' + f' |_{repl}\n' ) problem = None @@ -1468,7 +1500,6 @@ def shield_sigint_handler( 'Allowing SIGINT propagation..' ) DebugStatus.unshield_sigint() - # do_cancel() repl_task: str|None = DebugStatus.repl_task req_task: str|None = DebugStatus.req_task @@ -1483,10 +1514,15 @@ def shield_sigint_handler( f' |_{repl}\n' ) elif req_task: - log.pdb( - f'Ignoring SIGINT while debug request task is open\n' + log.debug( + 'Ignoring SIGINT while debug request task is open but either,\n' + '- someone else is already REPL-in and has the `Lock`, or\n' + '- some other local task already is replin?\n' f'|_{req_task}\n' ) + + # TODO can we remove this now? + # -[ ] does this path ever get hit any more? else: msg: str = ( 'SIGINT shield handler still active BUT, \n\n' @@ -1522,31 +1558,47 @@ def shield_sigint_handler( # https://github.com/goodboy/tractor/issues/320 # elif debug_mode(): - # NOTE: currently (at least on ``fancycompleter`` 0.9.2) - # it looks to be that the last command that was run (eg. ll) - # will be repeated by default. 
- # maybe redraw/print last REPL output to console since # we want to alert the user that more input is expect since # nothing has been done dur to ignoring sigint. if ( - repl # only when current actor has a REPL engaged + DebugStatus.repl # only when current actor has a REPL engaged ): + flush_status: str = ( + 'Flushing stdout to ensure new prompt line!\n' + ) + # XXX: yah, mega hack, but how else do we catch this madness XD - if repl.shname == 'xonsh': + if ( + repl.shname == 'xonsh' + ): + flush_status += ( + '-> ALSO re-flushing due to `xonsh`..\n' + ) repl.stdout.write(repl.prompt) + # log.warning( + log.devx( + flush_status + ) repl.stdout.flush() - # TODO: make this work like sticky mode where if there is output - # detected as written to the tty we redraw this part underneath - # and erase the past draw of this same bit above? + # TODO: better console UX to match the current "mode": + # -[ ] for example if in sticky mode where if there is output + # detected as written to the tty we redraw this part underneath + # and erase the past draw of this same bit above? # repl.sticky = True # repl._print_if_sticky() - # also see these links for an approach from ``ptk``: + # also see these links for an approach from `ptk`: # https://github.com/goodboy/tractor/issues/130#issuecomment-663752040 # https://github.com/prompt-toolkit/python-prompt-toolkit/blob/c2c6af8a0308f9e5d7c0e28cb8a02963fe0ce07a/prompt_toolkit/patch_stdout.py + else: + log.devx( + # log.warning( + 'Not flushing stdout since not needed?\n' + f'|_{repl}\n' + ) # XXX only for tracing this handler log.devx('exiting SIGINT') @@ -1617,7 +1669,7 @@ async def _pause( # 'directly (infected) `asyncio` tasks!' # ) from rte - raise + raise rte if debug_func is not None: debug_func = partial(debug_func) @@ -1625,9 +1677,13 @@ async def _pause( # XXX NOTE XXX set it here to avoid ctl-c from cancelling a debug # request from a subactor BEFORE the REPL is entered by that # process. - if not repl: + if ( + not repl + and + debug_func + ): + repl: PdbREPL = mk_pdb() DebugStatus.shield_sigint() - repl: PdbREPL = repl or mk_pdb() # TODO: move this into a `open_debug_request()` @acm? # -[ ] prolly makes the most sense to do the request @@ -1662,7 +1718,13 @@ async def _pause( # recurrent entries/requests from the same # actor-local task. DebugStatus.repl_task = task - DebugStatus.repl = repl + if repl: + DebugStatus.repl = repl + else: + log.error( + 'No REPl instance set before entering `debug_func`?\n' + f'{debug_func}\n' + ) # invoke the low-level REPL activation routine which itself # should call into a `Pdb.set_trace()` of some sort. @@ -2001,7 +2063,7 @@ async def _pause( DebugStatus.release(cancel_req_task=True) # sanity checks for ^ on request/status teardown - assert DebugStatus.repl is None + # assert DebugStatus.repl is None # XXX no more bc bg thread cases? assert DebugStatus.repl_task is None # sanity, for when hackin on all this? @@ -2240,7 +2302,12 @@ async def _pause_from_bg_root_thread( 'Trying to acquire `Lock` on behalf of bg thread\n' f'|_{behalf_of_thread}\n' ) - # DebugStatus.repl_task = behalf_of_thread + + # NOTE: this is already a task inside the main-`trio`-thread, so + # we don't need to worry about calling it another time from the + # bg thread on which who's behalf this task is operating. 
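+    # (also note the stdlib's `signal.signal()` may only be
+    #  called from the main interpreter thread, which is exactly
+    #  why `.shield_sigint()` hops via `trio.from_thread.run_sync()`
+    #  whenever it's invoked from anywhere else.)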
+ DebugStatus.shield_sigint() + out = await _pause( debug_func=None, repl=repl, @@ -2249,6 +2316,8 @@ async def _pause_from_bg_root_thread( called_from_bg_thread=True, **_pause_kwargs ) + DebugStatus.repl_task = behalf_of_thread + lock: trio.FIFOLock = Lock._debug_lock stats: trio.LockStatistics= lock.statistics() assert stats.owner is task @@ -2282,7 +2351,6 @@ async def _pause_from_bg_root_thread( f'|_{behalf_of_thread}\n' ) task_status.started(out) - DebugStatus.shield_sigint() # wait for bg thread to exit REPL sesh. try: @@ -2323,7 +2391,7 @@ def pause_from_sync( err_on_no_runtime=False, ) message: str = ( - f'{actor.uid} task called `tractor.pause_from_sync()`\n\n' + f'{actor.uid} task called `tractor.pause_from_sync()`\n' ) if not actor: raise RuntimeError( @@ -2347,7 +2415,6 @@ def pause_from_sync( 'for infected `asyncio` mode!' ) - DebugStatus.shield_sigint() repl: PdbREPL = mk_pdb() # message += f'-> created local REPL {repl}\n' @@ -2365,6 +2432,10 @@ def pause_from_sync( # thread which will call `._pause()` manually with special # handling for root-actor caller usage. if not DebugStatus.is_main_trio_thread(): + + # TODO: `threading.Lock()` this so we don't get races in + # multi-thr cases where they're acquiring/releasing the + # REPL and setting request/`Lock` state, etc.. thread: threading.Thread = threading.current_thread() repl_owner = thread @@ -2372,9 +2443,16 @@ def pause_from_sync( if is_root: message += ( f'-> called from a root-actor bg {thread}\n' - f'-> scheduling `._pause_from_sync_thread()`..\n' + f'-> scheduling `._pause_from_bg_root_thread()`..\n' ) - bg_task, repl = trio.from_thread.run( + # XXX SUBTLE BADNESS XXX that should really change! + # don't over-write the `repl` here since when + # this behalf-of-bg_thread-task calls pause it will + # pass `debug_func=None` which will result in it + # returing a `repl==None` output and that get's also + # `.started(out)` back here! So instead just ignore + # that output and assign the `repl` created above! + bg_task, _ = trio.from_thread.run( afn=partial( actor._service_n.start, partial( @@ -2386,8 +2464,9 @@ def pause_from_sync( ), ) ) + DebugStatus.shield_sigint() message += ( - f'-> `._pause_from_sync_thread()` started bg task {bg_task}\n' + f'-> `._pause_from_bg_root_thread()` started bg task {bg_task}\n' ) else: message += f'-> called from a bg {thread}\n' @@ -2396,7 +2475,7 @@ def pause_from_sync( # `request_root_stdio_lock()` and we don't need to # worry about all the special considerations as with # the root-actor per above. - bg_task, repl = trio.from_thread.run( + bg_task, _ = trio.from_thread.run( afn=partial( _pause, debug_func=None, @@ -2411,6 +2490,9 @@ def pause_from_sync( **_pause_kwargs ), ) + # ?TODO? XXX where do we NEED to call this in the + # subactor-bg-thread case? + DebugStatus.shield_sigint() assert bg_task is not DebugStatus.repl_task else: # we are presumably the `trio.run()` + main thread @@ -2423,6 +2505,11 @@ def pause_from_sync( # greenback: ModuleType = await maybe_init_greenback() message += f'-> imported {greenback}\n' + + # NOTE XXX seems to need to be set BEFORE the `_pause()` + # invoke using gb below? 
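+        # (likely because `greenback.await_()` can switch into
+        #  REPL-entry code before control ever returns to this
+        #  frame, so installing the handler up-front avoids any
+        #  window where the default int-handler is still active?)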
+ DebugStatus.shield_sigint() + repl_owner: Task = current_task() message += '-> calling `greenback.await_(_pause(debug_func=None))` from sync caller..\n' try: @@ -2448,9 +2535,12 @@ def pause_from_sync( raise if out: - bg_task, repl = out - assert repl is repl - assert bg_task is repl_owner + bg_task, _ = out + else: + bg_task: Task = current_task() + + # assert repl is repl + assert bg_task is repl_owner # NOTE: normally set inside `_enter_repl_sync()` DebugStatus.repl_task: str = repl_owner @@ -2464,7 +2554,10 @@ def pause_from_sync( ) log.devx(message) + # NOTE set as late as possible to avoid state clobbering + # in the multi-threaded case! DebugStatus.repl = repl + _set_trace( api_frame=api_frame or inspect.currentframe(), repl=repl, @@ -2665,7 +2758,8 @@ async def acquire_debug_lock( tuple, ]: ''' - Request to acquire the TTY `Lock` in the root actor, release on exit. + Request to acquire the TTY `Lock` in the root actor, release on + exit. This helper is for actor's who don't actually need to acquired the debugger but want to wait until the lock is free in the @@ -2677,10 +2771,14 @@ async def acquire_debug_lock( yield None return + task: Task = current_task() async with trio.open_nursery() as n: ctx: Context = await n.start( - request_root_stdio_lock, - subactor_uid, + partial( + request_root_stdio_lock, + actor_uid=subactor_uid, + task_uid=(task.name, id(task)), + ) ) yield ctx ctx.cancel() -- 2.34.1 From 4ada92d2f789550fe889c195f0491921711045e5 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 10 Jul 2024 15:40:44 -0400 Subject: [PATCH 200/305] Move `mk_cmd()` to `._testing` Since we're going to need it more generally for `.devx` sub-sys tooling tests. Also, up the sync-pause ctl-c delay another 10ms.. --- tests/test_debugger.py | 36 +++++++++++++++++++----------------- tractor/_testing/__init__.py | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+), 17 deletions(-) diff --git a/tests/test_debugger.py b/tests/test_debugger.py index 347a6849..03b3ae4f 100644 --- a/tests/test_debugger.py +++ b/tests/test_debugger.py @@ -13,7 +13,6 @@ TODO: from functools import partial import itertools import platform -import pathlib import time import pytest @@ -24,7 +23,7 @@ from pexpect.exceptions import ( ) from tractor._testing import ( - examples_dir, + mk_cmd, ) from tractor.devx._debug import ( _pause_msg, @@ -52,15 +51,6 @@ if platform.system() == 'Windows': ) -def mk_cmd(ex_name: str) -> str: - ''' - Generate a command suitable to pass to ``pexpect.spawn()``. - - ''' - script_path: pathlib.Path = examples_dir() / 'debugging' / f'{ex_name}.py' - return ' '.join(['python', str(script_path)]) - - # TODO: was trying to this xfail style but some weird bug i see in CI # that's happening at collect time.. pretty soon gonna dump actions i'm # thinkin... @@ -84,19 +74,31 @@ def spawn( start_method, testdir, reg_addr, -) -> 'pexpect.spawn': +) -> 'pexpect.spawn': + ''' + Use the `pexpect` module shipped via `testdir.spawn()` to + run an `./examples/..` script by name. + + ''' if start_method != 'trio': pytest.skip( - "Debugger tests are only supported on the trio backend" + '`pexpect` based tests only supported on `trio` backend' ) - def _spawn(cmd): + def _spawn( + cmd: str, + **mkcmd_kwargs, + ): return testdir.spawn( - cmd=mk_cmd(cmd), + cmd=mk_cmd( + cmd, + **mkcmd_kwargs, + ), expect_timeout=3, ) + # such that test-dep can pass input script name. 
return _spawn @@ -1121,7 +1123,7 @@ def test_pause_from_sync( # `subactor` suffers a race where the root/parent # sends an actor-cancel prior to it hitting its pause # point; by def the value is 0.1 - delay=0.3, + delay=0.4, ) # XXX, fwiw without a brief sleep here the SIGINT might actually @@ -1190,7 +1192,7 @@ def test_pause_from_sync( child, patt=attach_key, # NOTE same as comment above - delay=0.3, + delay=0.4, ) child.sendline('c') diff --git a/tractor/_testing/__init__.py b/tractor/_testing/__init__.py index fd79fe20..1f6624e9 100644 --- a/tractor/_testing/__init__.py +++ b/tractor/_testing/__init__.py @@ -54,6 +54,25 @@ def examples_dir() -> pathlib.Path: return repodir() / 'examples' +def mk_cmd( + ex_name: str, + exs_subpath: str = 'debugging', +) -> str: + ''' + Generate a shell command suitable to pass to ``pexpect.spawn()``. + + ''' + script_path: pathlib.Path = ( + examples_dir() + / exs_subpath + / f'{ex_name}.py' + ) + return ' '.join([ + 'python', + str(script_path) + ]) + + @acm async def expect_ctxc( yay: bool, -- 2.34.1 From 346e00973032c12e7306dea8e829d6f4dcd560a9 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 10 Jul 2024 15:52:38 -0400 Subject: [PATCH 201/305] Start a new `tests/devx/` tooling-subsuite-pkg --- tests/{ => devx}/test_debugger.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{ => devx}/test_debugger.py (100%) diff --git a/tests/test_debugger.py b/tests/devx/test_debugger.py similarity index 100% rename from tests/test_debugger.py rename to tests/devx/test_debugger.py -- 2.34.1 From ccbd35f273274e047d0c697969a46458b2621d21 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 10 Jul 2024 18:17:42 -0400 Subject: [PATCH 202/305] Officially test proto-ed `stackscope` integration By re-purposing our `pexpect`-based console matching with a new `debugging/shield_hang_in_sub.py` example, this tests a few "hanging actor" conditions more formally: - that despite a hanging actor's task we can dump a `stackscope.extract()` tree on relay of `SIGUSR1`. - the actor tree will terminate despite a shielded forever-sleep by our "T-800" zombie reaper machinery activating and hard killing the underlying subprocess. Some test deats: - simulates the expect actions of a real user by manually using `os.kill()` to send both signals to the actor-tree program. - `pexpect`-matches against `log.devx()` emissions under normal `debug_mode == True` usage. - ensure we get the actual "T-800 deployed" `log.error()` msg and that the actor tree eventually terminates! Surrounding (re-org/impl/test-suite) changes: - allow disabling usage via a `maybe_enable_greenback: bool` to `open_root_actor()` but enable by def. - pretty up the actual `.devx()` content from `.devx._stackscope` including be extra pedantic about the conc-primitives for each signal event. - try to avoid double handles of `SIGUSR1` even though it seems the original (what i thought was a) problem was actually just double logging in the handler.. |_ avoid double applying the handler func via `signal.signal()`, |_ use a global to avoid double handle func calls and, |_ a `threading.RLock` around handling. - move common fixtures and helper routines from `test_debugger` to `tests/devx/conftest.py` and import them for use in both test mods. 
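Dev-UX tip: none of this requires the test harness; a minimal
sketch of enabling it from app code (mirroring the new example
script) is just:

    import trio
    import tractor

    async def main():
        async with tractor.open_nursery(
            debug_mode=True,
            enable_stack_on_sig=True,
        ):
            await trio.sleep_forever()

    trio.run(main)

after which a `kill -SIGUSR1 $(pgrep -f '<your-script>')` from
any shell dumps (and relays to every subactor) the full
`stackscope.extract()` task-tree at `.devx()` log level.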
--- examples/debugging/shield_hang_in_sub.py | 81 +++++++++++ tests/devx/__init__.py | 0 tests/devx/conftest.py | 167 +++++++++++++++++++++ tests/devx/test_debugger.py | 178 ++--------------------- tests/devx/test_tooling.py | 120 +++++++++++++++ tractor/_root.py | 12 +- tractor/devx/_stackscope.py | 166 +++++++++++++++++---- 7 files changed, 521 insertions(+), 203 deletions(-) create mode 100644 examples/debugging/shield_hang_in_sub.py create mode 100644 tests/devx/__init__.py create mode 100644 tests/devx/conftest.py create mode 100644 tests/devx/test_tooling.py diff --git a/examples/debugging/shield_hang_in_sub.py b/examples/debugging/shield_hang_in_sub.py new file mode 100644 index 00000000..3cc084d5 --- /dev/null +++ b/examples/debugging/shield_hang_in_sub.py @@ -0,0 +1,81 @@ +''' +Verify we can dump a `stackscope` tree on a hang. + +''' +import os +import signal + +import trio +import tractor + +@tractor.context +async def start_n_shield_hang( + ctx: tractor.Context, +): + # actor: tractor.Actor = tractor.current_actor() + + # sync to parent-side task + await ctx.started(os.getpid()) + + print('Entering shield sleep..') + with trio.CancelScope(shield=True): + await trio.sleep_forever() # in subactor + + # XXX NOTE ^^^ since this shields, we expect + # the zombie reaper (aka T800) to engage on + # SIGINT from the user and eventually hard-kill + # this subprocess! + + +async def main( + from_test: bool = False, +) -> None: + + async with ( + tractor.open_nursery( + debug_mode=True, + enable_stack_on_sig=True, + # maybe_enable_greenback=False, + loglevel='devx', + ) as an, + ): + + ptl: tractor.Portal = await an.start_actor( + 'hanger', + enable_modules=[__name__], + debug_mode=True, + ) + async with ptl.open_context( + start_n_shield_hang, + ) as (ctx, cpid): + + _, proc, _ = an._children[ptl.chan.uid] + assert cpid == proc.pid + + print( + 'Yo my child hanging..?\n' + 'Sending SIGUSR1 to see a tree-trace!\n' + ) + + # XXX simulate the wrapping test's "user actions" + # (i.e. if a human didn't run this manually but wants to + # know what they should do to reproduce test behaviour) + if from_test: + os.kill( + cpid, + signal.SIGUSR1, + ) + + # simulate user cancelling program + await trio.sleep(0.5) + os.kill( + os.getpid(), + signal.SIGINT, + ) + else: + # actually let user send the ctl-c + await trio.sleep_forever() # in root + + +if __name__ == '__main__': + trio.run(main) diff --git a/tests/devx/__init__.py b/tests/devx/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/devx/conftest.py b/tests/devx/conftest.py new file mode 100644 index 00000000..b739569a --- /dev/null +++ b/tests/devx/conftest.py @@ -0,0 +1,167 @@ +''' +`tractor.devx.*` tooling sub-pkg test space. + +''' +from typing import ( + Callable, +) + +import pytest +from pexpect.exceptions import ( + TIMEOUT, +) +from tractor._testing import ( + mk_cmd, +) + + +@pytest.fixture +def spawn( + start_method, + testdir: pytest.Testdir, + reg_addr: tuple[str, int], + +) -> Callable[[str], None]: + ''' + Use the `pexpect` module shipped via `testdir.spawn()` to + run an `./examples/..` script by name. + + ''' + if start_method != 'trio': + pytest.skip( + '`pexpect` based tests only supported on `trio` backend' + ) + + def _spawn( + cmd: str, + **mkcmd_kwargs, + ): + return testdir.spawn( + cmd=mk_cmd( + cmd, + **mkcmd_kwargs, + ), + expect_timeout=3, + ) + + # such that test-dep can pass input script name. 
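+    # eg. `spawn('sync_bp')` -> a `pexpect` child running
+    # `python examples/debugging/sync_bp.py`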
+ return _spawn + + +@pytest.fixture( + params=[False, True], + ids='ctl-c={}'.format, +) +def ctlc( + request, + ci_env: bool, + +) -> bool: + + use_ctlc = request.param + + node = request.node + markers = node.own_markers + for mark in markers: + if mark.name == 'has_nested_actors': + pytest.skip( + f'Test {node} has nested actors and fails with Ctrl-C.\n' + f'The test can sometimes run fine locally but until' + ' we solve' 'this issue this CI test will be xfail:\n' + 'https://github.com/goodboy/tractor/issues/320' + ) + + if use_ctlc: + # XXX: disable pygments highlighting for auto-tests + # since some envs (like actions CI) will struggle + # the the added color-char encoding.. + from tractor.devx._debug import TractorConfig + TractorConfig.use_pygements = False + + yield use_ctlc + + +def expect( + child, + + # normally a `pdb` prompt by default + patt: str, + + **kwargs, + +) -> None: + ''' + Expect wrapper that prints last seen console + data before failing. + + ''' + try: + child.expect( + patt, + **kwargs, + ) + except TIMEOUT: + before = str(child.before.decode()) + print(before) + raise + + +def in_prompt_msg( + prompt: str, + parts: list[str], + + pause_on_false: bool = False, + err_on_false: bool = False, + print_prompt_on_false: bool = True, + +) -> bool: + ''' + Predicate check if (the prompt's) std-streams output has all + `str`-parts in it. + + Can be used in test asserts for bulk matching expected + log/REPL output for a given `pdb` interact point. + + ''' + __tracebackhide__: bool = False + + for part in parts: + if part not in prompt: + if pause_on_false: + import pdbp + pdbp.set_trace() + + if print_prompt_on_false: + print(prompt) + + if err_on_false: + raise ValueError( + f'Could not find pattern: {part!r} in `before` output?' + ) + return False + + return True + + +# TODO: todo support terminal color-chars stripping so we can match +# against call stack frame output from the the 'll' command the like! +# -[ ] SO answer for stipping ANSI codes: https://stackoverflow.com/a/14693789 +def assert_before( + child, + patts: list[str], + + **kwargs, + +) -> None: + __tracebackhide__: bool = False + + # as in before the prompt end + before: str = str(child.before.decode()) + assert in_prompt_msg( + prompt=before, + parts=patts, + + # since this is an "assert" helper ;) + err_on_false=True, + **kwargs + ) diff --git a/tests/devx/test_debugger.py b/tests/devx/test_debugger.py index 03b3ae4f..5ef039f3 100644 --- a/tests/devx/test_debugger.py +++ b/tests/devx/test_debugger.py @@ -16,15 +16,11 @@ import platform import time import pytest -import pexpect from pexpect.exceptions import ( TIMEOUT, EOF, ) -from tractor._testing import ( - mk_cmd, -) from tractor.devx._debug import ( _pause_msg, _crash_msg, @@ -32,6 +28,9 @@ from tractor.devx._debug import ( ) from .conftest import ( _ci_env, + expect, + in_prompt_msg, + assert_before, ) # TODO: The next great debugger audit could be done by you! @@ -69,154 +68,9 @@ has_nested_actors = pytest.mark.has_nested_actors # ) -@pytest.fixture -def spawn( - start_method, - testdir, - reg_addr, - -) -> 'pexpect.spawn': - ''' - Use the `pexpect` module shipped via `testdir.spawn()` to - run an `./examples/..` script by name. - - ''' - if start_method != 'trio': - pytest.skip( - '`pexpect` based tests only supported on `trio` backend' - ) - - def _spawn( - cmd: str, - **mkcmd_kwargs, - ): - return testdir.spawn( - cmd=mk_cmd( - cmd, - **mkcmd_kwargs, - ), - expect_timeout=3, - ) - - # such that test-dep can pass input script name. 
- return _spawn - - PROMPT = r"\(Pdb\+\)" -def expect( - child, - - # prompt by default - patt: str = PROMPT, - - **kwargs, - -) -> None: - ''' - Expect wrapper that prints last seen console - data before failing. - - ''' - try: - child.expect( - patt, - **kwargs, - ) - except TIMEOUT: - before = str(child.before.decode()) - print(before) - raise - - -def in_prompt_msg( - prompt: str, - parts: list[str], - - pause_on_false: bool = False, - print_prompt_on_false: bool = True, - -) -> bool: - ''' - Predicate check if (the prompt's) std-streams output has all - `str`-parts in it. - - Can be used in test asserts for bulk matching expected - log/REPL output for a given `pdb` interact point. - - ''' - __tracebackhide__: bool = False - - for part in parts: - if part not in prompt: - if pause_on_false: - import pdbp - pdbp.set_trace() - - if print_prompt_on_false: - print(prompt) - - return False - - return True - - -# TODO: todo support terminal color-chars stripping so we can match -# against call stack frame output from the the 'll' command the like! -# -[ ] SO answer for stipping ANSI codes: https://stackoverflow.com/a/14693789 -def assert_before( - child, - patts: list[str], - - **kwargs, - -) -> None: - __tracebackhide__: bool = False - - # as in before the prompt end - before: str = str(child.before.decode()) - assert in_prompt_msg( - prompt=before, - parts=patts, - - **kwargs - ) - - -@pytest.fixture( - params=[False, True], - ids='ctl-c={}'.format, -) -def ctlc( - request, - ci_env: bool, - -) -> bool: - - use_ctlc = request.param - - node = request.node - markers = node.own_markers - for mark in markers: - if mark.name == 'has_nested_actors': - pytest.skip( - f'Test {node} has nested actors and fails with Ctrl-C.\n' - f'The test can sometimes run fine locally but until' - ' we solve' 'this issue this CI test will be xfail:\n' - 'https://github.com/goodboy/tractor/issues/320' - ) - - if use_ctlc: - # XXX: disable pygments highlighting for auto-tests - # since some envs (like actions CI) will struggle - # the the added color-char encoding.. 
- from tractor.devx._debug import TractorConfig - TractorConfig.use_pygements = False - - yield use_ctlc - - @pytest.mark.parametrize( 'user_in_out', [ @@ -281,7 +135,7 @@ def test_root_actor_bp(spawn, user_in_out): child.expect('\r\n') # process should exit - child.expect(pexpect.EOF) + child.expect(EOF) if expect_err_str is None: assert 'Error' not in str(child.before) @@ -365,7 +219,7 @@ def test_root_actor_bp_forever( # quit out of the loop child.sendline('q') - child.expect(pexpect.EOF) + child.expect(EOF) @pytest.mark.parametrize( @@ -430,7 +284,7 @@ def test_subactor_error( child.expect('\r\n') # process should exit - child.expect(pexpect.EOF) + child.expect(EOF) def test_subactor_breakpoint( @@ -493,7 +347,7 @@ def test_subactor_breakpoint( child.sendline('c') # process should exit - child.expect(pexpect.EOF) + child.expect(EOF) before = str(child.before.decode()) assert in_prompt_msg( @@ -636,7 +490,7 @@ def test_multi_subactors( # process should exit child.sendline('c') - child.expect(pexpect.EOF) + child.expect(EOF) # repeat of previous multierror for final output assert_before(child, [ @@ -776,7 +630,7 @@ def test_multi_daemon_subactors( ) child.sendline('c') - child.expect(pexpect.EOF) + child.expect(EOF) @has_nested_actors @@ -852,7 +706,7 @@ def test_multi_subactors_root_errors( ]) child.sendline('c') - child.expect(pexpect.EOF) + child.expect(EOF) assert_before(child, [ # "Attaching to pdb in crashed actor: ('root'", @@ -982,7 +836,7 @@ def test_root_nursery_cancels_before_child_releases_tty_lock( for i in range(3): try: - child.expect(pexpect.EOF, timeout=0.5) + child.expect(EOF, timeout=0.5) break except TIMEOUT: child.sendline('c') @@ -1024,7 +878,7 @@ def test_root_cancels_child_context_during_startup( do_ctlc(child) child.sendline('c') - child.expect(pexpect.EOF) + child.expect(EOF) def test_different_debug_mode_per_actor( @@ -1045,7 +899,7 @@ def test_different_debug_mode_per_actor( do_ctlc(child) child.sendline('c') - child.expect(pexpect.EOF) + child.expect(EOF) before = str(child.before.decode()) @@ -1196,7 +1050,7 @@ def test_pause_from_sync( ) child.sendline('c') - child.expect(pexpect.EOF) + child.expect(EOF) def test_post_mortem_api( @@ -1301,7 +1155,7 @@ def test_post_mortem_api( # ) child.sendline('c') - child.expect(pexpect.EOF) + child.expect(EOF) def test_shield_pause( @@ -1376,7 +1230,7 @@ def test_shield_pause( ] ) child.sendline('c') - child.expect(pexpect.EOF) + child.expect(EOF) # TODO: better error for "non-ideal" usage from the root actor. diff --git a/tests/devx/test_tooling.py b/tests/devx/test_tooling.py new file mode 100644 index 00000000..3e48844e --- /dev/null +++ b/tests/devx/test_tooling.py @@ -0,0 +1,120 @@ +''' +That "native" runtime-hackin toolset better be dang useful! + +Verify the funtion of a variety of "developer-experience" tools we +offer from the `.devx` sub-pkg: + +- use of the lovely `stackscope` for dumping actor `trio`-task trees + during operation and hangs. + +TODO: +- demonstration of `CallerInfo` call stack frame filtering such that + for logging and REPL purposes a user sees exactly the layers needed + when debugging a problem inside the stack vs. in their app. + +''' +import os +import signal + +from .conftest import ( + expect, + assert_before, + # in_prompt_msg, +) + + +def test_shield_pause( + spawn, +): + ''' + Verify the `tractor.pause()/.post_mortem()` API works inside an + already cancelled `trio.CancelScope` and that you can step to the + next checkpoint wherein the cancelled will get raised. 
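+    (Really this drives the new `shield_hang_in_sub` example:
+    expect a `stackscope` tree dump on `SIGUSR1` relayed to the
+    shield-hanging subactor, then a zombie-reaper ("T-800")
+    hard-kill of that sub after the simulated ctl-c.)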
+ + ''' + child = spawn( + 'shield_hang_in_sub' + ) + expect( + child, + 'Yo my child hanging..?', + ) + assert_before( + child, + [ + 'Entering shield sleep..', + 'Enabling trace-trees on `SIGUSR1` since `stackscope` is installed @', + ] + ) + + print( + 'Sending SIGUSR1 to see a tree-trace!', + ) + os.kill( + child.pid, + signal.SIGUSR1, + ) + expect( + child, + # end-of-tree delimiter + "------ \('root', ", + ) + + assert_before( + child, + [ + 'Trying to dump `stackscope` tree..', + 'Dumping `stackscope` tree for actor', + "('root'", # uid line + + # parent block point (non-shielded) + 'await trio.sleep_forever() # in root', + ] + ) + + # expect( + # child, + # # relay to the sub should be reported + # 'Relaying `SIGUSR1`[10] to sub-actor', + # ) + + expect( + child, + # end-of-tree delimiter + "------ \('hanger', ", + ) + assert_before( + child, + [ + # relay to the sub should be reported + 'Relaying `SIGUSR1`[10] to sub-actor', + + "('hanger'", # uid line + + # hanger LOC where it's shield-halted + 'await trio.sleep_forever() # in subactor', + ] + ) + # breakpoint() + + # simulate the user sending a ctl-c to the hanging program. + # this should result in the terminator kicking in since + # the sub is shield blocking and can't respond to SIGINT. + os.kill( + child.pid, + signal.SIGINT, + ) + expect( + child, + 'Shutting down actor runtime', + timeout=6, + ) + assert_before( + child, + [ + 'raise KeyboardInterrupt', + # 'Shutting down actor runtime', + '#T-800 deployed to collect zombie B0', + "'--uid', \"('hanger',", + ] + ) diff --git a/tractor/_root.py b/tractor/_root.py index 882285a5..51dbe5e6 100644 --- a/tractor/_root.py +++ b/tractor/_root.py @@ -80,7 +80,7 @@ async def open_root_actor( # enables the multi-process debugger support debug_mode: bool = False, - maybe_enable_greenback: bool = False, # `.pause_from_sync()/breakpoint()` support + maybe_enable_greenback: bool = True, # `.pause_from_sync()/breakpoint()` support enable_stack_on_sig: bool = False, # internal logging @@ -233,14 +233,8 @@ async def open_root_actor( and enable_stack_on_sig ): - try: - logger.info('Enabling `stackscope` traces on SIGUSR1') - from .devx import enable_stack_on_sig - enable_stack_on_sig() - except ImportError: - logger.warning( - '`stackscope` not installed for use in debug mode!' - ) + from .devx._stackscope import enable_stack_on_sig + enable_stack_on_sig() # closed into below ping task-func ponged_addrs: list[tuple[str, int]] = [] diff --git a/tractor/devx/_stackscope.py b/tractor/devx/_stackscope.py index e8e97d1a..944ae49a 100644 --- a/tractor/devx/_stackscope.py +++ b/tractor/devx/_stackscope.py @@ -24,13 +24,24 @@ disjoint, parallel executing tasks in separate actors. ''' from __future__ import annotations +# from functools import partial +from threading import ( + current_thread, + Thread, + RLock, +) import multiprocessing as mp from signal import ( signal, + getsignal, SIGUSR1, ) -import traceback -from typing import TYPE_CHECKING +# import traceback +from types import ModuleType +from typing import ( + Callable, + TYPE_CHECKING, +) import trio from tractor import ( @@ -51,26 +62,45 @@ if TYPE_CHECKING: @trio.lowlevel.disable_ki_protection def dump_task_tree() -> None: - import stackscope - from tractor.log import get_console_log + ''' + Do a classic `stackscope.extract()` task-tree dump to console at + `.devx()` level. 
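+    (Normally fired by the `SIGUSR1` handler, `dump_tree_on_sig()`,
+    which `enable_stack_on_sig()` below installs.)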
+ ''' + import stackscope tree_str: str = str( stackscope.extract( trio.lowlevel.current_root_task(), recurse_child_tasks=True ) ) - log = get_console_log( - name=__name__, - level='cancel', - ) actor: Actor = _state.current_actor() + thr: Thread = current_thread() log.devx( f'Dumping `stackscope` tree for actor\n' - f'{actor.name}: {actor}\n' - f' |_{mp.current_process()}\n\n' + f'{actor.uid}:\n' + f'|_{mp.current_process()}\n' + f' |_{thr}\n' + f' |_{actor}\n\n' + + # start-of-trace-tree delimiter (mostly for testing) + '------ - ------\n' + '\n' + + f'{tree_str}\n' + + + # end-of-trace-tree delimiter (mostly for testing) + f'\n' + f'------ {actor.uid!r} ------\n' ) + # TODO: can remove this right? + # -[ ] was original code from author + # + # print( + # 'DUMPING FROM PRINT\n' + # + + # content + # ) # import logging # try: # with open("/dev/tty", "w") as tty: @@ -80,58 +110,130 @@ def dump_task_tree() -> None: # "task_tree" # ).exception("Error printing task tree") +_handler_lock = RLock() +_tree_dumped: bool = False -def signal_handler( + +def dump_tree_on_sig( sig: int, frame: object, relay_to_subs: bool = True, ) -> None: - try: - trio.lowlevel.current_trio_token( - ).run_sync_soon(dump_task_tree) - except RuntimeError: - # not in async context -- print a normal traceback - traceback.print_stack() + global _tree_dumped, _handler_lock + with _handler_lock: + if _tree_dumped: + log.warning( + 'Already dumped for this actor...??' + ) + return + + _tree_dumped = True + + # actor: Actor = _state.current_actor() + log.devx( + 'Trying to dump `stackscope` tree..\n' + ) + try: + dump_task_tree() + # await actor._service_n.start_soon( + # partial( + # trio.to_thread.run_sync, + # dump_task_tree, + # ) + # ) + # trio.lowlevel.current_trio_token().run_sync_soon( + # dump_task_tree + # ) + + except RuntimeError: + log.exception( + 'Failed to dump `stackscope` tree..\n' + ) + # not in async context -- print a normal traceback + # traceback.print_stack() + raise + + except BaseException: + log.exception( + 'Failed to dump `stackscope` tree..\n' + ) + raise + + log.devx( + 'Supposedly we dumped just fine..?' + ) if not relay_to_subs: return an: ActorNursery for an in _state.current_actor()._actoruid2nursery.values(): - subproc: ProcessType subactor: Actor for subactor, subproc, _ in an._children.values(): - log.devx( + log.warning( f'Relaying `SIGUSR1`[{sig}] to sub-actor\n' f'{subactor}\n' f' |_{subproc}\n' ) - if isinstance(subproc, trio.Process): - subproc.send_signal(sig) + # bc of course stdlib can't have a std API.. XD + match subproc: + case trio.Process(): + subproc.send_signal(sig) - elif isinstance(subproc, mp.Process): - subproc._send_signal(sig) + case mp.Process(): + subproc._send_signal(sig) def enable_stack_on_sig( - sig: int = SIGUSR1 -) -> None: + sig: int = SIGUSR1, +) -> ModuleType: ''' Enable `stackscope` tracing on reception of a signal; by default this is SIGUSR1. + HOT TIP: a task/ctx-tree dump can be triggered from a shell with + fancy cmds. + + For ex. from `bash` using `pgrep` and cmd-sustitution + (https://www.gnu.org/software/bash/manual/bash.html#Command-Substitution) + you could use: + + >> kill -SIGUSR1 $(pgrep -f '') + + Or with with `xonsh` (which has diff capture-from-subproc syntax) + + >> kill -SIGUSR1 @$(pgrep -f '') + ''' + try: + import stackscope + except ImportError: + log.warning( + '`stackscope` not installed for use in debug mode!' 
+ ) + return None + + handler: Callable|int = getsignal(sig) + if handler is dump_tree_on_sig: + log.devx( + 'A `SIGUSR1` handler already exists?\n' + f'|_ {handler!r}\n' + ) + return + signal( sig, - signal_handler, + dump_tree_on_sig, ) - # NOTE: not the above can be triggered from - # a (xonsh) shell using: - # kill -SIGUSR1 @$(pgrep -f '') - # - # for example if you were looking to trace a `pytest` run - # kill -SIGUSR1 @$(pgrep -f 'pytest') + log.devx( + 'Enabling trace-trees on `SIGUSR1` ' + 'since `stackscope` is installed @ \n' + f'{stackscope!r}\n\n' + f'With `SIGUSR1` handler\n' + f'|_{dump_tree_on_sig}\n' + ) + return stackscope -- 2.34.1 From aac013ae5c47444d39a9c765f3572e0f7f80c467 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 11 Jul 2024 12:11:31 -0400 Subject: [PATCH 203/305] Pass `infect_asyncio` setting via runtime-vars The reason for this "duplication" with the `--asyncio` CLI flag (passed to the child during spawn) is 2-fold: - allows verifying inside `Actor._from_parent()` that the `trio` runtime was started via `.start_guest_run()` as well as if the `Actor._infected_aio` spawn-entrypoint value has been set (by the `._entry._main()` whenever `--asyncio` is passed) such that any mismatch can be signaled via an `InternalError`. - enables checking the `._state._runtime_vars['_is_infected_aio']` value directly (say from a non-actor/`trio`-thread) instead of calling `._state.current_actor(err_on_no_runtime=False)` in certain edge cases. Impl/testing deats: - add `._state._runtime_vars['_is_infected_aio'] = False` default. - raise `InternalError` on any `--asyncio`-flag-passed vs. `_runtime_vars`-value-relayed-from-parent inside `Actor._from_parent()` and include a `Runner.is_guest` assert for good measure B) - set and relay `infect_asyncio: bool` via runtime-vars to child in `ActorNursery.start_actor()`. - verify `actor.is_infected_aio()`, `actor._infected_aio` and `_state._runtime_vars['_is_infected_aio']` are all set in test suite's `asyncio_actor()` endpoint. --- tests/test_infected_asyncio.py | 15 ++++++++++-- tractor/_entry.py | 23 +++++++++++------- tractor/_runtime.py | 44 ++++++++++++++++++++++++++++++---- tractor/_state.py | 5 +++- tractor/_supervise.py | 1 + 5 files changed, 72 insertions(+), 16 deletions(-) diff --git a/tests/test_infected_asyncio.py b/tests/test_infected_asyncio.py index 42eb35b7..fca971d0 100644 --- a/tests/test_infected_asyncio.py +++ b/tests/test_infected_asyncio.py @@ -21,9 +21,11 @@ import trio import tractor from tractor import ( current_actor, + Actor, to_asyncio, RemoteActorError, ContextCancelled, + _state, ) from tractor.trionics import BroadcastReceiver from tractor._testing import expect_ctxc @@ -80,7 +82,16 @@ async def asyncio_actor( ) -> None: - assert tractor.current_actor().is_infected_aio() + # ensure internal runtime state is consistent + actor: Actor = tractor.current_actor() + assert ( + actor.is_infected_aio() + and + actor._infected_aio + and + _state._runtime_vars['_is_infected_aio'] + ) + target: Callable = globals()[target] if '.' in expect_err: @@ -136,7 +147,7 @@ def test_aio_simple_error(reg_addr): assert err assert isinstance(err, RemoteActorError) - assert err.boxed_type == AssertionError + assert err.boxed_type is AssertionError def test_tractor_cancels_aio(reg_addr): diff --git a/tractor/_entry.py b/tractor/_entry.py index a072706c..19dcb9f6 100644 --- a/tractor/_entry.py +++ b/tractor/_entry.py @@ -20,6 +20,7 @@ Sub-process entry points. 
""" from __future__ import annotations from functools import partial +import multiprocessing as mp import os import textwrap from typing import ( @@ -64,20 +65,22 @@ def _mp_main( ''' actor._forkserver_info = forkserver_info from ._spawn import try_set_start_method - spawn_ctx = try_set_start_method(start_method) + spawn_ctx: mp.context.BaseContext = try_set_start_method(start_method) + assert spawn_ctx if actor.loglevel is not None: log.info( - f"Setting loglevel for {actor.uid} to {actor.loglevel}") + f'Setting loglevel for {actor.uid} to {actor.loglevel}' + ) get_console_log(actor.loglevel) - assert spawn_ctx + # TODO: use scops headers like for `trio` below! + # (well after we libify it maybe..) log.info( - f"Started new {spawn_ctx.current_process()} for {actor.uid}") - - _state._current_actor = actor - - log.debug(f"parent_addr is {parent_addr}") + f'Started new {spawn_ctx.current_process()} for {actor.uid}' + # f"parent_addr is {parent_addr}" + ) + _state._current_actor: Actor = actor trio_main = partial( async_main, actor=actor, @@ -94,7 +97,9 @@ def _mp_main( pass # handle it the same way trio does? finally: - log.info(f"Subactor {actor.uid} terminated") + log.info( + f'`mp`-subactor {actor.uid} exited' + ) # TODO: move this func to some kinda `.devx._conc_lang.py` eventually diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 662dd67a..7a00d613 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -59,6 +59,7 @@ from types import ModuleType import warnings import trio +from trio._core import _run as trio_runtime from trio import ( CancelScope, Nursery, @@ -80,6 +81,7 @@ from ._context import ( from .log import get_logger from ._exceptions import ( ContextCancelled, + InternalError, ModuleNotExposed, MsgTypeError, unpack_error, @@ -98,6 +100,7 @@ from ._rpc import ( if TYPE_CHECKING: from ._supervise import ActorNursery + from trio._channel import MemoryChannelState log = get_logger('tractor') @@ -896,11 +899,15 @@ class Actor: f'peer: {chan.uid}\n' f'cid:{cid}\n' ) - ctx._allow_overruns = allow_overruns + ctx._allow_overruns: bool = allow_overruns # adjust buffer size if specified - state = ctx._send_chan._state # type: ignore - if msg_buffer_size and state.max_buffer_size != msg_buffer_size: + state: MemoryChannelState = ctx._send_chan._state # type: ignore + if ( + msg_buffer_size + and + state.max_buffer_size != msg_buffer_size + ): state.max_buffer_size = msg_buffer_size except KeyError: @@ -1094,7 +1101,36 @@ class Actor: '`tractor.pause_from_sync()` not available!' ) - rvs['_is_root'] = False + # XXX ensure the "infected `asyncio` mode" setting + # passed down from our spawning parent is consistent + # with `trio`-runtime initialization: + # - during sub-proc boot, the entrypoint func + # (`._entry._main()`) should set + # `._infected_aio = True` before calling + # `run_as_asyncio_guest()`, + # - the value of `infect_asyncio: bool = True` as + # passed to `ActorNursery.start_actor()` must be + # the same as `_runtime_vars['_is_infected_aio']` + if ( + (aio_rtv := rvs['_is_infected_aio']) + != + (aio_attr := self._infected_aio) + ): + raise InternalError( + 'Parent sent runtime-vars that mismatch for the ' + '"infected `asyncio` mode" settings ?!?\n\n' + + f'rvs["_is_infected_aio"] = {aio_rtv}\n' + f'self._infected_aio = {aio_attr}\n' + ) + if aio_rtv: + assert trio_runtime.GLOBAL_RUN_CONTEXT.runner.is_guest + # ^TODO^ possibly add a `sniffio` or + # `trio` pub-API for `is_guest_mode()`? 
+ + rvs['_is_root'] = False # obvi XD + + # update process-wide globals _state._runtime_vars.update(rvs) # XXX: ``msgspec`` doesn't support serializing tuples diff --git a/tractor/_state.py b/tractor/_state.py index 9f896005..a87ad36b 100644 --- a/tractor/_state.py +++ b/tractor/_state.py @@ -44,6 +44,8 @@ _runtime_vars: dict[str, Any] = { '_root_mailbox': (None, None), '_registry_addrs': [], + '_is_infected_aio': False, + # for `tractor.pause_from_sync()` & `breakpoint()` support 'use_greenback': False, } @@ -70,7 +72,8 @@ def current_actor( ''' if ( err_on_no_runtime - and _current_actor is None + and + _current_actor is None ): msg: str = 'No local actor has been initialized yet?\n' from ._exceptions import NoRuntime diff --git a/tractor/_supervise.py b/tractor/_supervise.py index 4a538e9f..de268078 100644 --- a/tractor/_supervise.py +++ b/tractor/_supervise.py @@ -158,6 +158,7 @@ class ActorNursery: # configure and pass runtime state _rtv = _state._runtime_vars.copy() _rtv['_is_root'] = False + _rtv['_is_infected_aio'] = infect_asyncio # allow setting debug policy per actor if debug_mode is not None: -- 2.34.1 From f24e6f6e488e3f7e0940fb0b92eb819bdd4d9f9e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 11 Jul 2024 19:22:40 -0400 Subject: [PATCH 204/305] Use "sclang"-style syntax in `to_asyncio` task logging Just like we've started doing throughout the rest of the actor runtime for reporting (and where "sclang" = "structured conc (s)lang", our little supervision-focused operations syntax i've been playing with in log msg content). Further tweaks: - report the `trio_done_fute` alongside the `main_outcome` value. - add a todo list for supporting `greenback` for pause points. --- tractor/to_asyncio.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/tractor/to_asyncio.py b/tractor/to_asyncio.py index 5564d0e1..c37c0467 100644 --- a/tractor/to_asyncio.py +++ b/tractor/to_asyncio.py @@ -276,7 +276,10 @@ def _run_asyncio_task( chan._aio_task: asyncio.Task = task # XXX TODO XXX get this actually workin.. XD - # maybe setup `greenback` for `asyncio`-side task REPLing + # -[ ] we need logic to setup `greenback` for `asyncio`-side task + # REPLing.. which should normally be nearly the same as for + # `trio`? + # -[ ] add to a new `.devx._greenback.maybe_init_for_asyncio()`? if ( debug_mode() and @@ -305,15 +308,22 @@ def _run_asyncio_task( msg: str = ( 'Infected `asyncio` task {etype_str}\n' - f'|_{task}\n' ) if isinstance(terr, CancelledError): + msg += ( + f'c)>\n' + f' |_{task}\n' + ) log.cancel( msg.format(etype_str='cancelled') ) else: + msg += ( + f'x)>\n' + f' |_{task}\n' + ) log.exception( - msg.format(etype_str='cancelled') + msg.format(etype_str='errored') ) assert type(terr) is type(aio_err), ( @@ -619,9 +629,10 @@ def run_as_asyncio_guest( # ) def trio_done_callback(main_outcome): - log.info( - f'trio_main finished with\n' - f'|_{main_outcome!r}' + log.runtime( + f'`trio` guest-run finishing with outcome\n' + f'>) {main_outcome}\n' + f'|_{trio_done_fute}\n' ) if isinstance(main_outcome, Error): @@ -643,6 +654,12 @@ def run_as_asyncio_guest( else: trio_done_fute.set_result(main_outcome) + log.info( + f'`trio` guest-run finished with outcome\n' + f')>\n' + f'|_{trio_done_fute}\n' + ) + startup_msg += ( f'-> created {trio_done_callback!r}\n' f'-> scheduling `trio_main`: {trio_main!r}\n' @@ -681,7 +698,8 @@ def run_as_asyncio_guest( # error path in `asyncio`'s runtime..? 
asyncio.CancelledError, - ) as fute_err: + ) as _fute_err: + fute_err = _fute_err err_message: str = ( 'main `asyncio` task ' ) -- 2.34.1 From e10616fa4d27e13aef910944c63bb85285f20bfb Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 12 Jul 2024 15:57:41 -0400 Subject: [PATCH 205/305] Fix multi-daemon debug test `break` signal.. It was expecting `AssertionError` as a proceed-in-test signal (by breaking from a continue loop), but `in_prompt_msg(raise_on_err=True)` was changed to raise `ValueError`; so instead just use as a predicate for the `break`. Also rework `in_prompt_msg()` to accept the `child: BaseSpawn` as input instead of `before: str` remove the casting boilerplate, and adjust all usage to match. --- examples/debugging/multi_daemon_subactors.py | 3 +- tests/devx/conftest.py | 17 +-- tests/devx/test_debugger.py | 118 +++++++++---------- tests/test_infected_asyncio.py | 6 +- 4 files changed, 71 insertions(+), 73 deletions(-) diff --git a/examples/debugging/multi_daemon_subactors.py b/examples/debugging/multi_daemon_subactors.py index 80ef933c..4a462623 100644 --- a/examples/debugging/multi_daemon_subactors.py +++ b/examples/debugging/multi_daemon_subactors.py @@ -25,7 +25,8 @@ async def main(): """ async with tractor.open_nursery( debug_mode=True, - loglevel='cancel', + # loglevel='cancel', + # loglevel='devx', ) as n: p0 = await n.start_actor('bp_forever', enable_modules=[__name__]) diff --git a/tests/devx/conftest.py b/tests/devx/conftest.py index b739569a..28a14cbb 100644 --- a/tests/devx/conftest.py +++ b/tests/devx/conftest.py @@ -10,6 +10,7 @@ import pytest from pexpect.exceptions import ( TIMEOUT, ) +from pexpect.spawnbase import SpawnBase from tractor._testing import ( mk_cmd, ) @@ -107,7 +108,7 @@ def expect( def in_prompt_msg( - prompt: str, + child: SpawnBase, parts: list[str], pause_on_false: bool = False, @@ -125,18 +126,20 @@ def in_prompt_msg( ''' __tracebackhide__: bool = False + before: str = str(child.before.decode()) for part in parts: - if part not in prompt: + if part not in before: if pause_on_false: import pdbp pdbp.set_trace() if print_prompt_on_false: - print(prompt) + print(before) if err_on_false: raise ValueError( - f'Could not find pattern: {part!r} in `before` output?' + f'Could not find pattern in `before` output?\n' + f'part: {part!r}\n' ) return False @@ -147,7 +150,7 @@ def in_prompt_msg( # against call stack frame output from the the 'll' command the like! 
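+# -[ ] likely just a sub-out helper using the (famous) regex
+#      from the answer linked below, something like:
+#      ansi_re = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
+#      plain: str = ansi_re.sub('', before)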
# -[ ] SO answer for stipping ANSI codes: https://stackoverflow.com/a/14693789 def assert_before( - child, + child: SpawnBase, patts: list[str], **kwargs, @@ -155,10 +158,8 @@ def assert_before( ) -> None: __tracebackhide__: bool = False - # as in before the prompt end - before: str = str(child.before.decode()) assert in_prompt_msg( - prompt=before, + child=child, parts=patts, # since this is an "assert" helper ;) diff --git a/tests/devx/test_debugger.py b/tests/devx/test_debugger.py index 5ef039f3..2a24bf98 100644 --- a/tests/devx/test_debugger.py +++ b/tests/devx/test_debugger.py @@ -94,14 +94,15 @@ def test_root_actor_error( # scan for the prompt expect(child, PROMPT) - before = str(child.before.decode()) - # make sure expected logging and error arrives assert in_prompt_msg( - before, - [_crash_msg, "('root'"] + child, + [ + _crash_msg, + "('root'", + 'AssertionError', + ] ) - assert 'AssertionError' in before # send user command child.sendline(user_input) @@ -241,10 +242,12 @@ def test_subactor_error( # scan for the prompt child.expect(PROMPT) - before = str(child.before.decode()) assert in_prompt_msg( - before, - [_crash_msg, "('name_error'"] + child, + [ + _crash_msg, + "('name_error'", + ] ) if do_next: @@ -263,17 +266,15 @@ def test_subactor_error( child.sendline('continue') child.expect(PROMPT) - before = str(child.before.decode()) - - # root actor gets debugger engaged assert in_prompt_msg( - before, - [_crash_msg, "('root'"] - ) - # error is a remote error propagated from the subactor - assert in_prompt_msg( - before, - [_crash_msg, "('name_error'"] + child, + [ + _crash_msg, + # root actor gets debugger engaged + "('root'", + # error is a remote error propagated from the subactor + "('name_error'", + ] ) # another round @@ -294,14 +295,11 @@ def test_subactor_breakpoint( "Single subactor with an infinite breakpoint loop" child = spawn('subactor_breakpoint') - - # scan for the prompt child.expect(PROMPT) - - before = str(child.before.decode()) assert in_prompt_msg( - before, - [_pause_msg, "('breakpoint_forever'"] + child, + [_pause_msg, + "('breakpoint_forever'",] ) # do some "next" commands to demonstrate recurrent breakpoint @@ -317,9 +315,8 @@ def test_subactor_breakpoint( for _ in range(5): child.sendline('continue') child.expect(PROMPT) - before = str(child.before.decode()) assert in_prompt_msg( - before, + child, [_pause_msg, "('breakpoint_forever'"] ) @@ -332,9 +329,8 @@ def test_subactor_breakpoint( # child process should exit but parent will capture pdb.BdbQuit child.expect(PROMPT) - before = str(child.before.decode()) assert in_prompt_msg( - before, + child, ['RemoteActorError:', "('breakpoint_forever'", 'bdb.BdbQuit',] @@ -349,9 +345,8 @@ def test_subactor_breakpoint( # process should exit child.expect(EOF) - before = str(child.before.decode()) assert in_prompt_msg( - before, + child, ['RemoteActorError:', "('breakpoint_forever'", 'bdb.BdbQuit',] @@ -375,7 +370,7 @@ def test_multi_subactors( before = str(child.before.decode()) assert in_prompt_msg( - before, + child, [_pause_msg, "('breakpoint_forever'"] ) @@ -396,12 +391,14 @@ def test_multi_subactors( # first name_error failure child.expect(PROMPT) - before = str(child.before.decode()) assert in_prompt_msg( - before, - [_crash_msg, "('name_error'"] + child, + [ + _crash_msg, + "('name_error'", + "NameError", + ] ) - assert "NameError" in before if ctlc: do_ctlc(child) @@ -425,9 +422,8 @@ def test_multi_subactors( # breakpoint loop should re-engage child.sendline('c') child.expect(PROMPT) - before = 
str(child.before.decode()) assert in_prompt_msg( - before, + child, [_pause_msg, "('breakpoint_forever'"] ) @@ -520,25 +516,28 @@ def test_multi_daemon_subactors( # the root's tty lock first so anticipate either crash # message on the first entry. - bp_forev_parts = [_pause_msg, "('bp_forever'"] + bp_forev_parts = [ + _pause_msg, + "('bp_forever'", + ] bp_forev_in_msg = partial( in_prompt_msg, parts=bp_forev_parts, ) - name_error_msg = "NameError: name 'doggypants' is not defined" - name_error_parts = [name_error_msg] + name_error_msg: str = "NameError: name 'doggypants' is not defined" + name_error_parts: list[str] = [name_error_msg] before = str(child.before.decode()) - if bp_forev_in_msg(prompt=before): + if bp_forev_in_msg(child=child): next_parts = name_error_parts elif name_error_msg in before: next_parts = bp_forev_parts else: - raise ValueError("Neither log msg was found !?") + raise ValueError('Neither log msg was found !?') if ctlc: do_ctlc(child) @@ -607,14 +606,12 @@ def test_multi_daemon_subactors( # wait for final error in root # where it crashs with boxed error while True: - try: - child.sendline('c') - child.expect(PROMPT) - assert_before( - child, - bp_forev_parts - ) - except AssertionError: + child.sendline('c') + child.expect(PROMPT) + if not in_prompt_msg( + child, + bp_forev_parts + ): break assert_before( @@ -795,10 +792,13 @@ def test_root_nursery_cancels_before_child_releases_tty_lock( child = spawn('root_cancelled_but_child_is_in_tty_lock') child.expect(PROMPT) - - before = str(child.before.decode()) - assert "NameError: name 'doggypants' is not defined" in before - assert "tractor._exceptions.RemoteActorError: ('name_error'" not in before + assert_before( + child, + [ + "NameError: name 'doggypants' is not defined", + "tractor._exceptions.RemoteActorError: ('name_error'", + ], + ) time.sleep(0.5) if ctlc: @@ -889,9 +889,8 @@ def test_different_debug_mode_per_actor( child.expect(PROMPT) # only one actor should enter the debugger - before = str(child.before.decode()) assert in_prompt_msg( - before, + child, [_crash_msg, "('debugged_boi'", "RuntimeError"], ) @@ -901,8 +900,6 @@ def test_different_debug_mode_per_actor( child.sendline('c') child.expect(EOF) - before = str(child.before.decode()) - # NOTE: this debugged actor error currently WON'T show up since the # root will actually cancel and terminate the nursery before the error # msg reported back from the debug mode actor is processed. @@ -954,9 +951,8 @@ def test_pause_from_sync( child.expect(PROMPT) # XXX shouldn't see gb loaded message with PDB loglevel! - before = str(child.before.decode()) assert not in_prompt_msg( - before, + child, ['`greenback` portal opened!'], ) # should be same root task @@ -1037,7 +1033,7 @@ def test_pause_from_sync( # at the same time as the one that was detected above. 
for key, other_patts in attach_patts.copy().items(): assert not in_prompt_msg( - before, + child, other_patts, ) diff --git a/tests/test_infected_asyncio.py b/tests/test_infected_asyncio.py index fca971d0..f5fa0aab 100644 --- a/tests/test_infected_asyncio.py +++ b/tests/test_infected_asyncio.py @@ -291,7 +291,7 @@ def test_context_spawns_aio_task_that_errors( err = excinfo.value assert isinstance(err, expect) - assert err.boxed_type == AssertionError + assert err.boxed_type is AssertionError async def aio_cancel(): @@ -497,7 +497,7 @@ def test_trio_error_cancels_intertask_chan(reg_addr): trio.run(main) # ensure boxed error type - excinfo.value.boxed_type == Exception + excinfo.value.boxed_type is Exception def test_trio_closes_early_and_channel_exits(reg_addr): @@ -533,7 +533,7 @@ def test_aio_errors_and_channel_propagates_and_closes(reg_addr): ) as excinfo: trio.run(main) - excinfo.value.boxed_type == Exception + excinfo.value.boxed_type is Exception @tractor.context -- 2.34.1 From e7adeee549fb3afbf2160d1326130e1b52ed4adf Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 13 Jul 2024 00:16:28 -0400 Subject: [PATCH 206/305] First draft, `asyncio`-task, sync-pausing Bo Mostly due to magic from @oremanj where we slap in a little bit of `.from_asyncio`-type stuff to run a `trio`-task from `asyncio.Task` code! I'm not gonna go into tooo too much detail but basically the primary thing needed was a way to (blocking-ly) invoke a `trio.lowlevel.Task` from an `asyncio` one (which we now have with a new `run_trio_task_in_future()` thanks to draft code from the aforementioned jefe) which we now invoke from a dedicated aio case-branch inside `.devx._debug.pause_from_sync()`. Further include a case inside `DebugStatus.release()` to handle using the same func to set the `repl_release: trio.Event` from the aio side when releasing the REPL on exit cmds. Prolly more refinements to come ;{o --- examples/debugging/asyncio_bp.py | 21 +-- tractor/devx/_debug.py | 212 +++++++++++++++++++++---------- tractor/to_asyncio.py | 95 ++++++++++++++ 3 files changed, 254 insertions(+), 74 deletions(-) diff --git a/examples/debugging/asyncio_bp.py b/examples/debugging/asyncio_bp.py index baddfe03..f2fabddd 100644 --- a/examples/debugging/asyncio_bp.py +++ b/examples/debugging/asyncio_bp.py @@ -2,7 +2,10 @@ import asyncio import trio import tractor -from tractor import to_asyncio +from tractor import ( + to_asyncio, + Portal, +) async def aio_sleep_forever(): @@ -43,7 +46,7 @@ async def bp_then_error( @tractor.context async def trio_ctx( ctx: tractor.Context, - bp_before_started: bool = False, + bp_before_started: bool = True, ): # this will block until the ``asyncio`` task sends a "first" @@ -57,7 +60,6 @@ async def trio_ctx( trio.open_nursery() as n, ): - assert first == 'start' if bp_before_started: @@ -73,15 +75,18 @@ async def trio_ctx( async def main( - bps_all_over: bool = False, + bps_all_over: bool = True, ) -> None: async with tractor.open_nursery( - # debug_mode=True, + debug_mode=True, + maybe_enable_greenback=True, + # loglevel='devx', + # loglevel='runtime', ) as n: - p = await n.start_actor( + ptl: Portal = await n.start_actor( 'aio_daemon', enable_modules=[__name__], infect_asyncio=True, @@ -89,7 +94,7 @@ async def main( loglevel='cancel', ) - async with p.open_context( + async with ptl.open_context( trio_ctx, bp_before_started=bps_all_over, ) as (ctx, first): @@ -105,7 +110,7 @@ async def main( # TODO: case where we cancel from trio-side while asyncio task # has debugger lock? 
- # await p.cancel_actor() + # await ptl.cancel_actor() if __name__ == '__main__': diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 02551fa1..0784e594 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -20,6 +20,7 @@ Multi-core debugging for da peeps! """ from __future__ import annotations +import asyncio import bdb from contextlib import ( asynccontextmanager as acm, @@ -67,6 +68,7 @@ from trio import ( TaskStatus, ) import tractor +from tractor.to_asyncio import run_trio_task_in_future from tractor.log import get_logger from tractor._context import Context from tractor import _state @@ -296,7 +298,7 @@ class Lock: ) @classmethod - @pdbp.hideframe + # @pdbp.hideframe def release( cls, raise_on_thread: bool = True, @@ -310,39 +312,40 @@ class Lock: we_released: bool = False ctx_in_debug: Context|None = cls.ctx_in_debug repl_task: Task|Thread|None = DebugStatus.repl_task - if not DebugStatus.is_main_trio_thread(): - thread: threading.Thread = threading.current_thread() - message: str = ( - '`Lock.release()` can not be called from a non-main-`trio` thread!\n' - f'{thread}\n' - ) - if raise_on_thread: - raise RuntimeError(message) - - log.devx(message) - return False - - task: Task = current_task() - - # sanity check that if we're the root actor - # the lock is marked as such. - # note the pre-release value may be diff the the - # post-release task. - if repl_task is task: - assert cls._owned_by_root - message: str = ( - 'TTY lock held by root-actor on behalf of local task\n' - f'|_{repl_task}\n' - ) - else: - assert DebugStatus.repl_task is not task - - message: str = ( - 'TTY lock was NOT released on behalf of caller\n' - f'|_{task}\n' - ) try: + if not DebugStatus.is_main_trio_thread(): + thread: threading.Thread = threading.current_thread() + message: str = ( + '`Lock.release()` can not be called from a non-main-`trio` thread!\n' + f'{thread}\n' + ) + if raise_on_thread: + raise RuntimeError(message) + + log.devx(message) + return False + + task: Task = current_task() + + # sanity check that if we're the root actor + # the lock is marked as such. + # note the pre-release value may be diff the the + # post-release task. + if repl_task is task: + assert cls._owned_by_root + message: str = ( + 'TTY lock held by root-actor on behalf of local task\n' + f'|_{repl_task}\n' + ) + else: + assert DebugStatus.repl_task is not task + + message: str = ( + 'TTY lock was NOT released on behalf of caller\n' + f'|_{task}\n' + ) + lock: trio.StrictFIFOLock = cls._debug_lock owner: Task = lock.statistics().owner if ( @@ -788,7 +791,14 @@ class DebugStatus: # in which case schedule the SIGINT shielding override # to in the main thread. # https://docs.python.org/3/library/signal.html#signals-and-threads - if not cls.is_main_trio_thread(): + if ( + not cls.is_main_trio_thread() + and + not _state._runtime_vars.get( + '_is_infected_aio', + False, + ) + ): cls._orig_sigint_handler: Callable = trio.from_thread.run_sync( signal.signal, signal.SIGINT, @@ -813,7 +823,16 @@ class DebugStatus: # always restore ``trio``'s sigint handler. see notes below in # the pdb factory about the nightmare that is that code swapping # out the handler when the repl activates... 
- if not cls.is_main_trio_thread(): + # if not cls.is_main_trio_thread(): + if ( + not cls.is_main_trio_thread() + and + # not _state._runtime_vars.get( + # '_is_infected_aio', + # False, + # ) + not current_actor().is_infected_aio() + ): trio.from_thread.run_sync( signal.signal, signal.SIGINT, @@ -871,7 +890,7 @@ class DebugStatus: return False @classmethod - @pdbp.hideframe + # @pdbp.hideframe def release( cls, cancel_req_task: bool = False, @@ -880,11 +899,21 @@ class DebugStatus: try: # sometimes the task might already be terminated in # which case this call will raise an RTE? - if ( - repl_release is not None - ): + if repl_release is not None: if cls.is_main_trio_thread(): repl_release.set() + + elif current_actor().is_infected_aio(): + + async def _set_repl_release(): + repl_release.set() + + fute: asyncio.Future = run_trio_task_in_future( + _set_repl_release + ) + if not fute.done(): + log.warning('REPL release state unknown..?') + else: # XXX NOTE ONLY used for bg root-actor sync # threads, see `.pause_from_sync()`. @@ -1658,18 +1687,24 @@ async def _pause( try: task: Task = current_task() except RuntimeError as rte: + # NOTE, 2 cases we might get here: + # + # - ACTUALLY not a `trio.lowlevel.Task` nor runtime caller, + # |_ error out as normal + # + # - an infected `asycio` actor calls it from an actual + # `asyncio.Task` + # |_ in this case we DO NOT want to RTE! __tracebackhide__: bool = False - log.exception( - 'Failed to get current `trio`-task?' - ) - # if actor.is_infected_aio(): - # mk_pdb().set_trace() - # raise RuntimeError( - # '`tractor.pause[_from_sync]()` not yet supported ' - # 'directly (infected) `asyncio` tasks!' - # ) from rte - - raise rte + if actor.is_infected_aio(): + log.exception( + 'Failed to get current `trio`-task?' + ) + raise RuntimeError( + 'An `asyncio` task should not be calling this!?' + ) from rte + else: + task = asyncio.current_task() if debug_func is not None: debug_func = partial(debug_func) @@ -2060,7 +2095,8 @@ async def _pause( f'on behalf of {repl_task} ??\n' ) - DebugStatus.release(cancel_req_task=True) + if not actor.is_infected_aio(): + DebugStatus.release(cancel_req_task=True) # sanity checks for ^ on request/status teardown # assert DebugStatus.repl is None # XXX no more bc bg thread cases? @@ -2113,7 +2149,9 @@ def _set_trace( log.pdb( f'{_pause_msg}\n' f'>(\n' - f'|_ {task} @ {actor.uid}\n' + f'|_{actor.uid}\n' + f' |_{task}\n' # @ {actor.uid}\n' + # f'|_{task}\n' # ^-TODO-^ more compact pformating? # -[ ] make an `Actor.__repr()__` # -[ ] should we use `log.pformat_task_uid()`? @@ -2390,9 +2428,6 @@ def pause_from_sync( actor: tractor.Actor = current_actor( err_on_no_runtime=False, ) - message: str = ( - f'{actor.uid} task called `tractor.pause_from_sync()`\n' - ) if not actor: raise RuntimeError( 'Not inside the `tractor`-runtime?\n' @@ -2400,6 +2435,9 @@ def pause_from_sync( '- `async with tractor.open_nursery()` or,\n' '- `async with tractor.open_root_actor()`\n' ) + message: str = ( + f'{actor.uid} task called `tractor.pause_from_sync()`\n' + ) # TODO: once supported, remove this AND the one # inside `._pause()`! @@ -2409,16 +2447,17 @@ def pause_from_sync( # injection? # -[ ] should `breakpoint()` work and what does it normally # do in `asyncio` ctxs? - if actor.is_infected_aio(): - raise RuntimeError( - '`tractor.pause[_from_sync]()` not yet supported ' - 'for infected `asyncio` mode!' 
- ) + # if actor.is_infected_aio(): + # raise RuntimeError( + # '`tractor.pause[_from_sync]()` not yet supported ' + # 'for infected `asyncio` mode!' + # ) repl: PdbREPL = mk_pdb() # message += f'-> created local REPL {repl}\n' is_root: bool = is_root_process() + is_aio: bool = actor.is_infected_aio() # TODO: we could also check for a non-`.to_thread` context # using `trio.from_thread.check_cancelled()` (says @@ -2431,8 +2470,11 @@ def pause_from_sync( # when called from a (bg) thread, run an async task in a new # thread which will call `._pause()` manually with special # handling for root-actor caller usage. - if not DebugStatus.is_main_trio_thread(): - + if ( + not DebugStatus.is_main_trio_thread() + and + not is_aio # see below for this usage + ): # TODO: `threading.Lock()` this so we don't get races in # multi-thr cases where they're acquiring/releasing the # REPL and setting request/`Lock` state, etc.. @@ -2440,10 +2482,21 @@ def pause_from_sync( repl_owner = thread # TODO: make root-actor bg thread usage work! - if is_root: + if ( + is_root + # or + # is_aio + ): + if is_root: + message += ( + f'-> called from a root-actor bg {thread}\n' + ) + elif is_aio: + message += ( + f'-> called from a `asyncio`-task bg {thread}\n' + ) message += ( - f'-> called from a root-actor bg {thread}\n' - f'-> scheduling `._pause_from_bg_root_thread()`..\n' + '-> scheduling `._pause_from_bg_root_thread()`..\n' ) # XXX SUBTLE BADNESS XXX that should really change! # don't over-write the `repl` here since when @@ -2462,7 +2515,7 @@ def pause_from_sync( hide_tb=hide_tb, **_pause_kwargs, ), - ) + ), ) DebugStatus.shield_sigint() message += ( @@ -2495,6 +2548,29 @@ def pause_from_sync( DebugStatus.shield_sigint() assert bg_task is not DebugStatus.repl_task + elif is_aio: + greenback: ModuleType = maybe_import_greenback() + repl_owner: Task = asyncio.current_task() + fute: asyncio.Future = run_trio_task_in_future( + partial( + _pause, + debug_func=None, + repl=repl, + hide_tb=hide_tb, + + # XXX to prevent `._pause()` for setting + # `DebugStatus.repl_task` to the gb task! + called_from_sync=True, + called_from_bg_thread=True, + + **_pause_kwargs + ) + ) + # TODO: for async version -> `.pause_from_aio()`? + # bg_task, _ = await fute + bg_task, _ = greenback.await_(fute) + bg_task: asyncio.Task = asyncio.current_task() + else: # we are presumably the `trio.run()` + main thread # raises on not-found by default greenback: ModuleType = maybe_import_greenback() @@ -2509,8 +2585,8 @@ def pause_from_sync( # NOTE XXX seems to need to be set BEFORE the `_pause()` # invoke using gb below? DebugStatus.shield_sigint() - repl_owner: Task = current_task() + message += '-> calling `greenback.await_(_pause(debug_func=None))` from sync caller..\n' try: out = greenback.await_( @@ -2572,6 +2648,10 @@ def pause_from_sync( # -[ ] tried to use `@pdbp.hideframe` decoration but # still doesn't work except BaseException as err: + log.exception( + 'Failed to sync-pause from\n\n' + f'{repl_owner}\n' + ) __tracebackhide__: bool = False raise err diff --git a/tractor/to_asyncio.py b/tractor/to_asyncio.py index c37c0467..8feaf88f 100644 --- a/tractor/to_asyncio.py +++ b/tractor/to_asyncio.py @@ -562,6 +562,101 @@ class AsyncioRuntimeTranslationError(RuntimeError): ''' +def run_trio_task_in_future( + async_fn, + *args, +) -> asyncio.Future: + ''' + Run an async-func as a `trio` task from an `asyncio.Task` wrapped + in a `asyncio.Future` which is returned to the caller. + + Another astounding feat by the great @oremanj !! 
+ + Bo + + ''' + result_future = asyncio.Future() + cancel_scope = trio.CancelScope() + finished: bool = False + + # monkey-patch the future's `.cancel()` meth to + # allow cancellation relay to `trio`-task. + cancel_message: str|None = None + orig_cancel = result_future.cancel + + def wrapped_cancel( + msg: str|None = None, + ): + nonlocal cancel_message + if finished: + # We're being called back after the task completed + if msg is not None: + return orig_cancel(msg) + elif cancel_message is not None: + return orig_cancel(cancel_message) + else: + return orig_cancel() + + if result_future.done(): + return False + + # Forward cancellation to the Trio task, don't mark + # future as cancelled until it completes + cancel_message = msg + cancel_scope.cancel() + return True + + result_future.cancel = wrapped_cancel + + async def trio_task() -> None: + nonlocal finished + try: + with cancel_scope: + try: + # TODO: type this with new tech in 3.13 + result: Any = await async_fn(*args) + finally: + finished = True + + # Propagate result or cancellation to the Future + if cancel_scope.cancelled_caught: + result_future.cancel() + + elif not result_future.cancelled(): + result_future.set_result(result) + + except BaseException as exc: + # the result future gets all the non-Cancelled + # exceptions. Any Cancelled need to keep propagating + # out of this stack frame in order to reach the cancel + # scope for which they're intended. + cancelled: BaseException|None + rest: BaseException|None + if isinstance(exc, BaseExceptionGroup): + cancelled, rest = exc.split(trio.Cancelled) + + elif isinstance(exc, trio.Cancelled): + cancelled, rest = exc, None + + else: + cancelled, rest = None, exc + + if not result_future.cancelled(): + if rest: + result_future.set_exception(rest) + else: + result_future.cancel() + + if cancelled: + raise cancelled + + trio.lowlevel.spawn_system_task( + trio_task, + name=async_fn, + ) + return result_future + + def run_as_asyncio_guest( trio_main: Callable, # ^-NOTE-^ when spawned with `infected_aio=True` this func is -- 2.34.1 From 46c9ee2551bc6716a5217394b36973990d5e275a Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 29 Jul 2024 17:53:52 -0400 Subject: [PATCH 207/305] Drop `asyncio_bp` loglevel setting by default --- examples/debugging/asyncio_bp.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/debugging/asyncio_bp.py b/examples/debugging/asyncio_bp.py index f2fabddd..161435d1 100644 --- a/examples/debugging/asyncio_bp.py +++ b/examples/debugging/asyncio_bp.py @@ -83,15 +83,13 @@ async def main( debug_mode=True, maybe_enable_greenback=True, # loglevel='devx', - # loglevel='runtime', ) as n: - ptl: Portal = await n.start_actor( 'aio_daemon', enable_modules=[__name__], infect_asyncio=True, debug_mode=True, - loglevel='cancel', + # loglevel='cancel', ) async with ptl.open_context( -- 2.34.1 From e898a41e2299cd9e438ed1280dd821c08c2c0eee Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 29 Jul 2024 17:56:00 -0400 Subject: [PATCH 208/305] Use our `._post_mortem` from `open_crash_handler()` Since it seems that `pdbp.xpm()` can sometimes lose the up-stack traceback info/frames? Not sure why but ours seems to work just fine from a `asyncio`-handler in `modden`'s use of `i3ipc` B) Also call `DebugStatus.shield_sigint()` from `pause_from_sync()` in the infected-`asyncio` case to get the same shielding behaviour as in all other usage! 
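For reference, the `trio`<->`asyncio` bridge both of those cases lean on is the new `run_trio_task_in_future()` from a couple patches back; a minimal usage sketch, assuming it's called from an `asyncio.Task` inside a guest-mode (i.e. `infect_asyncio=True`) actor where both loops are live:

    import asyncio
    import trio
    from tractor.to_asyncio import run_trio_task_in_future

    async def set_event(ev: trio.Event) -> None:
        # any `trio` API is legal in here
        ev.set()

    async def from_aio_side(ev: trio.Event) -> None:
        # returns an `asyncio.Future` immediately; the `trio` fn is
        # scheduled concurrently as a "system task" on the guest loop.
        fute: asyncio.Future = run_trio_task_in_future(set_event, ev)
        await fute  # resolves once the `trio` task completes

When blocking isn't an option the caller can instead just poll `fute.done()`, as the `asyncio` branch of `DebugStatus.release()` above does.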
--- tractor/devx/_debug.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 0784e594..d38c2239 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -2491,10 +2491,7 @@ def pause_from_sync( message += ( f'-> called from a root-actor bg {thread}\n' ) - elif is_aio: - message += ( - f'-> called from a `asyncio`-task bg {thread}\n' - ) + message += ( '-> scheduling `._pause_from_bg_root_thread()`..\n' ) @@ -2551,6 +2548,7 @@ def pause_from_sync( elif is_aio: greenback: ModuleType = maybe_import_greenback() repl_owner: Task = asyncio.current_task() + DebugStatus.shield_sigint() fute: asyncio.Future = run_trio_task_in_future( partial( _pause, @@ -2566,6 +2564,7 @@ def pause_from_sync( **_pause_kwargs ) ) + # TODO: for async version -> `.pause_from_aio()`? # bg_task, _ = await fute bg_task, _ = greenback.await_(fute) @@ -2700,8 +2699,7 @@ _crash_msg: str = ( def _post_mortem( - # provided and passed by `_pause()` - repl: PdbREPL, + repl: PdbREPL, # normally passed by `_pause()` # XXX all `partial`-ed in by `post_mortem()` below! tb: TracebackType, @@ -3018,7 +3016,17 @@ def open_crash_handler( yield except tuple(catch) as err: if type(err) not in ignore: - pdbp.xpm() + + # use our re-impl-ed version + _post_mortem( + repl=mk_pdb(), + tb=sys.exc_info()[2], + api_frame=inspect.currentframe().f_back, + ) + + # XXX NOTE, `pdbp`'s version seems to lose the up-stack + # tb-info? + # pdbp.xpm() raise -- 2.34.1 From 84dbf5381720865e18611db6d3b096a97b0eb91b Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 31 Jul 2024 21:34:57 -0400 Subject: [PATCH 209/305] Hide `[maybe]_open_crash_handler()` frame by default --- tractor/devx/_debug.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index d38c2239..e0a20c0e 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -2989,6 +2989,7 @@ async def maybe_wait_for_debugger( # - [ ] optional runtime plugging? # - [ ] detection for sync vs. async code? # - [ ] specialized REPL entry when in distributed mode? +# -[x] hide tb by def # - [x] allow ignoring kbi Bo @cm def open_crash_handler( @@ -2999,6 +3000,7 @@ def open_crash_handler( ignore: set[BaseException] = { KeyboardInterrupt, }, + tb_hide: bool = True, ): ''' Generic "post mortem" crash handler using `pdbp` REPL debugger. @@ -3011,6 +3013,8 @@ def open_crash_handler( `trio.run()`. ''' + __tracebackhide__: bool = tb_hide + err: BaseException try: yield @@ -3034,6 +3038,7 @@ def open_crash_handler( @cm def maybe_open_crash_handler( pdb: bool = False, + tb_hide: bool = True, ): ''' Same as `open_crash_handler()` but with bool input flag @@ -3042,6 +3047,8 @@ def maybe_open_crash_handler( Normally this is used with CLI endpoints such that if the --pdb flag is passed the pdb REPL is engaed on any crashes B) ''' + __tracebackhide__: bool = tb_hide + rtctx = nullcontext if pdb: rtctx = open_crash_handler -- 2.34.1 From d9662d9b343f721c2cfe07914004401b7a1a6e65 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 22 Aug 2024 17:10:01 -0400 Subject: [PATCH 210/305] More `.pause_from_sync()` in bg-threads "polish" Various `try`/`except` blocks around external APIs that raise when not running inside an `tractor` and/or some async framework (mostly to avoid too-late/benign error tbs on certain classes of actor tree teardown): - for the `log.pdb()` prompts emitted before REPL console entry. 
- inside `DebugStatus.is_main_trio_thread()`'s call to `sniffio`. - in `_post_mortem()` by catching `NoRuntime` when called from a thread still active after the `.open_root_actor()` has already exited. Also, - create a dedicated `DebugStateError` for raising instead of `assert`s when we have actual debug-request inconsistencies (as seem to be most likely with bg thread usage of `breakpoint()`). - show the `open_crash_handler()` frame on `bdb.BdbQuit` (for now?) --- tractor/devx/_debug.py | 90 +++++++++++++++++++++++++++++++++--------- 1 file changed, 71 insertions(+), 19 deletions(-) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index e0a20c0e..5e3b00d7 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -72,6 +72,10 @@ from tractor.to_asyncio import run_trio_task_in_future from tractor.log import get_logger from tractor._context import Context from tractor import _state +from tractor._exceptions import ( + InternalError, + NoRuntime, +) from tractor._state import ( current_actor, is_root_process, @@ -691,6 +695,14 @@ async def lock_stdio_for_peer( DebugStatus.unshield_sigint() +class DebugStateError(InternalError): + ''' + Something inconsistent or unexpected happend with a sub-actor's + debug mutex request to the root actor. + + ''' + + # TODO: rename to ReplState or somethin? # DebugRequest, make it a singleton instance? class DebugStatus: @@ -860,20 +872,37 @@ class DebugStatus: `trio.to_thread.run_sync()`. ''' + try: + async_lib: str = sniffio.current_async_library() + except sniffio.AsyncLibraryNotFoundError: + async_lib = None + + is_main_thread: bool = trio._util.is_main_thread() + # ^TODO, since this is private, @oremanj says + # we should just copy the impl for now..? + if is_main_thread: + thread_name: str = 'main' + else: + thread_name: str = threading.current_thread().name + is_trio_main = ( - # TODO: since this is private, @oremanj says - # we should just copy the impl for now.. - (is_main_thread := trio._util.is_main_thread()) + is_main_thread and - (async_lib := sniffio.current_async_library()) == 'trio' + (async_lib == 'trio') ) - if ( - not is_trio_main - and is_main_thread - ): - log.warning( + + report: str = f'Running thread: {thread_name!r}\n' + if async_lib: + report += ( f'Current async-lib detected by `sniffio`: {async_lib}\n' ) + else: + report += ( + 'No async-lib detected (by `sniffio`) ??\n' + ) + if not is_trio_main: + log.warning(report) + return is_trio_main # XXX apparently unreliable..see ^ # ( @@ -2615,7 +2644,15 @@ def pause_from_sync( bg_task: Task = current_task() # assert repl is repl - assert bg_task is repl_owner + # assert bg_task is repl_owner + if bg_task is not repl_owner: + raise DebugStateError( + f'The registered bg task for this debug request is NOT its owner ??\n' + f'bg_task: {bg_task}\n' + f'repl_owner: {repl_owner}\n\n' + + f'{DebugStatus.repr()}\n' + ) # NOTE: normally set inside `_enter_repl_sync()` DebugStatus.repl_task: str = repl_owner @@ -2715,17 +2752,28 @@ def _post_mortem( ''' __tracebackhide__: bool = hide_tb - actor: tractor.Actor = current_actor() + try: + actor: tractor.Actor = current_actor() + actor_repr: str = str(actor.uid) + # ^TODO, instead a nice runtime-info + maddr + uid? + # -[ ] impl a `Actor.__repr()__`?? + # |_ : @ + + except NoRuntime: + actor_repr: str = '' + + try: + task_repr: Task = current_task() + except RuntimeError: + task_repr: str = '' # TODO: print the actor supervion tree up to the root # here! 
Bo log.pdb( f'{_crash_msg}\n' f'x>(\n' - f' |_ {current_task()} @ {actor.uid}\n' + f' |_ {task_repr} @ {actor_repr}\n' - # TODO: make an `Actor.__repr()__` - # f'|_ {current_task()} @ {actor.name}\n' ) # NOTE only replacing this from `pdbp.xpm()` to add the @@ -3022,11 +3070,15 @@ def open_crash_handler( if type(err) not in ignore: # use our re-impl-ed version - _post_mortem( - repl=mk_pdb(), - tb=sys.exc_info()[2], - api_frame=inspect.currentframe().f_back, - ) + try: + _post_mortem( + repl=mk_pdb(), + tb=sys.exc_info()[2], + api_frame=inspect.currentframe().f_back, + ) + except bdb.BdbQuit: + __tracebackhide__: bool = False + raise # XXX NOTE, `pdbp`'s version seems to lose the up-stack # tb-info? -- 2.34.1 From 7443e387b58dececb9eea69a41d59bd14da0687c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 3 Dec 2024 15:26:25 -0500 Subject: [PATCH 211/305] Messy-teardown `DebugStatus` related fixes Mostly fixing edge cases with `asyncio` and/or bg threads where the `.repl_release: trio.Event` needs to be used from the main `trio` thread OW confusing-but-valid teardown tracebacks can show under various races. Also improve, - log reporting for such internal bugs to make them more obvious on console via `log.exception()`. - only restore the SIGINT handler when runtime is (still) active. - reporting when `tractor.pause(shield=True)` should be used and unhiding the internal frames from the tb in that case. - for `pause_from_sync()` some deep fixes.. |_add a `allow_no_runtime: bool = False` flag to allow **not** requiring the actor runtime to be active. |_fix the `greenback` case-branch to only trigger on `not is_trio_thread`. |_add a scope-global `repl_owner: Task|Thread|None = None` to avoid ref errors.. --- tractor/devx/_debug.py | 112 +++++++++++++++++++++++++++++++---------- 1 file changed, 86 insertions(+), 26 deletions(-) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 5e3b00d7..83ca5492 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -730,6 +730,9 @@ class DebugStatus: # -[ ] see if we can get our proto oco task-mngr to work for # this? repl_task: Task|None = None + # repl_thread: Thread|None = None + # ^TODO? + repl_release: trio.Event|None = None req_task: Task|None = None @@ -839,11 +842,12 @@ class DebugStatus: if ( not cls.is_main_trio_thread() and - # not _state._runtime_vars.get( - # '_is_infected_aio', - # False, - # ) - not current_actor().is_infected_aio() + not _state._runtime_vars.get( + '_is_infected_aio', + False, + ) + # not current_actor().is_infected_aio() + # ^XXX, since for bg-thr case will always raise.. ): trio.from_thread.run_sync( signal.signal, @@ -928,12 +932,27 @@ class DebugStatus: try: # sometimes the task might already be terminated in # which case this call will raise an RTE? - if repl_release is not None: + # See below for reporting on that.. + if ( + repl_release is not None + and + not repl_release.is_set() + ): if cls.is_main_trio_thread(): repl_release.set() - elif current_actor().is_infected_aio(): - + elif ( + _state._runtime_vars.get( + '_is_infected_aio', + False, + ) + # ^XXX, again bc we need to not except + # but for bg-thread case it will always raise.. + # + # TODO, is there a better api then using + # `err_on_no_runtime=False` in the below? 
+ # current_actor().is_infected_aio() + ): async def _set_repl_release(): repl_release.set() @@ -949,6 +968,15 @@ class DebugStatus: trio.from_thread.run_sync( repl_release.set ) + + except RuntimeError as rte: + log.exception( + f'Failed to release debug-request ??\n\n' + f'{cls.repr()}\n' + ) + # pdbp.set_trace() + raise rte + finally: # if req_ctx := cls.req_ctx: # req_ctx._scope.cancel() @@ -976,9 +1004,10 @@ class DebugStatus: # logging when we don't need to? cls.repl = None - # restore original sigint handler - cls.unshield_sigint() - + # maybe restore original sigint handler + # XXX requires runtime check to avoid crash! + if current_actor(err_on_no_runtime=False): + cls.unshield_sigint() # TODO: use the new `@lowlevel.singleton` for this! @@ -1066,7 +1095,7 @@ class PdbREPL(pdbp.Pdb): # Lock.release(raise_on_thread=False) Lock.release() - # XXX after `Lock.release()` for root local repl usage + # XXX AFTER `Lock.release()` for root local repl usage DebugStatus.release() def set_quit(self): @@ -1672,7 +1701,7 @@ class DebugRequestError(RuntimeError): ''' -_repl_fail_msg: str = ( +_repl_fail_msg: str|None = ( 'Failed to REPl via `_pause()` ' ) @@ -1712,6 +1741,7 @@ async def _pause( ''' __tracebackhide__: bool = hide_tb + pause_err: BaseException|None = None actor: Actor = current_actor() try: task: Task = current_task() @@ -2094,11 +2124,13 @@ async def _pause( # TODO: prolly factor this plus the similar block from # `_enter_repl_sync()` into a common @cm? - except BaseException as pause_err: + except BaseException as _pause_err: + pause_err: BaseException = _pause_err if isinstance(pause_err, bdb.BdbQuit): log.devx( - 'REPL for pdb was quit!\n' + 'REPL for pdb was explicitly quit!\n' ) + _repl_fail_msg = None # when the actor is mid-runtime cancellation the # `Actor._service_n` might get closed before we can spawn @@ -2117,13 +2149,18 @@ async def _pause( ) return - else: - log.exception( - _repl_fail_msg - + - f'on behalf of {repl_task} ??\n' + elif isinstance(pause_err, trio.Cancelled): + _repl_fail_msg = ( + 'You called `tractor.pause()` from an already cancelled scope!\n\n' + 'Consider `await tractor.pause(shield=True)` to make it work B)\n' ) + else: + _repl_fail_msg += f'on behalf of {repl_task} ??\n' + + if _repl_fail_msg: + log.exception(_repl_fail_msg) + if not actor.is_infected_aio(): DebugStatus.release(cancel_req_task=True) @@ -2152,6 +2189,8 @@ async def _pause( DebugStatus.req_err or repl_err + or + pause_err ): __tracebackhide__: bool = False @@ -2435,6 +2474,8 @@ def pause_from_sync( called_from_builtin: bool = False, api_frame: FrameType|None = None, + allow_no_runtime: bool = False, + # proxy to `._pause()`, for ex: # shield: bool = False, # api_frame: FrameType|None = None, @@ -2453,16 +2494,25 @@ def pause_from_sync( ''' __tracebackhide__: bool = hide_tb + repl_owner: Task|Thread|None = None try: actor: tractor.Actor = current_actor( err_on_no_runtime=False, ) - if not actor: - raise RuntimeError( - 'Not inside the `tractor`-runtime?\n' + if ( + not actor + and + not allow_no_runtime + ): + raise NoRuntime( + 'The actor runtime has not been opened?\n\n' '`tractor.pause_from_sync()` is not functional without a wrapping\n' '- `async with tractor.open_nursery()` or,\n' - '- `async with tractor.open_root_actor()`\n' + '- `async with tractor.open_root_actor()`\n\n' + + 'If you are getting this from a builtin `breakpoint()` call\n' + 'it might mean the runtime was started then ' + 'stopped prematurely?\n' ) message: str = ( f'{actor.uid} task called 
`tractor.pause_from_sync()`\n' @@ -2485,6 +2535,7 @@ def pause_from_sync( repl: PdbREPL = mk_pdb() # message += f'-> created local REPL {repl}\n' + is_trio_thread: bool = DebugStatus.is_main_trio_thread() is_root: bool = is_root_process() is_aio: bool = actor.is_infected_aio() @@ -2500,7 +2551,7 @@ def pause_from_sync( # thread which will call `._pause()` manually with special # handling for root-actor caller usage. if ( - not DebugStatus.is_main_trio_thread() + not is_trio_thread and not is_aio # see below for this usage ): @@ -2574,7 +2625,11 @@ def pause_from_sync( DebugStatus.shield_sigint() assert bg_task is not DebugStatus.repl_task - elif is_aio: + elif ( + not is_trio_thread + and + is_aio + ): greenback: ModuleType = maybe_import_greenback() repl_owner: Task = asyncio.current_task() DebugStatus.shield_sigint() @@ -2758,9 +2813,11 @@ def _post_mortem( # ^TODO, instead a nice runtime-info + maddr + uid? # -[ ] impl a `Actor.__repr()__`?? # |_ : @ + # no_runtime: bool = False except NoRuntime: actor_repr: str = '' + # no_runtime: bool = True try: task_repr: Task = current_task() @@ -2796,6 +2853,8 @@ def _post_mortem( # Since we presume the post-mortem was enaged to a task-ending # error, we MUST release the local REPL request so that not other # local task nor the root remains blocked! + # if not no_runtime: + # DebugStatus.release() DebugStatus.release() @@ -3033,6 +3092,7 @@ async def maybe_wait_for_debugger( # pass return False + # TODO: better naming and what additionals? # - [ ] optional runtime plugging? # - [ ] detection for sync vs. async code? -- 2.34.1 From 36021d1f2b596c46902711c8fb6d5d2b5091d698 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 4 Dec 2024 14:01:38 -0500 Subject: [PATCH 212/305] Rename `n: trio.Nursery` -> `tn` (task nursery) --- examples/debugging/asyncio_bp.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/debugging/asyncio_bp.py b/examples/debugging/asyncio_bp.py index 161435d1..50a0eead 100644 --- a/examples/debugging/asyncio_bp.py +++ b/examples/debugging/asyncio_bp.py @@ -58,7 +58,7 @@ async def trio_ctx( raise_after_bp=not bp_before_started, ) as (first, chan), - trio.open_nursery() as n, + trio.open_nursery() as tn, ): assert first == 'start' @@ -67,7 +67,7 @@ async def trio_ctx( await ctx.started(first) - n.start_soon( + tn.start_soon( to_asyncio.run_task, aio_sleep_forever, ) @@ -83,8 +83,8 @@ async def main( debug_mode=True, maybe_enable_greenback=True, # loglevel='devx', - ) as n: - ptl: Portal = await n.start_actor( + ) as an: + ptl: Portal = await an.start_actor( 'aio_daemon', enable_modules=[__name__], infect_asyncio=True, -- 2.34.1 From 9af6271e99fbe91b6724d36b7acd25007d2dc8f8 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 5 Dec 2024 20:55:12 -0500 Subject: [PATCH 213/305] Add `breakpoint()` hook restoration example + test --- .../debugging/restore_builtin_breakpoint.py | 35 ++++++++++-- tests/devx/test_debugger.py | 53 ++++++++++++++++++- 2 files changed, 82 insertions(+), 6 deletions(-) diff --git a/examples/debugging/restore_builtin_breakpoint.py b/examples/debugging/restore_builtin_breakpoint.py index 6e141dfc..89605075 100644 --- a/examples/debugging/restore_builtin_breakpoint.py +++ b/examples/debugging/restore_builtin_breakpoint.py @@ -6,19 +6,46 @@ import tractor async def main() -> None: - async with tractor.open_nursery(debug_mode=True) as an: - assert os.environ['PYTHONBREAKPOINT'] == 'tractor._debug._set_trace' + # intially unset, no entry. 
+ orig_pybp_var: int = os.environ.get('PYTHONBREAKPOINT') + assert orig_pybp_var in {None, "0"} + + async with tractor.open_nursery( + debug_mode=True, + ) as an: + assert an + assert ( + (pybp_var := os.environ['PYTHONBREAKPOINT']) + == + 'tractor.devx._debug._sync_pause_from_builtin' + ) # TODO: an assert that verifies the hook has indeed been, hooked # XD - assert sys.breakpointhook is not tractor._debug._set_trace + assert ( + (pybp_hook := sys.breakpointhook) + is not tractor.devx._debug._set_trace + ) + print( + f'$PYTHONOBREAKPOINT: {pybp_var!r}\n' + f'`sys.breakpointhook`: {pybp_hook!r}\n' + ) breakpoint() + pass # first bp, tractor hook set. - # TODO: an assert that verifies the hook is unhooked.. + # XXX AFTER EXIT (of actor-runtime) verify the hook is unset.. + # + # YES, this is weird but it's how stdlib docs say to do it.. + # https://docs.python.org/3/library/sys.html#sys.breakpointhook + assert os.environ.get('PYTHONBREAKPOINT') is orig_pybp_var assert sys.breakpointhook + + # now ensure a regular builtin pause still works breakpoint() + pass # last bp, stdlib hook restored + if __name__ == '__main__': trio.run(main) diff --git a/tests/devx/test_debugger.py b/tests/devx/test_debugger.py index 2a24bf98..5327fb0b 100644 --- a/tests/devx/test_debugger.py +++ b/tests/devx/test_debugger.py @@ -1229,6 +1229,53 @@ def test_shield_pause( child.expect(EOF) +def test_breakpoint_hook_restored( + spawn, +): + ''' + Ensures our actor runtime sets a custom `breakpoint()` hook + on open then restores the stdlib's default on close. + + The hook state validation is done via `assert`s inside the + invoked script with only `breakpoint()` (not `tractor.pause()`) + calls used. + + ''' + child = spawn('restore_builtin_breakpoint') + + child.expect(PROMPT) + assert_before( + child, + [ + _pause_msg, + " Date: Mon, 9 Dec 2024 15:38:28 -0500 Subject: [PATCH 214/305] Wrap `asyncio_bp.py` ex into test suite Ensuring we can at least use `breakpoint()` from an infected actor's `asyncio.Task` spawned via a `.to_asyncio` API. Also includes a little `tests/devx/` reorging, - start splitting out non-`tractor.pause()` tests into a new `test_pause_from_non_trio.py` for all the `.pause_from_sync()` use in bg-threaded or `asyncio` applications. - factor harness commonalities to the `devx/conftest` (namely the `do_ctlc()` masher). - mv `test_pause_from_sync` to the new non`-trio` mod. NOTE, the `ctlc=True` is still failing for `test_pause_from_asyncio_task` which is a user-happiness bug but not anything fundamentally broken - just need to handle the `asyncio` case in `.devx._debug.sigint_shield()`! --- examples/debugging/asyncio_bp.py | 42 +++- tests/devx/conftest.py | 51 ++++ tests/devx/test_debugger.py | 235 +----------------- tests/devx/test_pause_from_non_trio.py | 329 +++++++++++++++++++++++++ 4 files changed, 418 insertions(+), 239 deletions(-) create mode 100644 tests/devx/test_pause_from_non_trio.py diff --git a/examples/debugging/asyncio_bp.py b/examples/debugging/asyncio_bp.py index 50a0eead..b55b28fd 100644 --- a/examples/debugging/asyncio_bp.py +++ b/examples/debugging/asyncio_bp.py @@ -1,3 +1,8 @@ +''' +Examples of using the builtin `breakpoint()` from an `asyncio.Task` +running in a subactor spawned with `infect_asyncio=True`. + +''' import asyncio import trio @@ -26,15 +31,16 @@ async def bp_then_error( # NOTE: what happens here inside the hook needs some refinement.. 
# => seems like it's still `._debug._set_trace()` but # we set `Lock.local_task_in_debug = 'sync'`, we probably want - # some further, at least, meta-data about the task/actoq in debug - # in terms of making it clear it's asyncio mucking about. + # some further, at least, meta-data about the task/actor in debug + # in terms of making it clear it's `asyncio` mucking about. breakpoint() + # short checkpoint / delay - await asyncio.sleep(0.5) + await asyncio.sleep(0.5) # asyncio-side if raise_after_bp: - raise ValueError('blah') + raise ValueError('asyncio side error!') # TODO: test case with this so that it gets cancelled? else: @@ -46,7 +52,7 @@ async def bp_then_error( @tractor.context async def trio_ctx( ctx: tractor.Context, - bp_before_started: bool = True, + bp_before_started: bool = False, ): # this will block until the ``asyncio`` task sends a "first" @@ -55,7 +61,7 @@ async def trio_ctx( to_asyncio.open_channel_from( bp_then_error, - raise_after_bp=not bp_before_started, + # raise_after_bp=not bp_before_started, ) as (first, chan), trio.open_nursery() as tn, @@ -63,9 +69,9 @@ async def trio_ctx( assert first == 'start' if bp_before_started: - await tractor.breakpoint() + await tractor.pause() - await ctx.started(first) + await ctx.started(first) # trio-side tn.start_soon( to_asyncio.run_task, @@ -77,6 +83,10 @@ async def trio_ctx( async def main( bps_all_over: bool = True, + # TODO, WHICH OF THESE HAZ BUGZ? + cancel_from_root: bool = False, + err_from_root: bool = False, + ) -> None: async with tractor.open_nursery( @@ -99,12 +109,18 @@ async def main( assert first == 'start' - if bps_all_over: - await tractor.breakpoint() + # pause in parent to ensure no cross-actor + # locking problems exist! + await tractor.pause() + + if cancel_from_root: + await ctx.cancel() + + if err_from_root: + assert 0 + else: + await trio.sleep_forever() - # await trio.sleep_forever() - await ctx.cancel() - assert 0 # TODO: case where we cancel from trio-side while asyncio task # has debugger lock? diff --git a/tests/devx/conftest.py b/tests/devx/conftest.py index 28a14cbb..e1ad2ea3 100644 --- a/tests/devx/conftest.py +++ b/tests/devx/conftest.py @@ -2,6 +2,7 @@ `tractor.devx.*` tooling sub-pkg test space. ''' +import time from typing import ( Callable, ) @@ -11,9 +12,19 @@ from pexpect.exceptions import ( TIMEOUT, ) from pexpect.spawnbase import SpawnBase + from tractor._testing import ( mk_cmd, ) +from tractor.devx._debug import ( + _pause_msg as _pause_msg, + _crash_msg as _crash_msg, + _repl_fail_msg as _repl_fail_msg, + _ctlc_ignore_header as _ctlc_ignore_header, +) +from conftest import ( + _ci_env, +) @pytest.fixture @@ -107,6 +118,9 @@ def expect( raise +PROMPT = r"\(Pdb\+\)" + + def in_prompt_msg( child: SpawnBase, parts: list[str], @@ -166,3 +180,40 @@ def assert_before( err_on_false=True, **kwargs ) + + +def do_ctlc( + child, + count: int = 3, + delay: float = 0.1, + patt: str|None = None, + + # expect repl UX to reprint the prompt after every + # ctrl-c send. + # XXX: no idea but, in CI this never seems to work even on 3.10 so + # needs some further investigation potentially... + expect_prompt: bool = not _ci_env, + +) -> str|None: + + before: str|None = None + + # make sure ctl-c sends don't do anything but repeat output + for _ in range(count): + time.sleep(delay) + child.sendcontrol('c') + + # TODO: figure out why this makes CI fail.. + # if you run this test manually it works just fine.. 
+ if expect_prompt: + time.sleep(delay) + child.expect(PROMPT) + before = str(child.before.decode()) + time.sleep(delay) + + if patt: + # should see the last line on console + assert patt in before + + # return the console content up to the final prompt + return before diff --git a/tests/devx/test_debugger.py b/tests/devx/test_debugger.py index 5327fb0b..254b92a1 100644 --- a/tests/devx/test_debugger.py +++ b/tests/devx/test_debugger.py @@ -21,7 +21,9 @@ from pexpect.exceptions import ( EOF, ) -from tractor.devx._debug import ( +from .conftest import ( + do_ctlc, + PROMPT, _pause_msg, _crash_msg, _repl_fail_msg, @@ -68,9 +70,6 @@ has_nested_actors = pytest.mark.has_nested_actors # ) -PROMPT = r"\(Pdb\+\)" - - @pytest.mark.parametrize( 'user_in_out', [ @@ -121,8 +120,10 @@ def test_root_actor_error( ids=lambda item: f'{item[0]} -> {item[1]}', ) def test_root_actor_bp(spawn, user_in_out): - """Demonstrate breakpoint from in root actor. - """ + ''' + Demonstrate breakpoint from in root actor. + + ''' user_input, expect_err_str = user_in_out child = spawn('root_actor_breakpoint') @@ -144,43 +145,6 @@ def test_root_actor_bp(spawn, user_in_out): assert expect_err_str in str(child.before) -def do_ctlc( - child, - count: int = 3, - delay: float = 0.1, - patt: str|None = None, - - # expect repl UX to reprint the prompt after every - # ctrl-c send. - # XXX: no idea but, in CI this never seems to work even on 3.10 so - # needs some further investigation potentially... - expect_prompt: bool = not _ci_env, - -) -> str|None: - - before: str|None = None - - # make sure ctl-c sends don't do anything but repeat output - for _ in range(count): - time.sleep(delay) - child.sendcontrol('c') - - # TODO: figure out why this makes CI fail.. - # if you run this test manually it works just fine.. - if expect_prompt: - time.sleep(delay) - child.expect(PROMPT) - before = str(child.before.decode()) - time.sleep(delay) - - if patt: - # should see the last line on console - assert patt in before - - # return the console content up to the final prompt - return before - - def test_root_actor_bp_forever( spawn, ctlc: bool, @@ -917,138 +881,6 @@ def test_different_debug_mode_per_actor( ) -def test_pause_from_sync( - spawn, - ctlc: bool -): - ''' - Verify we can use the `pdbp` REPL from sync functions AND from - any thread spawned with `trio.to_thread.run_sync()`. - - `examples/debugging/sync_bp.py` - - ''' - child = spawn('sync_bp') - - # first `sync_pause()` after nurseries open - child.expect(PROMPT) - assert_before( - child, - [ - # pre-prompt line - _pause_msg, - " similar to the `delay` input to `do_ctlc()` below, setting - # this too low can cause the test to fail since the `subactor` - # suffers a race where the root/parent sends an actor-cancel - # prior to the context task hitting its pause point (and thus - # engaging the `sigint_shield()` handler in time); this value - # seems be good enuf? - time.sleep(0.6) - - # one of the bg thread or subactor should have - # `Lock.acquire()`-ed - # (NOT both, which will result in REPL clobbering!) - attach_patts: dict[str, list[str]] = { - 'subactor': [ - "'start_n_sync_pause'", - "('subactor'", - ], - 'inline_root_bg_thread': [ - " similar to the `delay` input to `do_ctlc()` below, setting + # this too low can cause the test to fail since the `subactor` + # suffers a race where the root/parent sends an actor-cancel + # prior to the context task hitting its pause point (and thus + # engaging the `sigint_shield()` handler in time); this value + # seems be good enuf? 
+ time.sleep(0.6) + + # one of the bg thread or subactor should have + # `Lock.acquire()`-ed + # (NOT both, which will result in REPL clobbering!) + attach_patts: dict[str, list[str]] = { + 'subactor': [ + "'start_n_sync_pause'", + "('subactor'", + ], + 'inline_root_bg_thread': [ + " list[str]: + ''' + Receive any of a `list[str]` of patterns provided in + `attach_patts`. + + Used to test racing prompts from multiple actors and/or + tasks using a common root process' `pdbp` REPL. + + ''' + assert attach_patts + + child.expect(PROMPT) + before = str(child.before.decode()) + + for attach_key in attach_patts: + if attach_key in before: + expected_patts: str = attach_patts.pop(attach_key) + assert_before( + child, + expected_patts + ) + break # from for + else: + pytest.fail( + f'No keys found?\n\n' + f'{attach_patts.keys()}\n\n' + f'{before}\n' + ) + + # ensure no other task/threads engaged a REPL + # at the same time as the one that was detected above. + for key, other_patts in attach_patts.copy().items(): + assert not in_prompt_msg( + child, + other_patts, + ) + + if ctlc: + do_ctlc( + child, + patt=prompt, + # NOTE same as comment above + delay=ctlc_delay, + ) + + return expected_patts + # yield child + + +def test_pause_from_asyncio_task( + spawn, + ctlc: bool + # ^TODO, fix for `asyncio`!! +): + ''' + Verify we can use the `pdbp` REPL from an `asyncio.Task` spawned using + APIs in `.to_asyncio`. + + `examples/debugging/asycio_bp.py` + + ''' + child = spawn('asyncio_bp') + + # RACE on whether trio/asyncio task bps first + attach_patts: dict[str, list[str]] = { + + # first pause in guest-mode (aka "infecting") + # `trio.Task`. + 'trio-side': [ + _pause_msg, + " Date: Mon, 9 Dec 2024 16:08:55 -0500 Subject: [PATCH 215/305] Change `tractor.breakpoint()` to new `.pause()` in test suite --- examples/debugging/fast_error_in_root_after_spawn.py | 2 +- examples/debugging/multi_daemon_subactors.py | 2 +- .../multi_nested_subactors_error_up_through_nurseries.py | 2 +- examples/debugging/multi_subactors.py | 2 +- examples/debugging/root_actor_breakpoint.py | 2 +- examples/debugging/root_actor_breakpoint_forever.py | 2 +- examples/debugging/subactor_bp_in_ctx.py | 6 +++--- tests/test_context_stream_semantics.py | 2 +- tests/test_task_broadcasting.py | 2 +- 9 files changed, 11 insertions(+), 11 deletions(-) diff --git a/examples/debugging/fast_error_in_root_after_spawn.py b/examples/debugging/fast_error_in_root_after_spawn.py index 570cf7ef..86710788 100644 --- a/examples/debugging/fast_error_in_root_after_spawn.py +++ b/examples/debugging/fast_error_in_root_after_spawn.py @@ -1,5 +1,5 @@ ''' -Fast fail test with a context. +Fast fail test with a `Context`. Ensure the partially initialized sub-actor process doesn't cause a hang on error/cancel of the parent diff --git a/examples/debugging/multi_daemon_subactors.py b/examples/debugging/multi_daemon_subactors.py index 4a462623..7844ccef 100644 --- a/examples/debugging/multi_daemon_subactors.py +++ b/examples/debugging/multi_daemon_subactors.py @@ -7,7 +7,7 @@ async def breakpoint_forever(): try: while True: yield 'yo' - await tractor.breakpoint() + await tractor.pause() except BaseException: tractor.log.get_console_log().exception( 'Cancelled while trying to enter pause point!' 
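In its smallest form the mechanical swap applied across these example/test files is just (a sketch using only the public `tractor` API):

    import tractor

    async def bp_forever():
        while True:
            # await tractor.breakpoint()  # <- the old name
            await tractor.pause()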
diff --git a/examples/debugging/multi_nested_subactors_error_up_through_nurseries.py b/examples/debugging/multi_nested_subactors_error_up_through_nurseries.py index 8df52e3b..b63f1945 100644 --- a/examples/debugging/multi_nested_subactors_error_up_through_nurseries.py +++ b/examples/debugging/multi_nested_subactors_error_up_through_nurseries.py @@ -10,7 +10,7 @@ async def name_error(): async def breakpoint_forever(): "Indefinitely re-enter debugger in child actor." while True: - await tractor.breakpoint() + await tractor.pause() # NOTE: if the test never sent 'q'/'quit' commands # on the pdb repl, without this checkpoint line the diff --git a/examples/debugging/multi_subactors.py b/examples/debugging/multi_subactors.py index 22b13ac8..57634cc3 100644 --- a/examples/debugging/multi_subactors.py +++ b/examples/debugging/multi_subactors.py @@ -6,7 +6,7 @@ async def breakpoint_forever(): "Indefinitely re-enter debugger in child actor." while True: await trio.sleep(0.1) - await tractor.breakpoint() + await tractor.pause() async def name_error(): diff --git a/examples/debugging/root_actor_breakpoint.py b/examples/debugging/root_actor_breakpoint.py index 5c858d4c..55b4ca56 100644 --- a/examples/debugging/root_actor_breakpoint.py +++ b/examples/debugging/root_actor_breakpoint.py @@ -10,7 +10,7 @@ async def main(): await trio.sleep(0.1) - await tractor.breakpoint() + await tractor.pause() await trio.sleep(0.1) diff --git a/examples/debugging/root_actor_breakpoint_forever.py b/examples/debugging/root_actor_breakpoint_forever.py index 88a6e0e9..04cd7e7e 100644 --- a/examples/debugging/root_actor_breakpoint_forever.py +++ b/examples/debugging/root_actor_breakpoint_forever.py @@ -11,7 +11,7 @@ async def main( # loglevel='runtime', ): while True: - await tractor.breakpoint() + await tractor.pause() if __name__ == '__main__': diff --git a/examples/debugging/subactor_bp_in_ctx.py b/examples/debugging/subactor_bp_in_ctx.py index a47dbd92..2c5fee8c 100644 --- a/examples/debugging/subactor_bp_in_ctx.py +++ b/examples/debugging/subactor_bp_in_ctx.py @@ -4,9 +4,9 @@ import trio async def gen(): yield 'yo' - await tractor.breakpoint() + await tractor.pause() yield 'yo' - await tractor.breakpoint() + await tractor.pause() @tractor.context @@ -15,7 +15,7 @@ async def just_bp( ) -> None: await ctx.started() - await tractor.breakpoint() + await tractor.pause() # TODO: bps and errors in this call.. 
async for val in gen(): diff --git a/tests/test_context_stream_semantics.py b/tests/test_context_stream_semantics.py index 8edea510..ade275aa 100644 --- a/tests/test_context_stream_semantics.py +++ b/tests/test_context_stream_semantics.py @@ -955,7 +955,7 @@ async def echo_back_sequence( ) await ctx.started() - # await tractor.breakpoint() + # await tractor.pause() async with ctx.open_stream( msg_buffer_size=msg_buffer_size, diff --git a/tests/test_task_broadcasting.py b/tests/test_task_broadcasting.py index d7a29134..4a2209eb 100644 --- a/tests/test_task_broadcasting.py +++ b/tests/test_task_broadcasting.py @@ -271,7 +271,7 @@ def test_faster_task_to_recv_is_cancelled_by_slower( # the faster subtask was cancelled break - # await tractor.breakpoint() + # await tractor.pause() # await stream.receive() print(f'final value: {value}') -- 2.34.1 From e1575051f06d86463ffb3482a10b83b35b35c37d Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 9 Dec 2024 16:56:30 -0500 Subject: [PATCH 216/305] Expose a `_ctlc_ignore_header: str` for use in `sigint_shield()` --- tractor/devx/_debug.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 83ca5492..2f010b15 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -1420,6 +1420,10 @@ def any_connected_locker_child() -> bool: return False +_ctlc_ignore_header: str = ( + 'Ignoring SIGINT while debug REPL in use' +) + def sigint_shield( signum: int, frame: 'frame', # type: ignore # noqa @@ -1501,7 +1505,9 @@ def sigint_shield( # NOTE: don't emit this with `.pdb()` level in # root without a higher level. log.runtime( - f'Ignoring SIGINT while debug REPL in use by child ' + _ctlc_ignore_header + + + f' by child ' f'{uid_in_debug}\n' ) problem = None @@ -1535,7 +1541,9 @@ def sigint_shield( # NOTE: since we emit this msg on ctl-c, we should # also always re-print the prompt the tail block! log.pdb( - 'Ignoring SIGINT while pdb REPL in use by root actor..\n' + _ctlc_ignore_header + + + f' by root actor..\n' f'{DebugStatus.repl_task}\n' f' |_{repl}\n' ) @@ -1596,16 +1604,20 @@ def sigint_shield( repl ): log.pdb( - f'Ignoring SIGINT while local task using debug REPL\n' - f'|_{repl_task}\n' - f' |_{repl}\n' + _ctlc_ignore_header + + + f' by local task\n\n' + f'{repl_task}\n' + f' |_{repl}\n' ) elif req_task: log.debug( - 'Ignoring SIGINT while debug request task is open but either,\n' - '- someone else is already REPL-in and has the `Lock`, or\n' - '- some other local task already is replin?\n' - f'|_{req_task}\n' + _ctlc_ignore_header + + + f' by local request-task and either,\n' + f'- someone else is already REPL-in and has the `Lock`, or\n' + f'- some other local task already is replin?\n\n' + f'{req_task}\n' ) # TODO can we remove this now? 
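For intuition, the shield whose log output is being tweaked here is at heart a custom `SIGINT` handler that logs-and-swallows Ctrl-C while a REPL is engaged; a minimal standalone sketch of just that shape (tractor's real handler additionally inspects the debug-lock and REPL-task state seen above):

    import signal

    def sigint_shield(signum, frame) -> None:
        # swallow Ctrl-C instead of raising `KeyboardInterrupt`
        print('Ignoring SIGINT while debug REPL in use')

    orig = signal.signal(signal.SIGINT, sigint_shield)
    try:
        breakpoint()  # this REPL session now survives Ctrl-C
    finally:
        # ALWAYS restore the original handler on exit!
        signal.signal(signal.SIGINT, orig)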
-- 2.34.1 From ec6b2e8738d606970f94fd99f1a5929252f02f29 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 9 Dec 2024 16:57:22 -0500 Subject: [PATCH 217/305] Support custom `boxer_header: str` provided by `pformat_boxed_tb()` caller --- tractor/devx/pformat.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tractor/devx/pformat.py b/tractor/devx/pformat.py index 5fe9bc62..d24eaaf4 100644 --- a/tractor/devx/pformat.py +++ b/tractor/devx/pformat.py @@ -53,6 +53,7 @@ def pformat_boxed_tb( tb_box_indent: int|None = None, tb_body_indent: int = 1, + boxer_header: str = '-' ) -> str: ''' @@ -88,9 +89,9 @@ def pformat_boxed_tb( tb_box: str = ( f'|\n' - f' ------ - ------\n' + f' ------ {boxer_header} ------\n' f'{tb_body}' - f' ------ - ------\n' + f' ------ {boxer_header}- ------\n' f'_|\n' ) tb_box_indent: str = ( -- 2.34.1 From 5fcb46bbb9b6604ed196cc1efe3790a8606bac52 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 9 Dec 2024 16:57:57 -0500 Subject: [PATCH 218/305] Set `RemoteActorError.pformat(boxer_header=self.relay_uid)` by def --- tractor/_exceptions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index a0b6ff3f..812664ac 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -609,6 +609,7 @@ class RemoteActorError(Exception): # just after Date: Mon, 9 Dec 2024 17:14:51 -0500 Subject: [PATCH 219/305] Add TODO for a tb frame "filterer" sys.. --- tractor/devx/_frame_stack.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tractor/devx/_frame_stack.py b/tractor/devx/_frame_stack.py index 89a9e849..8e9bf46f 100644 --- a/tractor/devx/_frame_stack.py +++ b/tractor/devx/_frame_stack.py @@ -234,7 +234,7 @@ def find_caller_info( _frame2callerinfo_cache: dict[FrameType, CallerInfo] = {} -# TODO: -[x] move all this into new `.devx._code`! +# TODO: -[x] move all this into new `.devx._frame_stack`! # -[ ] consider rename to _callstack? # -[ ] prolly create a `@runtime_api` dec? # |_ @api_frame seems better? @@ -286,3 +286,18 @@ def api_frame( wrapped._call_infos: dict[FrameType, CallerInfo] = _frame2callerinfo_cache wrapped.__api_func__: bool = True return wrapper(wrapped) + + +# TODO: something like this instead of the adhoc frame-unhiding +# blocks all over the runtime!! XD +# -[ ] ideally we can expect a certain error (set) and if something +# else is raised then all frames below the wrapped one will be +# un-hidden via `__tracebackhide__: bool = False`. +# |_ might need to dynamically mutate the code objs like +# `pdbp.hideframe()` does? +# -[ ] use this as a `@acm` decorator as introed in 3.10? +# @acm +# async def unhide_frame_when_not( +# error_set: set[BaseException], +# ) -> TracebackType: +# ... -- 2.34.1 From 962941c56c2094c99668af88e55b9b1d976dba76 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 9 Dec 2024 17:37:27 -0500 Subject: [PATCH 220/305] Type the inter-loop chans --- tractor/to_asyncio.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tractor/to_asyncio.py b/tractor/to_asyncio.py index 8feaf88f..f2a8570b 100644 --- a/tractor/to_asyncio.py +++ b/tractor/to_asyncio.py @@ -245,14 +245,14 @@ def _run_asyncio_task( result != orig and aio_err is None and - # in the ``open_channel_from()`` case we don't + # in the `open_channel_from()` case we don't # relay through the "return value". 
not provide_channels ): to_trio.send_nowait(result) finally: - # if the task was spawned using ``open_channel_from()`` + # if the task was spawned using `open_channel_from()` # then we close the channels on exit. if provide_channels: # only close the sender side which will relay @@ -500,7 +500,7 @@ async def run_task( ''' # simple async func - chan = _run_asyncio_task( + chan: LinkedTaskChannel = _run_asyncio_task( func, qsize=1, **kwargs, @@ -530,7 +530,7 @@ async def open_channel_from( spawned ``asyncio`` task and ``trio``. ''' - chan = _run_asyncio_task( + chan: LinkedTaskChannel = _run_asyncio_task( target, qsize=2**8, provide_channels=True, -- 2.34.1 From c738f8b54056ecbf08c48aac85ef5763cc201ce0 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 9 Dec 2024 18:04:32 -0500 Subject: [PATCH 221/305] Change masked `.pause()` line --- tractor/_context.py | 21 +++++++++++++++++---- tractor/trionics/_broadcast.py | 2 +- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 31db2bad..d4cad88e 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -1703,15 +1703,28 @@ class Context: # TODO: expose as mod func instead! structfmt = pretty_struct.Struct.pformat if self._in_overrun: - log.warning( - f'Queueing OVERRUN msg on caller task:\n\n' - + report: str = ( f'{flow_body}' - f'{structfmt(msg)}\n' ) + over_q: deque = self._overflow_q self._overflow_q.append(msg) + if len(over_q) == over_q.maxlen: + report = ( + 'FAILED to queue OVERRUN msg, OVERAN the OVERRUN QUEUE !!\n\n' + + report + ) + # log.error(report) + log.debug(report) + + else: + report = ( + 'Queueing OVERRUN msg on caller task:\n\n' + + report + ) + log.debug(report) + # XXX NOTE XXX # overrun is the ONLY case where returning early is fine! return False diff --git a/tractor/trionics/_broadcast.py b/tractor/trionics/_broadcast.py index 977b6828..154b037d 100644 --- a/tractor/trionics/_broadcast.py +++ b/tractor/trionics/_broadcast.py @@ -382,7 +382,7 @@ class BroadcastReceiver(ReceiveChannel): # likely it makes sense to unwind back to the # underlying? # import tractor - # await tractor.breakpoint() + # await tractor.pause() log.warning( f'Only one sub left for {self}?\n' 'We can probably unwind from breceiver?' -- 2.34.1 From 526187d1a08f96ca54e8cc8a4c52923f2001689e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 9 Dec 2024 18:12:22 -0500 Subject: [PATCH 222/305] Add TODO for a runtime-vars passing mechanism --- tractor/_root.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tractor/_root.py b/tractor/_root.py index 51dbe5e6..bcdee3ef 100644 --- a/tractor/_root.py +++ b/tractor/_root.py @@ -95,6 +95,10 @@ async def open_root_actor( hide_tb: bool = True, + # TODO, a way for actors to augment passing derived + # read-only state to sublayers? + # extra_rt_vars: dict|None = None, + ) -> Actor: ''' Runtime init entry point for ``tractor``. 
-- 2.34.1


From 4a5ab155e26259b4b723fc2f686f1daaf69b4ba2 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Mon, 9 Dec 2024 18:14:11 -0500
Subject: [PATCH 223/305] Add `notes_to_self/howtorelease.md` reminder doc

---
 notes_to_self/howtorelease.md | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 notes_to_self/howtorelease.md

diff --git a/notes_to_self/howtorelease.md b/notes_to_self/howtorelease.md
new file mode 100644
index 00000000..5f31a6d8
--- /dev/null
+++ b/notes_to_self/howtorelease.md
@@ -0,0 +1,18 @@
+First generate a built disti:
+
+```
+python -m pip install --upgrade build 
+python -m build --sdist --outdir dist/alpha5/
+```
+
+Then try a test ``pypi`` upload:
+
+```
+python -m twine upload --repository testpypi dist/alpha5/*
+```
+
+Then push to `pypi` for realz.
+
+```
+python -m twine upload dist/alpha5/*
+```
-- 2.34.1


From 9412745aafed4b33285c302b327ec1a21cb859c9 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Mon, 9 Dec 2024 20:57:56 -0500
Subject: [PATCH 224/305] Spitballing how to expose custom `msgspec` type hooks

Such that maybe we can eventually offer a nicer higher-level API which
implements much of the boilerplate required by `msgspec` (like
type-matched branching to serialization logic) via a type-table
interface or something?

Not sure if the idea is that useful so leaving it all as TODOs for now
obviously.

---
 tractor/msg/_codec.py | 91 ++++++++++++++++++++++++++++++-------------
 1 file changed, 65 insertions(+), 26 deletions(-)

diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py
index c1301bd2..32f690f1 100644
--- a/tractor/msg/_codec.py
+++ b/tractor/msg/_codec.py
@@ -41,8 +41,10 @@ import textwrap
 from typing import (
     Any,
     Callable,
+    Protocol,
     Type,
     TYPE_CHECKING,
+    TypeVar,
     Union,
 )
 from types import ModuleType
@@ -181,7 +183,11 @@ def mk_dec(
     dec_hook: Callable|None = None,
 
 ) -> MsgDec:
+    '''
+    Create an IPC msg decoder, normally used as the
+    `PayloadMsg.pld: PayloadT` field decoder inside a `PldRx`.
 
+    '''
     return MsgDec(
         _dec=msgpack.Decoder(
             type=spec,  # like `MsgType[Any]`
@@ -227,6 +233,13 @@ def pformat_msgspec(
     join_char: str = '\n',
 
 ) -> str:
+    '''
+    Pretty `str` format the `msgspec.msgpack.Decoder.type` attribute
+    for display in (console) log messages as a nice (maybe multiline)
+    presentation of all supported `Struct`s (subtypes) available for
+    typed decoding.
+
+    '''
     dec: msgpack.Decoder = getattr(codec, 'dec', codec)
     return join_char.join(
         mk_msgspec_table(
@@ -630,31 +643,57 @@ def limit_msg_spec(
 #     # import pdbp; pdbp.set_trace()
 #     assert ext_codec.pld_spec == extended_spec
 #     yield ext_codec
+#
+# ^-TODO-^ is it impossible to make something like this orr!?
+
+# TODO: make an auto-custom hook generator from a set of input custom
+# types?
+# -[ ] below is a proto design using a `TypeCodec` idea?
+#
+# type var for the expected interchange-lib's
+# IPC-transport type when not available as a built-in
+# serialization output.
+WireT = TypeVar('WireT')
 
 
-# TODO: make something similar to this inside `._codec` such that
-# user can just pass a type table of some sort?
-# -[ ] we would need to decode all msgs to `pretty_struct.Struct`
-# and then call `.to_dict()` on them?
-# -[x] we're going to need to re-impl all the stuff changed in the
-# runtime port such that it can handle dicts or `Msg`s?
-# -# def mk_dict_msg_codec_hooks() -> tuple[Callable, Callable]: -# ''' -# Deliver a `enc_hook()`/`dec_hook()` pair which does -# manual convertion from our above native `Msg` set -# to `dict` equivalent (wire msgs) in order to keep legacy compat -# with the original runtime implementation. -# -# Note: this is is/was primarly used while moving the core -# runtime over to using native `Msg`-struct types wherein we -# start with the send side emitting without loading -# a typed-decoder and then later flipping the switch over to -# load to the native struct types once all runtime usage has -# been adjusted appropriately. -# -# ''' -# return ( -# # enc_to_dict, -# dec_from_dict, -# ) +# TODO: some kinda (decorator) API for built-in subtypes +# that builds this implicitly by inspecting the `mro()`? +class TypeCodec(Protocol): + ''' + A per-custom-type wire-transport serialization translator + description type. + + ''' + src_type: Type + wire_type: WireT + + def encode(obj: Type) -> WireT: + ... + + def decode( + obj_type: Type[WireT], + obj: WireT, + ) -> Type: + ... + + +class MsgpackTypeCodec(TypeCodec): + ... + + +def mk_codec_hooks( + type_codecs: list[TypeCodec], + +) -> tuple[Callable, Callable]: + ''' + Deliver a `enc_hook()`/`dec_hook()` pair which handle + manual convertion from an input `Type` set such that whenever + the `TypeCodec.filter()` predicate matches the + `TypeCodec.decode()` is called on the input native object by + the `dec_hook()` and whenever the + `isiinstance(obj, TypeCodec.type)` matches against an + `enc_hook(obj=obj)` the return value is taken from a + `TypeCodec.encode(obj)` callback. + + ''' + ... -- 2.34.1 From ddf6222eb687e1efde41ed2ab1bae7b1238e4175 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 10 Dec 2024 13:50:35 -0500 Subject: [PATCH 225/305] Draft a (pretty)`Struct.fields_diff()` For comparing a `msgspec.Struct` against an input `dict` presumably to be used as input for struct instantiation. The main diff with `.__sub__()` is that non-existing fields on either are reported (loudly). --- tractor/msg/pretty_struct.py | 90 ++++++++++++++++++++++++++++-------- 1 file changed, 70 insertions(+), 20 deletions(-) diff --git a/tractor/msg/pretty_struct.py b/tractor/msg/pretty_struct.py index 15e469e2..91eba8bd 100644 --- a/tractor/msg/pretty_struct.py +++ b/tractor/msg/pretty_struct.py @@ -30,9 +30,9 @@ from msgspec import ( Struct as _Struct, structs, ) -from pprint import ( - saferepr, -) +# from pprint import ( +# saferepr, +# ) from tractor.log import get_logger @@ -75,8 +75,8 @@ class DiffDump(UserList): for k, left, right in self: repstr += ( f'({k},\n' - f'\t{repr(left)},\n' - f'\t{repr(right)},\n' + f' |_{repr(left)},\n' + f' |_{repr(right)},\n' ')\n' ) repstr += ']\n' @@ -144,15 +144,22 @@ def pformat( field_indent=indent + field_indent, ) - else: # the `pprint` recursion-safe format: + else: + val_str: str = repr(v) + + # XXX LOL, below just seems to be f#$%in causing + # recursion errs.. + # + # the `pprint` recursion-safe format: # https://docs.python.org/3.11/library/pprint.html#pprint.saferepr - try: - val_str: str = saferepr(v) - except Exception: - log.exception( - 'Failed to `saferepr({type(struct)})` !?\n' - ) - return _Struct.__repr__(struct) + # try: + # val_str: str = saferepr(v) + # except Exception: + # log.exception( + # 'Failed to `saferepr({type(struct)})` !?\n' + # ) + # raise + # return _Struct.__repr__(struct) # TODO: LOLOL use `textwrap.indent()` instead dawwwwwg! 
            obj_str += (field_ws + f'{k}: {typ_name} = {val_str},\n')
@@ -203,12 +210,7 @@ class Struct(
         return sin_props
 
     pformat = pformat
-    # __repr__ = pformat
-    # __str__ = __repr__ = pformat
-    # TODO: use a pprint.PrettyPrinter instance around ONLY rendering
-    # inside a known tty?
-    # def __repr__(self) -> str:
-    #     ...
+
     def __repr__(self) -> str:
         try:
             return pformat(self)
@@ -218,6 +220,13 @@ class Struct(
             )
             return _Struct.__repr__(self)
 
+    # __repr__ = pformat
+    # __str__ = __repr__ = pformat
+    # TODO: use a pprint.PrettyPrinter instance around ONLY rendering
+    # inside a known tty?
+    # def __repr__(self) -> str:
+    #     ...
+
     def copy(
         self,
         update: dict | None = None,
@@ -267,13 +276,15 @@ class Struct(
                 fi.type(getattr(self, fi.name)),
             )
 
+    # TODO: make a mod func instead and just point to it here for
+    # method impl?
     def __sub__(
         self,
         other: Struct,
 
     ) -> DiffDump[tuple[str, Any, Any]]:
         '''
-        Compare fields/items key-wise and return a ``DiffDump``
+        Compare fields/items key-wise and return a `DiffDump`
         for easy visual REPL comparison B)
 
         '''
@@ -290,3 +301,42 @@ class Struct(
             ))
 
         return diffs
+
+    @classmethod
+    def fields_diff(
+        cls,
+        other: dict|Struct,
+
+    ) -> DiffDump[tuple[str, Any, Any]]:
+        '''
+        Very similar to `PrettyStruct.__sub__()` except accepts an
+        input `other: dict` (presumably that would normally be called
+        like `Struct(**other)`) which returns a `DiffDump` of the
+        fields of the struct and the `dict`'s fields.
+
+        '''
+        nullish = object()
+        consumed: dict = other.copy()
+        diffs: DiffDump[tuple[str, Any, Any]] = DiffDump()
+        for fi in structs.fields(cls):
+            field_name: str = fi.name
+            # ours: Any = getattr(self, field_name)
+            theirs: Any = consumed.pop(field_name, nullish)
+            if theirs is nullish:
+                diffs.append((
+                    field_name,
+                    f'{fi.type!r}',
+                    'NOT-DEFINED in `other: dict`',
+                ))
+
+        # when there are lingering fields in `other` that this struct
+        # DOES NOT define we also append those.
+        if consumed:
+            for k, v in consumed.items():
+                diffs.append((
+                    k,
+                    f'NOT-DEFINED for `{cls.__name__}`',
+                    f'`other: dict` has value = {v!r}',
+                ))
+
+        return diffs
-- 2.34.1


From a5b8e009fda05678147ee42fdebd47f48a29acfb Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Tue, 10 Dec 2024 14:43:39 -0500
Subject: [PATCH 226/305] TOSQUASH: 9002f60 howtorelease.md file

---
 notes_to_self/howtorelease.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/notes_to_self/howtorelease.md b/notes_to_self/howtorelease.md
index 5f31a6d8..a1b52d7a 100644
--- a/notes_to_self/howtorelease.md
+++ b/notes_to_self/howtorelease.md
@@ -1,7 +1,7 @@
 First generate a built disti:
 
 ```
-python -m pip install --upgrade build 
+python -m pip install --upgrade build
 python -m build --sdist --outdir dist/alpha5/
 ```
 
-- 2.34.1


From 4a195eef4c360263989063480fd646c9c06d2819 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 11 Dec 2024 22:23:17 -0500
Subject: [PATCH 227/305] Support and test infected-`asyncio`-mode for root

Such that you can use,

```python
    tractor.to_asyncio.run_as_asyncio_guest(
        trio_main=_trio_main,
    )
```

to bootstrap the root actor (and thus main parent process) to embed
the actor-runtime into an `asyncio` loop.
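For example a minimal `_trio_main` for the above might look something
like the following sketch (assuming only the `open_root_actor()` acm
API exercised by the test suite below):

```python
    import tractor

    async def _trio_main():
        # boot the actor-runtime inside the `asyncio`-hosted guest
        # run; `loglevel`/`debug_mode` kwargs elided for brevity.
        async with tractor.open_root_actor():
            ...  # spawn subactors, open ctxs/streams, etc.
```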
Prove it all works with a subactor-free version of the aio echo-server
test suite B)

---
 tests/test_infected_asyncio.py | 112 +++++++++++++++++++++++++--------
 tractor/_root.py               |   4 ++
 tractor/to_asyncio.py          |   5 ++
 3 files changed, 96 insertions(+), 25 deletions(-)

diff --git a/tests/test_infected_asyncio.py b/tests/test_infected_asyncio.py
index f5fa0aab..b0a11715 100644
--- a/tests/test_infected_asyncio.py
+++ b/tests/test_infected_asyncio.py
@@ -5,6 +5,7 @@ The hipster way to force SC onto the stdlib's "async": 'infection mode'.
 import asyncio
 import builtins
 from contextlib import ExitStack
+from functools import partial
 import itertools
 import importlib
 import os
@@ -536,41 +537,40 @@ def test_aio_errors_and_channel_propagates_and_closes(reg_addr):
         excinfo.value.boxed_type is Exception
 
 
+async def aio_echo_server(
+    to_trio: trio.MemorySendChannel,
+    from_trio: asyncio.Queue,
+) -> None:
+
+    to_trio.send_nowait('start')
+
+    while True:
+        msg = await from_trio.get()
+
+        # echo the msg back
+        to_trio.send_nowait(msg)
+
+        # if we get the terminate sentinel
+        # break the echo loop
+        if msg is None:
+            print('breaking aio echo loop')
+            break
+
+    print('exiting asyncio task')
+
+
 @tractor.context
 async def trio_to_aio_echo_server(
-    ctx: tractor.Context,
+    ctx: tractor.Context|None,
 ):
-
-    async def aio_echo_server(
-        to_trio: trio.MemorySendChannel,
-        from_trio: asyncio.Queue,
-    ) -> None:
-
-        to_trio.send_nowait('start')
-
-        while True:
-            msg = await from_trio.get()
-
-            # echo the msg back
-            to_trio.send_nowait(msg)
-
-            # if we get the terminate sentinel
-            # break the echo loop
-            if msg is None:
-                print('breaking aio echo loop')
-                break
-
-        print('exiting asyncio task')
-
     async with to_asyncio.open_channel_from(
         aio_echo_server,
     ) as (first, chan):
-
         assert first == 'start'
+
         await ctx.started(first)
 
         async with ctx.open_stream() as stream:
-
             async for msg in stream:
                 print(f'asyncio echoing {msg}')
                 await chan.send(msg)
@@ -649,6 +649,68 @@ def test_echoserver_detailed_mechanics(
     trio.run(main)
 
 
+@pytest.mark.parametrize(
+    'raise_error_mid_stream',
+    [
+        False,
+        Exception,
+        KeyboardInterrupt,
+    ],
+    ids='raise_error={}'.format,
+)
+def test_infected_root_actor(
+    raise_error_mid_stream: bool|Exception,
+
+    # conftest wide
+    loglevel: str,
+    debug_mode: bool,
+):
+    '''
+    Verify you can run the `tractor` runtime with `Actor.is_infected_aio() == True`
+    in the root actor.
+ + ''' + async def _trio_main(): + + first: str + chan: to_asyncio.LinkedTaskChannel + async with ( + tractor.open_root_actor( + debug_mode=debug_mode, + loglevel=loglevel, + ), + to_asyncio.open_channel_from( + aio_echo_server, + ) as (first, chan), + ): + assert first == 'start' + + for i in range(1000): + await chan.send(i) + out = await chan.receive() + assert out == i + print(f'asyncio echoing {i}') + + if raise_error_mid_stream and i == 500: + raise raise_error_mid_stream + + if out is None: + try: + out = await chan.receive() + except trio.EndOfChannel: + break + else: + raise RuntimeError('aio channel never stopped?') + + if raise_error_mid_stream: + with pytest.raises(raise_error_mid_stream): + tractor.to_asyncio.run_as_asyncio_guest( + trio_main=_trio_main, + ) + else: + tractor.to_asyncio.run_as_asyncio_guest( + trio_main=_trio_main, + ) + @tractor.context async def manage_file( diff --git a/tractor/_root.py b/tractor/_root.py index bcdee3ef..38ddbe23 100644 --- a/tractor/_root.py +++ b/tractor/_root.py @@ -334,6 +334,10 @@ async def open_root_actor( loglevel=loglevel, enable_modules=enable_modules, ) + # XXX, in case the root actor runtime was actually run from + # `tractor.to_asyncio.run_as_asyncio_guest()` and NOt + # `.trio.run()`. + actor._infected_aio = _state._runtime_vars['_is_infected_aio'] # Start up main task set via core actor-runtime nurseries. try: diff --git a/tractor/to_asyncio.py b/tractor/to_asyncio.py index f2a8570b..24f1ace6 100644 --- a/tractor/to_asyncio.py +++ b/tractor/to_asyncio.py @@ -36,6 +36,7 @@ import tractor from tractor._exceptions import AsyncioCancelled from tractor._state import ( debug_mode, + _runtime_vars, ) from tractor.devx import _debug from tractor.log import get_logger @@ -767,12 +768,16 @@ def run_as_asyncio_guest( 'Infecting `asyncio`-process with a `trio` guest-run!\n' ) + # TODO, somehow bootstrap this! + _runtime_vars['_is_infected_aio'] = True + trio.lowlevel.start_guest_run( trio_main, run_sync_soon_threadsafe=loop.call_soon_threadsafe, done_callback=trio_done_callback, ) fute_err: BaseException|None = None + try: out: Outcome = await asyncio.shield(trio_done_fute) -- 2.34.1 From 72fc6fce24933628290fbd2ea5f54293ac733553 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 18 Dec 2024 12:30:17 -0500 Subject: [PATCH 228/305] Support passing pre-conf-ed `Logger` Such that we can hook into 3rd-party-libs more easily to monkey them and use our (prettier/hipper) console logging with something like (an example from the client project `modden`), ```python connection_mod = i3ipc.connection tractor_style_i3ipc_logger: logging.LoggingAdapter = tractor.log.get_console_log( _root_name=connection_mod.__name__, logger=i3ipc.connection_mod.logger, level='info', ) # monkey the instance-ref in 3rd-party module connection_mod.logger = our_logger ``` Impl deats, - expose as `get_console_log(logger: logging.Logger)` and add default failover logic. - toss in more typing, also for mod-global instance. --- tractor/log.py | 53 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/tractor/log.py b/tractor/log.py index 47f1f259..74e0321b 100644 --- a/tractor/log.py +++ b/tractor/log.py @@ -258,20 +258,28 @@ class ActorContextInfo(Mapping): def get_logger( - - name: str | None = None, + name: str|None = None, _root_name: str = _proj_name, + logger: Logger|None = None, + + # TODO, using `.config.dictConfig()` api? 
+    # -[ ] SO answer with docs links
+    # |_https://stackoverflow.com/questions/7507825/where-is-a-complete-example-of-logging-config-dictconfig
+    # |_https://docs.python.org/3/library/logging.config.html#configuration-dictionary-schema
+    subsys_spec: str|None = None,
+
 ) -> StackLevelAdapter:
     '''Return the package log or a sub-logger for ``name`` if provided.
 
     '''
     log: Logger
-    log = rlog = logging.getLogger(_root_name)
+    log = rlog = logger or logging.getLogger(_root_name)
 
     if (
         name
-        and name != _proj_name
+        and
+        name != _proj_name
     ):
 
         # NOTE: for handling for modules that use ``get_logger(__name__)``
@@ -283,7 +291,7 @@ def get_logger(
         # since in python the {filename} is always this same
         # module-file.
 
-        sub_name: None | str = None
+        sub_name: None|str = None
         rname, _, sub_name = name.partition('.')
         pkgpath, _, modfilename = sub_name.rpartition('.')
 
@@ -306,7 +314,10 @@ def get_logger(
 
     # add our actor-task aware adapter which will dynamically look up
     # the actor and task names at each log emit
-    logger = StackLevelAdapter(log, ActorContextInfo())
+    logger = StackLevelAdapter(
+        log,
+        ActorContextInfo(),
+    )
 
     # additional levels
     for name, val in CUSTOM_LEVELS.items():
@@ -319,15 +330,25 @@ def get_logger(
 
 
 def get_console_log(
-    level: str | None = None,
+    level: str|None = None,
+    logger: Logger|None = None,
     **kwargs,
-) -> LoggerAdapter:
-    '''Get the package logger and enable a handler which writes to stderr.
 
-    Yeah yeah, i know we can use ``DictConfig``. You do it.
+) -> LoggerAdapter:
     '''
-    log = get_logger(**kwargs)  # our root logger
-    logger = log.logger
+    Get a `tractor`-style logging instance: a `Logger` wrapped in
+    a `StackLevelAdapter` which injects various concurrency-primitive
+    (process, thread, task) fields and enables a `StreamHandler` that
+    writes on stderr using `colorlog` formatting.
+
+    Yeah yeah, i know we can use `logging.config.dictConfig()`. You do it.
+
+    '''
+    log = get_logger(
+        logger=logger,
+        **kwargs
+    )  # set a root logger
+    logger: Logger = log.logger
 
     if not level:
         return log
@@ -346,9 +367,13 @@ def get_console_log(
             None,
         )
     ):
+        fmt = LOG_FORMAT
+        # if logger:
+        #     fmt = None
+
         handler = StreamHandler()
         formatter = colorlog.ColoredFormatter(
-            LOG_FORMAT,
+            fmt=fmt,
             datefmt=DATE_FORMAT,
             log_colors=STD_PALETTE,
             secondary_log_colors=BOLD_PALETTE,
@@ -365,7 +390,7 @@ def get_loglevel() -> str:
 
 
 # global module logger for tractor itself
-log = get_logger('tractor')
+log: StackLevelAdapter = get_logger('tractor')
 
 
 def at_least_level(
-- 2.34.1


From 11d4c83aeddfb65505e3a9e0426ac246e7dfab3a Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Fri, 27 Dec 2024 14:07:50 -0500
Subject: [PATCH 229/305] Accept err-type override in `is_multi_cancelled()`

Such that equivalents of `trio.Cancelled` from other runtimes such as
`asyncio.CancelledError` and `subprocess.CalledProcessError` (with
a `.returncode == -2`) can be gracefully ignored as needed by the
caller.

For example this is handy if you want to avoid debug-mode REPL entry on
an exception-group full of only some subset of exception types since you
expect certain tasks to raise such errors after having been cancelled by
a request from some parent supervision sys (some "higher up"
`trio.CancelScope`, a remote triggered `ContextCancelled` or just from
an OS SIGINT).

Impl deats,
- offer a new `ignore_nested: set[BaseException]` param which by
  default we add `trio.Cancelled` to when no other types are provided.
- use `ExceptionGroup.subgroup(tuple(ignore_nested)` to filter to egs of the "ignored sub-errors set" and return any such match (instead of `True`). - detail a comment on exclusion case. --- tractor/_exceptions.py | 48 +++++++++++++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 812664ac..89ea21ad 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -1146,19 +1146,51 @@ def unpack_error( def is_multi_cancelled( - exc: BaseException|BaseExceptionGroup -) -> bool: + exc: BaseException|BaseExceptionGroup, + + ignore_nested: set[BaseException] = set(), + +) -> bool|BaseExceptionGroup: ''' - Predicate to determine if a possible ``BaseExceptionGroup`` contains - only ``trio.Cancelled`` sub-exceptions (and is likely the result of - cancelling a collection of subtasks. + Predicate to determine if an `BaseExceptionGroup` only contains + some (maybe nested) set of sub-grouped exceptions (like only + `trio.Cancelled`s which get swallowed silently by default) and is + thus the result of "gracefully cancelling" a collection of + sub-tasks (or other conc primitives) and receiving a "cancelled + ACK" from each after termination. + + Docs: + ---- + - https://docs.python.org/3/library/exceptions.html#exception-groups + - https://docs.python.org/3/library/exceptions.html#BaseExceptionGroup.subgroup ''' + + if ( + not ignore_nested + or + trio.Cancelled in ignore_nested + # XXX always count-in `trio`'s native signal + ): + ignore_nested |= {trio.Cancelled} + if isinstance(exc, BaseExceptionGroup): - return exc.subgroup( - lambda exc: isinstance(exc, trio.Cancelled) - ) is not None + matched_exc: BaseExceptionGroup|None = exc.subgroup( + tuple(ignore_nested), + # TODO, complain about why not allowed XD + # condition=tuple(ignore_nested), + ) + if matched_exc is not None: + return matched_exc + + # NOTE, IFF no excs types match (throughout the error-tree) + # -> return `False`, OW return the matched sub-eg. + # + # IOW, for the inverse of ^ for the purpose of + # maybe-enter-REPL--logic: "only debug when the err-tree contains + # at least one exc-type NOT in `ignore_nested`" ; i.e. the case where + # we fallthrough and return `False` here. return False -- 2.34.1 From 1afef149d4a7edf4f72ce9cd524daa601945773b Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 28 Dec 2024 14:07:01 -0500 Subject: [PATCH 230/305] Raise explicitly on missing `greenback` portal When `.pause_from_sync()` is called from an `asyncio.Task` which was never bestowed a portal we want to be mega pedantic about it; indicate that the task was NOT spawned from our `.to_asyncio` API and likely by some out-of-our-control code (normally using `asyncio.ensure_future()/.create_task()`). Though `greenback` already errors on such usage, it's not always clear why no portal exists; explaining the situation of a 3rd-party-bg-spawned-task should avoid dev confusion for most cases. Impl deats, - distinguish between an actor in infected mode versus the actual caller of `.pause_from_sync()` being an `asyncio.Task` with more explicit `asyncio_task` and `is_infected_aio` vars. - ONLY in the case of being both an infected-mode-actor AND detecting that the caller is an `asyncio.Task`, check `greenback.has_portal()` such that when not bestowed we presume the aforementioned 3rd-party-bg-task case above and raise a new explicit RTE with a detailed explanatory message. 
- add some masked draft code for handling the special case of a root
  actor `asyncio.Task` caller which could (in theory) not actually
  require gb portal use since the `Lock` can be acquired directly
  without IPC.
  |_this will likely require factoring of various pause machinery funcs
    into a `_pause_from_root_task()` to mk the impl sane XD

Other,
- expose a new `debug_filter: Callable` which can be provided by the
  caller of `_maybe_enter_pm()` to predicate whether to enter the
  debugger REPL based on the caught `BaseException|BaseExceptionGroup`;
  this is handy for customizing the meaning of "graceful cancellations"
  so as to avoid crash handling on expected egs of more than
  `trio.Cancelled`.
  |_ make the default as it was implemented: `not is_multi_cancelled(err)`
- pass-through a new `ignore: set[BaseException]` as
  `open_crash_handler(ignore_nested=ignore)` to allow for the same
  silent-cancellation-egs-swallowing as desired from outside the actor
  runtime.

---
 tractor/devx/_debug.py | 166 ++++++++++++++++++++++++++++-------------
 1 file changed, 113 insertions(+), 53 deletions(-)

diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py
index 2f010b15..7c178ab1 100644
--- a/tractor/devx/_debug.py
+++ b/tractor/devx/_debug.py
@@ -75,6 +75,7 @@ from tractor import _state
 from tractor._exceptions import (
     InternalError,
     NoRuntime,
+    is_multi_cancelled,
 )
 from tractor._state import (
     current_actor,
@@ -1743,7 +1744,7 @@ async def _pause(
     ] = trio.TASK_STATUS_IGNORED,
     **debug_func_kwargs,
 
-) -> tuple[PdbREPL, Task]|None:
+) -> tuple[Task, PdbREPL]|None:
     '''
     Inner impl for `pause()` to avoid the `trio.CancelScope.__exit__()`
     stack frame when not shielded (since apparently i can't figure out
@@ -1929,7 +1930,7 @@ async def _pause(
             )
             with trio.CancelScope(shield=shield):
                 await trio.lowlevel.checkpoint()
-            return repl, task
+            return (repl, task)
 
         # elif repl_task:
         #     log.warning(
@@ -2530,26 +2531,17 @@ def pause_from_sync(
             f'{actor.uid} task called `tractor.pause_from_sync()`\n'
         )
 
-        # TODO: once supported, remove this AND the one
-        # inside `._pause()`!
-        # outstanding impl fixes:
-        # -[ ] need to make `.shield_sigint()` below work here!
-        # -[ ] how to handle `asyncio`'s new SIGINT-handler
-        #     injection?
-        # -[ ] should `breakpoint()` work and what does it normally
-        #     do in `asyncio` ctxs?
-        # if actor.is_infected_aio():
-        #     raise RuntimeError(
-        #         '`tractor.pause[_from_sync]()` not yet supported '
-        #         'for infected `asyncio` mode!'
-        #     )
-
         repl: PdbREPL = mk_pdb()
 
         # message += f'-> created local REPL {repl}\n'
         is_trio_thread: bool = DebugStatus.is_main_trio_thread()
         is_root: bool = is_root_process()
-        is_aio: bool = actor.is_infected_aio()
+        is_infected_aio: bool = actor.is_infected_aio()
+        thread: Thread = threading.current_thread()
+
+        asyncio_task: asyncio.Task|None = None
+        if is_infected_aio:
+            asyncio_task = asyncio.current_task()
 
         # TODO: we could also check for a non-`.to_thread` context
         # using `trio.from_thread.check_cancelled()` (says
@@ -2565,24 +2557,18 @@ def pause_from_sync(
         if (
             not is_trio_thread
             and
-            not is_aio  # see below for this usage
+            not asyncio_task
        ):
            # TODO: `threading.Lock()` this so we don't get races in
            # multi-thr cases where they're acquiring/releasing the
            # REPL and setting request/`Lock` state, etc..
-            thread: threading.Thread = threading.current_thread()
-            repl_owner = thread
+            repl_owner: Thread = thread
 
            # TODO: make root-actor bg thread usage work!
- if ( - is_root - # or - # is_aio - ): - if is_root: - message += ( - f'-> called from a root-actor bg {thread}\n' - ) + if is_root: + message += ( + f'-> called from a root-actor bg {thread}\n' + ) message += ( '-> scheduling `._pause_from_bg_root_thread()`..\n' @@ -2637,34 +2623,95 @@ def pause_from_sync( DebugStatus.shield_sigint() assert bg_task is not DebugStatus.repl_task + # TODO: once supported, remove this AND the one + # inside `._pause()`! + # outstanding impl fixes: + # -[ ] need to make `.shield_sigint()` below work here! + # -[ ] how to handle `asyncio`'s new SIGINT-handler + # injection? + # -[ ] should `breakpoint()` work and what does it normally + # do in `asyncio` ctxs? + # if actor.is_infected_aio(): + # raise RuntimeError( + # '`tractor.pause[_from_sync]()` not yet supported ' + # 'for infected `asyncio` mode!' + # ) elif ( not is_trio_thread and - is_aio + is_infected_aio # as in, the special actor-runtime mode + # ^NOTE XXX, that doesn't mean the caller is necessarily + # an `asyncio.Task` just that `trio` has been embedded on + # the `asyncio` event loop! + and + asyncio_task # transitive caller is an actual `asyncio.Task` ): greenback: ModuleType = maybe_import_greenback() - repl_owner: Task = asyncio.current_task() - DebugStatus.shield_sigint() - fute: asyncio.Future = run_trio_task_in_future( - partial( - _pause, - debug_func=None, - repl=repl, - hide_tb=hide_tb, - # XXX to prevent `._pause()` for setting - # `DebugStatus.repl_task` to the gb task! - called_from_sync=True, - called_from_bg_thread=True, + if greenback.has_portal(): + DebugStatus.shield_sigint() + fute: asyncio.Future = run_trio_task_in_future( + partial( + _pause, + debug_func=None, + repl=repl, + hide_tb=hide_tb, - **_pause_kwargs + # XXX to prevent `._pause()` for setting + # `DebugStatus.repl_task` to the gb task! + called_from_sync=True, + called_from_bg_thread=True, + + **_pause_kwargs + ) ) - ) + repl_owner = asyncio_task + bg_task, _ = greenback.await_(fute) + # TODO: ASYNC version -> `.pause_from_aio()`? + # bg_task, _ = await fute - # TODO: for async version -> `.pause_from_aio()`? - # bg_task, _ = await fute - bg_task, _ = greenback.await_(fute) - bg_task: asyncio.Task = asyncio.current_task() + # handle the case where an `asyncio` task has been + # spawned WITHOUT enabling a `greenback` portal.. + # => can often happen in 3rd party libs. + else: + bg_task = repl_owner + + # TODO, ostensibly we can just acquire the + # debug lock directly presuming we're the + # root actor running in infected asyncio + # mode? + # + # TODO, this would be a special case where + # a `_pause_from_root()` would come in very + # handy! + # if is_root: + # import pdbp; pdbp.set_trace() + # log.warning( + # 'Allowing `asyncio` task to acquire debug-lock in root-actor..\n' + # 'This is not fully implemented yet; there may be teardown hangs!\n\n' + # ) + # else: + + # simply unsupported, since there exists no hack (i + # can think of) to workaround this in a subactor + # which needs to lock the root's REPL ow we're sure + # to get prompt stdstreams clobbering.. 
+ cf_repr: str = '' + if api_frame: + caller_frame: FrameType = api_frame.f_back + cf_repr: str = f'caller_frame: {caller_frame!r}\n' + + raise RuntimeError( + f"CAN'T USE `greenback._await()` without a portal !?\n\n" + f'Likely this task was NOT spawned via the `tractor.to_asyncio` API..\n' + f'{asyncio_task}\n' + f'{cf_repr}\n' + + f'Prolly the task was started out-of-band (from some lib?)\n' + f'AND one of the below was never called ??\n' + f'- greenback.ensure_portal()\n' + f'- greenback.bestow_portal()\n' + ) else: # we are presumably the `trio.run()` + main thread # raises on not-found by default @@ -2915,8 +2962,14 @@ async def _maybe_enter_pm( tb: TracebackType|None = None, api_frame: FrameType|None = None, hide_tb: bool = False, + + # only enter debugger REPL when returns `True` + debug_filter: Callable[ + [BaseException|BaseExceptionGroup], + bool, + ] = lambda err: not is_multi_cancelled(err), + ): - from tractor._exceptions import is_multi_cancelled if ( debug_mode() @@ -2933,7 +2986,8 @@ async def _maybe_enter_pm( # Really we just want to mostly avoid catching KBIs here so there # might be a simpler check we can do? - and not is_multi_cancelled(err) + and + debug_filter(err) ): api_frame: FrameType = api_frame or inspect.currentframe() tb: TracebackType = tb or sys.exc_info()[2] @@ -3139,10 +3193,16 @@ def open_crash_handler( try: yield except tuple(catch) as err: - if type(err) not in ignore: - - # use our re-impl-ed version + if ( + type(err) not in ignore + and + not is_multi_cancelled( + err, + ignore_nested=ignore + ) + ): try: + # use our re-impl-ed version _post_mortem( repl=mk_pdb(), tb=sys.exc_info()[2], -- 2.34.1 From cdd0c5384abfaf3d216cdd69247959d2ac5a4891 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 28 Dec 2024 14:34:24 -0500 Subject: [PATCH 231/305] Drop extra nl from boxed error fmt --- tractor/devx/pformat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tractor/devx/pformat.py b/tractor/devx/pformat.py index d24eaaf4..1530ef02 100644 --- a/tractor/devx/pformat.py +++ b/tractor/devx/pformat.py @@ -92,7 +92,7 @@ def pformat_boxed_tb( f' ------ {boxer_header} ------\n' f'{tb_body}' f' ------ {boxer_header}- ------\n' - f'_|\n' + f'_|' ) tb_box_indent: str = ( tb_box_indent -- 2.34.1 From 46f644e748742735e6d9c1a7f7ddb90e96080cb2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 28 Dec 2024 14:35:05 -0500 Subject: [PATCH 232/305] Expose `debug_filter` from `open_root_actor()` also Such that actor-runtime graceful cancel handling can be used throughout any process tree. --- tractor/_root.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tractor/_root.py b/tractor/_root.py index 38ddbe23..e10b02ef 100644 --- a/tractor/_root.py +++ b/tractor/_root.py @@ -95,6 +95,13 @@ async def open_root_actor( hide_tb: bool = True, + # XXX, proxied directly to `.devx._debug._maybe_enter_pm()` + # for REPL-entry logic. + debug_filter: Callable[ + [BaseException|BaseExceptionGroup], + bool, + ] = lambda err: not is_multi_cancelled(err), + # TODO, a way for actors to augment passing derived # read-only state to sublayers? 
     # extra_rt_vars: dict|None = None,
 
@@ -379,6 +386,7 @@ async def open_root_actor(
             Exception,
             BaseExceptionGroup,
         ) as err:
+
             # XXX NOTE XXX see equiv note inside
             # `._runtime.Actor._stream_handler()` where in the
             # non-root or root-that-opened-this-mahually case we
@@ -387,11 +395,15 @@ async def open_root_actor(
             entered: bool = await _debug._maybe_enter_pm(
                 err,
                 api_frame=inspect.currentframe(),
+                debug_filter=debug_filter,
             )
+
             if (
                 not entered
                 and
-                not is_multi_cancelled(err)
+                not is_multi_cancelled(
+                    err,
+                )
             ):
                 logger.exception('Root actor crashed\n')
 
-- 2.34.1


From 14fb56329b3add9ee473b0f572e502d6d39d7992 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Tue, 31 Dec 2024 18:10:09 -0500
Subject: [PATCH 233/305] Much more limited `asyncio.Task.cancel()` use

Since it can not only cause the guest-mode run to abandon but also in
some edge cases prevent `trio`-errors from propagating (at least on
py3.12-13?) as discovered as part of supporting this mode officially
in the *root actor*.

As such try to avoid that method as much as possible instead opting to
pass the `trio`-side error via the iter-task channel ref.

Deats,
- add a `LinkedTaskChannel._trio_err: BaseException|None` which gets
  set whenever the `trio.Task` error is caught; ONLY set
  `AsyncioCancelled` when the `trio` task was for sure the cause,
  whether itself cancelled or errored.
- always check for this error when exiting the `asyncio` side (even
  when terminated via a call to `asyncio.Task.cancel()` or during any
  other `CancelledError` handling) such that the `asyncio`-task can
  expect to handle `AsyncioCancelled` due to the above^^ cases.
- never `cs.cancel()` the `trio` side unless that cancel scope has not
  yet been `.cancel_called` whatsoever; it's a noop anyway.
- only raise any exc from `asyncio.Task.result()` when `chan._aio_err`
  does not already match it since the existence of the pre-existing
  `task_err` means `asyncio` prolly intends (or has already) raised
  and interrupted the task elsewhere.

Various supporting tweaks,
- don't bother maybe-init-ing `greenback` from the actor entrypoint
  since we already need to (and do) bestow the portals to each
  `asyncio` task spawned using the `run_task()`/`open_channel_from()`
  API; further the init-ing should be done already by client code that
  enables infected mode (even in the root actor).
  |_we should prolly also codify it from any
    `run_daemon(infected_aio=True, debug_mode=True)` usage we offer.
- pass all the `_`s to `LinkedTaskChannel` explicitly in named kwarg
  style.
- better sclang-style log reports throughout, particularly on
  teardowns.
- generally more/better comments and docs around (not well understood)
  edge cases.
- prep to just inline `maybe_raise_aio_side_err()` closure..
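As an illustration of the 2nd bullet, an `asyncio`-side task fn passed
to `open_channel_from()` can now expect the (non-`BaseException`)
wrapper error at its wait-points; a rough sketch using a hypothetical
echo task (not part of the diff below):

```python
    import asyncio
    import trio
    from tractor._exceptions import AsyncioCancelled

    async def aio_side(
        to_trio: trio.MemorySendChannel,
        from_trio: asyncio.Queue,
    ) -> None:
        to_trio.send_nowait('start')
        try:
            while True:
                # echo anything sent from the `trio` side back to it
                to_trio.send_nowait(await from_trio.get())
        except AsyncioCancelled:
            # the `trio` side cancelled (or crashed); unwind gracefully
            # instead of relying on a raw `Task.cancel()` to land.
            raise
```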
--- tractor/to_asyncio.py | 471 +++++++++++++++++++++++++++++------------- 1 file changed, 331 insertions(+), 140 deletions(-) diff --git a/tractor/to_asyncio.py b/tractor/to_asyncio.py index 24f1ace6..d5f78ca8 100644 --- a/tractor/to_asyncio.py +++ b/tractor/to_asyncio.py @@ -33,13 +33,19 @@ from typing import ( ) import tractor -from tractor._exceptions import AsyncioCancelled +from tractor._exceptions import ( + AsyncioCancelled, + is_multi_cancelled, +) from tractor._state import ( debug_mode, _runtime_vars, ) from tractor.devx import _debug -from tractor.log import get_logger +from tractor.log import ( + get_logger, + StackLevelAdapter, +) from tractor.trionics._broadcast import ( broadcast_receiver, BroadcastReceiver, @@ -50,7 +56,7 @@ from outcome import ( Outcome, ) -log = get_logger(__name__) +log: StackLevelAdapter = get_logger(__name__) __all__ = [ @@ -70,9 +76,10 @@ class LinkedTaskChannel(trio.abc.Channel): _to_aio: asyncio.Queue _from_aio: trio.MemoryReceiveChannel _to_trio: trio.MemorySendChannel - _trio_cs: trio.CancelScope _aio_task_complete: trio.Event + + _trio_err: BaseException|None = None _trio_exited: bool = False # set after ``asyncio.create_task()`` @@ -84,28 +91,40 @@ class LinkedTaskChannel(trio.abc.Channel): await self._from_aio.aclose() async def receive(self) -> Any: - async with translate_aio_errors( - self, - - # XXX: obviously this will deadlock if an on-going stream is - # being procesed. - # wait_on_aio_task=False, - ): + ''' + Receive a value from the paired `asyncio.Task` with + exception/cancel handling to teardown both sides on any + unexpected error. + ''' + try: # TODO: do we need this to guarantee asyncio code get's # cancelled in the case where the trio side somehow creates # a state where the asyncio cycle-task isn't getting the # cancel request sent by (in theory) the last checkpoint # cycle on the trio side? # await trio.lowlevel.checkpoint() - return await self._from_aio.receive() + except BaseException as err: + async with translate_aio_errors( + self, + + # XXX: obviously this will deadlock if an on-going stream is + # being procesed. + # wait_on_aio_task=False, + ): + raise err async def wait_asyncio_complete(self) -> None: await self._aio_task_complete.wait() - # def cancel_asyncio_task(self) -> None: - # self._aio_task.cancel() + def cancel_asyncio_task( + self, + msg: str = '', + ) -> None: + self._aio_task.cancel( + msg=msg, + ) async def send(self, item: Any) -> None: ''' @@ -155,7 +174,6 @@ class LinkedTaskChannel(trio.abc.Channel): def _run_asyncio_task( - func: Callable, *, qsize: int = 1, @@ -165,8 +183,9 @@ def _run_asyncio_task( ) -> LinkedTaskChannel: ''' - Run an ``asyncio`` async function or generator in a task, return - or stream the result back to the caller `trio.lowleve.Task`. + Run an `asyncio`-compat async function or generator in a task, + return or stream the result back to the caller + `trio.lowleve.Task`. ''' __tracebackhide__: bool = hide_tb @@ -204,23 +223,23 @@ def _run_asyncio_task( aio_err: BaseException|None = None chan = LinkedTaskChannel( - aio_q, # asyncio.Queue - from_aio, # recv chan - to_trio, # send chan - - cancel_scope, - aio_task_complete, + _to_aio=aio_q, # asyncio.Queue + _from_aio=from_aio, # recv chan + _to_trio=to_trio, # send chan + _trio_cs=cancel_scope, + _aio_task_complete=aio_task_complete, ) async def wait_on_coro_final_result( - to_trio: trio.MemorySendChannel, coro: Awaitable, aio_task_complete: trio.Event, ) -> None: ''' - Await ``coro`` and relay result back to ``trio``. 
+ Await `coro` and relay result back to `trio`. + + This can only be run as an `asyncio.Task`! ''' nonlocal aio_err @@ -243,8 +262,10 @@ def _run_asyncio_task( else: if ( - result != orig and - aio_err is None and + result != orig + and + aio_err is None + and # in the `open_channel_from()` case we don't # relay through the "return value". @@ -260,12 +281,21 @@ def _run_asyncio_task( # a ``trio.EndOfChannel`` to the trio (consumer) side. to_trio.close() + # import pdbp; pdbp.set_trace() aio_task_complete.set() - log.runtime(f'`asyncio` task: {task.get_name()} is complete') + # await asyncio.sleep(0.1) + log.info( + f'`asyncio` task terminated\n' + f'x)>\n' + f' |_{task}\n' + ) # start the asyncio task we submitted from trio if not inspect.isawaitable(coro): - raise TypeError(f"No support for invoking {coro}") + raise TypeError( + f'Pass the async-fn NOT a coroutine\n' + f'{coro!r}' + ) task: asyncio.Task = asyncio.create_task( wait_on_coro_final_result( @@ -289,6 +319,10 @@ def _run_asyncio_task( raise_not_found=False, )) ): + log.info( + f'Bestowing `greenback` portal for `asyncio`-task\n' + f'{task}\n' + ) greenback.bestow_portal(task) def cancel_trio(task: asyncio.Task) -> None: @@ -304,11 +338,22 @@ def _run_asyncio_task( # task exceptions try: res: Any = task.result() + log.info( + '`trio` received final result from {task}\n' + f'|_{res}\n' + ) except BaseException as terr: task_err: BaseException = terr + # read again AFTER the `asyncio` side errors in case + # it was cancelled due to an error from `trio` (or + # some other out of band exc). + aio_err: BaseException|None = chan._aio_err + msg: str = ( - 'Infected `asyncio` task {etype_str}\n' + '`trio`-side reports that the `asyncio`-side ' + '{etype_str}\n' + # ^NOTE filled in below ) if isinstance(terr, CancelledError): msg += ( @@ -327,17 +372,18 @@ def _run_asyncio_task( msg.format(etype_str='errored') ) - assert type(terr) is type(aio_err), ( - '`asyncio` task error mismatch?!?' - ) + assert ( + type(terr) is type(aio_err) + ), '`asyncio` task error mismatch?!?' if aio_err is not None: + # import pdbp; pdbp.set_trace() # XXX: uhh is this true? # assert task_err, f'Asyncio task {task.get_name()} discrepancy!?' # NOTE: currently mem chan closure may act as a form - # of error relay (at least in the ``asyncio.CancelledError`` - # case) since we have no way to directly trigger a ``trio`` + # of error relay (at least in the `asyncio.CancelledError` + # case) since we have no way to directly trigger a `trio` # task error without creating a nursery to throw one. # We might want to change this in the future though. from_aio.close() @@ -359,29 +405,25 @@ def _run_asyncio_task( # ) # raise aio_err from task_err - # XXX: if not already, alway cancel the scope - # on a task error in case the trio task is blocking on + # XXX: if not already, alway cancel the scope on a task + # error in case the trio task is blocking on # a checkpoint. - cancel_scope.cancel() - if ( - task_err - and - aio_err is not task_err + not cancel_scope.cancelled_caught + or + not cancel_scope.cancel_called ): - raise aio_err from task_err + # import pdbp; pdbp.set_trace() + cancel_scope.cancel() - # raise any `asyncio` side error. - raise aio_err - - log.info( - '`trio` received final result from {task}\n' - f'|_{res}\n' - ) - # TODO: do we need this? - # if task_err: - # cancel_scope.cancel() - # raise task_err + if task_err: + # XXX raise any `asyncio` side error IFF it doesn't + # match the one we just caught from the task above! 
+ # (that would indicate something weird/very-wrong + # going on?) + if aio_err is not task_err: + # import pdbp; pdbp.set_trace() + raise aio_err from task_err task.add_done_callback(cancel_trio) return chan @@ -389,13 +431,18 @@ def _run_asyncio_task( @acm async def translate_aio_errors( - chan: LinkedTaskChannel, wait_on_aio_task: bool = False, + cancel_aio_task_on_trio_exit: bool = True, ) -> AsyncIterator[None]: ''' - Error handling context around ``asyncio`` task spawns which + An error handling to cross-loop propagation context around + `asyncio.Task` spawns via one of this module's APIs: + + - `open_channel_from()` + - `run_task()` + appropriately translates errors and cancels into ``trio`` land. ''' @@ -403,88 +450,204 @@ async def translate_aio_errors( aio_err: BaseException|None = None - # TODO: make thisi a channel method? - def maybe_raise_aio_err( - err: Exception|None = None - ) -> None: - aio_err = chan._aio_err - if ( - aio_err is not None - and - # not isinstance(aio_err, CancelledError) - type(aio_err) != CancelledError - ): - # always raise from any captured asyncio error - if err: - raise aio_err from err - else: - raise aio_err - - task = chan._aio_task - assert task + aio_task: asyncio.Task = chan._aio_task + assert aio_task + trio_err: BaseException|None = None try: - yield - + yield # back to one of the cross-loop apis except ( trio.Cancelled, - ): - # relay cancel through to called ``asyncio`` task + ) as _trio_err: + trio_err = _trio_err assert chan._aio_task - chan._aio_task.cancel( - msg=f'the `trio` caller task was cancelled: {trio_task.name}' + + # import pdbp; pdbp.set_trace() # lolevel-debug + + # relay cancel through to called ``asyncio`` task + chan._aio_err = AsyncioCancelled( + f'trio`-side cancelled the `asyncio`-side,\n' + f'c)>\n' + f' |_{trio_task}\n\n' + + + f'{trio_err!r}\n' ) - raise + + # XXX NOTE XXX seems like we can get all sorts of unreliable + # behaviour from `asyncio` under various cancellation + # conditions (like SIGINT/kbi) when this is used.. + # SO FOR NOW, try to avoid it at most costs! + # + # aio_task.cancel( + # msg=f'the `trio` parent task was cancelled: {trio_task.name}' + # ) + # raise except ( - # NOTE: see the note in the ``cancel_trio()`` asyncio task + # NOTE: also see note in the `cancel_trio()` asyncio task # termination callback trio.ClosedResourceError, # trio.BrokenResourceError, - ): + + ) as _trio_err: + trio_err = _trio_err aio_err = chan._aio_err + # import pdbp; pdbp.set_trace() + + # XXX if an underlying `asyncio.CancelledError` triggered + # this channel close, raise our (non-`BaseException`) wrapper + # exception (`AsyncioCancelled`) from that source error. if ( - task.cancelled() + # NOTE, not until it terminates? + aio_task.cancelled() and type(aio_err) is CancelledError ): - # if an underlying `asyncio.CancelledError` triggered this - # channel close, raise our (non-``BaseException``) wrapper - # error: ``AsyncioCancelled`` from that source error. raise AsyncioCancelled( - f'Task cancelled\n' - f'|_{task}\n' + f'asyncio`-side cancelled the `trio`-side,\n' + f'c(>\n' + f' |_{aio_task}\n\n' + + f'{trio_err!r}\n' ) from aio_err else: raise - finally: + except BaseException as _trio_err: + trio_err = _trio_err + log.exception( + '`trio`-side task errored?' + ) + + entered: bool = await _debug._maybe_enter_pm( + trio_err, + api_frame=inspect.currentframe(), + ) if ( - # NOTE: always cancel the ``asyncio`` task if we've made it - # this far and it's not done. 
- not task.done() and aio_err + not entered + and + not is_multi_cancelled(trio_err) + ): + log.exception('actor crashed\n') + + aio_taskc = AsyncioCancelled( + f'`trio`-side task errored!\n' + f'{trio_err}' + ) #from trio_err + + try: + aio_task.set_exception(aio_taskc) + except ( + asyncio.InvalidStateError, + RuntimeError, + # ^XXX, uhh bc apparently we can't use `.set_exception()` + # any more XD .. ?? + ): + wait_on_aio_task = False + + # import pdbp; pdbp.set_trace() + # raise aio_taskc from trio_err + + finally: + # record wtv `trio`-side error transpired + chan._trio_err = trio_err + + # NOTE! by default always cancel the `asyncio` task if + # we've made it this far and it's not done. + # TODO, how to detect if there's an out-of-band error that + # caused the exit? + if ( + cancel_aio_task_on_trio_exit + and + not aio_task.done() + and + aio_err # or the trio side has exited it's surrounding cancel scope # indicating the lifetime of the ``asyncio``-side task # should also be terminated. - or chan._trio_exited - ): - log.runtime( - f'Cancelling `asyncio`-task: {task.get_name()}' + or ( + chan._trio_exited + and + not chan._trio_err # XXX CRITICAL, `asyncio.Task.cancel()` is cucked man.. ) - # assert not aio_err, 'WTF how did asyncio do this?!' - task.cancel() + ): + # pass + msg: str = ( + f'MANUALLY Cancelling `asyncio`-task: {aio_task.get_name()}!\n\n' + f'**THIS CAN SILENTLY SUPPRESS ERRORS FYI\n\n' - # Required to sync with the far end ``asyncio``-task to ensure + f'trio-side exited silently!' + ) + # TODO XXX, figure out the case where calling this makes the + # `test_infected_asyncio.py::test_trio_closes_early_and_channel_exits` + # hang and then don't call it in that case! + # + aio_task.cancel(msg=msg) + log.warning(msg) + # assert not aio_err, 'WTF how did asyncio do this?!' + # import pdbp; pdbp.set_trace() + + # Required to sync with the far end `asyncio`-task to ensure # any error is captured (via monkeypatching the - # ``channel._aio_err``) before calling ``maybe_raise_aio_err()`` + # `channel._aio_err`) before calling ``maybe_raise_aio_err()`` # below! + # + # XXX NOTE XXX the `task.set_exception(aio_taskc)` call above + # MUST NOT EXCEPT or this WILL HANG!! + # + # so if you get a hang maybe step through and figure out why + # it erroed out up there! + # if wait_on_aio_task: + # await chan.wait_asyncio_complete() await chan._aio_task_complete.wait() + log.info( + 'asyncio-task is done and unblocked trio-side!\n' + ) + + # TODO? + # -[ ] make this a channel method, OR + # -[ ] just put back inline below? + # + def maybe_raise_aio_side_err( + trio_err: Exception, + ) -> None: + ''' + Raise any `trio`-side-caused cancellation or legit task + error normally propagated from the caller of either, + - `open_channel_from()` + - `run_task()` + + ''' + aio_err: BaseException|None = chan._aio_err + + # Check if the asyncio-side is the cause of the trio-side + # error. 
+ if ( + aio_err is not None + and + type(aio_err) is not AsyncioCancelled + + # not isinstance(aio_err, CancelledError) + # type(aio_err) is not CancelledError + ): + # always raise from any captured asyncio error + if trio_err: + raise trio_err from aio_err + + raise aio_err + + if trio_err: + raise trio_err # NOTE: if any ``asyncio`` error was caught, raise it here inline # here in the ``trio`` task - maybe_raise_aio_err() + # if trio_err: + maybe_raise_aio_side_err( + trio_err=trio_err + ) async def run_task( @@ -496,8 +659,8 @@ async def run_task( ) -> Any: ''' - Run an `asyncio` async function or generator in a task, return - or stream the result back to `trio`. + Run an `asyncio`-compat async function or generator in a task, + return or stream the result back to `trio`. ''' # simple async func @@ -537,6 +700,7 @@ async def open_channel_from( provide_channels=True, **kwargs, ) + # TODO, tuple form here? async with chan._from_aio: async with translate_aio_errors( chan, @@ -685,18 +849,21 @@ def run_as_asyncio_guest( # Uh, oh. # # :o - - # It looks like your event loop has caught a case of the ``trio``s. - - # :() - - # Don't worry, we've heard you'll barely notice. You might - # hallucinate a few more propagating errors and feel like your - # digestion has slowed but if anything get's too bad your parents - # will know about it. - + # + # looks like your stdlib event loop has caught a case of "the trios" ! + # + # :O + # + # Don't worry, we've heard you'll barely notice. + # # :) - + # + # You might hallucinate a few more propagating errors and feel + # like your digestion has slowed, but if anything get's too bad + # your parents will know about it. + # + # B) + # async def aio_main(trio_main): ''' Main `asyncio.Task` which calls @@ -713,16 +880,20 @@ def run_as_asyncio_guest( '-> built a `trio`-done future\n' ) - # TODO: shoudn't this be done in the guest-run trio task? - # if debug_mode(): - # # XXX make it obvi we know this isn't supported yet! - # log.error( - # 'Attempting to enter unsupported `greenback` init ' - # 'from `asyncio` task..' - # ) - # await _debug.maybe_init_greenback( - # force_reload=True, - # ) + # TODO: is this evern run or needed? + # -[ ] pretty sure it never gets run for root-infected-aio + # since this main task is always the parent of any + # eventual `open_root_actor()` call? + if debug_mode(): + log.error( + 'Attempting to enter non-required `greenback` init ' + 'from `asyncio` task ???' + ) + # XXX make it obvi we know this isn't supported yet! + assert 0 + # await _debug.maybe_init_greenback( + # force_reload=True, + # ) def trio_done_callback(main_outcome): log.runtime( @@ -732,6 +903,7 @@ def run_as_asyncio_guest( ) if isinstance(main_outcome, Error): + # import pdbp; pdbp.set_trace() error: BaseException = main_outcome.error # show an dedicated `asyncio`-side tb from the error @@ -751,7 +923,7 @@ def run_as_asyncio_guest( trio_done_fute.set_result(main_outcome) log.info( - f'`trio` guest-run finished with outcome\n' + f'`trio` guest-run finished with,\n' f')>\n' f'|_{trio_done_fute}\n' ) @@ -777,9 +949,20 @@ def run_as_asyncio_guest( done_callback=trio_done_callback, ) fute_err: BaseException|None = None - try: out: Outcome = await asyncio.shield(trio_done_fute) + # ^TODO still don't really understand why the `.shield()` + # is required ... ?? + # https://docs.python.org/3/library/asyncio-task.html#asyncio.shield + # ^ seems as though in combo with the try/except here + # we're BOLDLY INGORING cancel of the trio fute? 
+            #
+            # I guess it makes sense bc we don't want `asyncio` to
+            # cancel trio just because they can't handle SIGINT
+            # sanely? XD .. kk
+
+            # XXX, sin-shield causes guest-run abandons on SIGINT..
+            # out: Outcome = await trio_done_fute
 
             # NOTE will raise (via `Error.unwrap()`) from any
             # exception packed into the guest-run's `main_outcome`.
@@ -802,27 +985,32 @@ def run_as_asyncio_guest(
             fute_err = _fute_err
             err_message: str = (
                 'main `asyncio` task '
+                'was cancelled!\n'
             )
-            if isinstance(fute_err, asyncio.CancelledError):
-                err_message += 'was cancelled!\n'
-            else:
-                err_message += f'errored with {out.error!r}\n'
+
+            # TODO, handle possible edge cases with
+            # `open_root_actor()` closing before this is run!
+            # actor: tractor.Actor = tractor.current_actor()
 
             log.exception(
                 err_message
                 +
                 'Cancelling `trio`-side `tractor`-runtime..\n'
-                f'c)>\n'
+                f'c(>\n'
                 f' |_{actor}.cancel_soon()\n'
             )
 
-            # XXX WARNING XXX the next LOCs are super important, since
-            # without them, we can get guest-run abandonment cases
-            # where `asyncio` will not schedule or wait on the `trio`
-            # guest-run task before final shutdown! This is
-            # particularly true if the `trio` side has tasks doing
-            # shielded work when a SIGINT condition occurs.
+            # XXX WARNING XXX the next LOCs are super important!
+            #
+            # SINCE without them, we can get guest-run ABANDONMENT
+            # cases where `asyncio` will not schedule or wait on the
+            # guest-run `trio.Task` nor invoke its registered
+            # `trio_done_callback()` before final shutdown!
+            #
+            # This is particularly true if the `trio` side has tasks
+            # in shielded sections when an OC-cancel (SIGINT)
+            # condition occurs!
             #
             # We now have the
             # `test_infected_asyncio.test_sigint_closes_lifetime_stack()`
@@ -886,7 +1074,10 @@ def run_as_asyncio_guest(
         try:
             return trio_done_fute.result()
-        except asyncio.exceptions.InvalidStateError as state_err:
+        except (
+            asyncio.InvalidStateError,
+            # asyncio.CancelledError,
+        ) as state_err:
 
             # XXX be super dupere noisy about abandonment issues!
             aio_task: asyncio.Task = asyncio.current_task()
-- 2.34.1


From 32e760284f0a9c97ff14683c64032487e34cca86 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Thu, 2 Jan 2025 15:35:36 -0500
Subject: [PATCH 234/305] Hm, `asyncio.Task._fut_waiter.set_exception()`?

Since we can't use `Task.set_exception()` (that task method never
seems to work.. XD) and setting the private/internal field always
seems to do the desired raising in the task? I realize it's an
internal `asyncio` runtime field but i'd rather take the risk of it
breaking than having to rely on our own equivalent hack..

Also, it seems like in the case where the task's associated (and
internal) future-waiter field is null, we won't run into the (same?)
prior hanging issues (maybe since there's nothing for `asyncio`
internals to use to wait XD ??) when `Task.cancel()` is used..??

Main deats,
- add, and `Future.set_exception()`, a new signal-exception
  `class TrioTaskExited(AsyncioCancelled):` whenever the trio-task
  exits gracefully and the asyncio-side task is still doing blocking
  work (of some sort) which *seems to* be predicated by a check that
  `._fut_waiter is not None`.
- always call `asyncio.Queue.shutdown()` for the same^ as well as
  whenever we decide to call `Task.cancel()`; in that case the
  shutdown relays correctly?

Some further refinements,
- only warn about `Task.cancel()` usage when actually used ;)
- more local scope vars setting in the exit phase of
  `translate_aio_errors()`.
- also in ^ use explicit caught-exc var names for each error-type.
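The gist of the trick as a standalone sketch; keep in mind
`Task._fut_waiter` is a private CPython detail that may change across
interpreter versions and the helper name here is purely illustrative:

```python
    import asyncio

    def interrupt_blocked_task(
        aio_task: asyncio.Task,
        exc: BaseException,
    ) -> None:
        # when the task is blocked awaiting some (internal) future,
        # raise `exc` at that exact await-point..
        if (fut := aio_task._fut_waiter) is not None:
            fut.set_exception(exc)
        else:
            # ..otherwise fall back to a plain cancel request.
            aio_task.cancel(msg=repr(exc))
```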
--- tractor/to_asyncio.py | 130 +++++++++++++++++++++++++++++++----------- 1 file changed, 97 insertions(+), 33 deletions(-) diff --git a/tractor/to_asyncio.py b/tractor/to_asyncio.py index d5f78ca8..3f8d20d3 100644 --- a/tractor/to_asyncio.py +++ b/tractor/to_asyncio.py @@ -342,20 +342,29 @@ def _run_asyncio_task( '`trio` received final result from {task}\n' f'|_{res}\n' ) - except BaseException as terr: - task_err: BaseException = terr + except BaseException as _aio_err: + task_err: BaseException = _aio_err # read again AFTER the `asyncio` side errors in case # it was cancelled due to an error from `trio` (or # some other out of band exc). aio_err: BaseException|None = chan._aio_err + # always true right? + assert ( + type(_aio_err) is type(aio_err) + ), ( + f'`asyncio`-side task errors mismatch?!?\n\n' + f'caught: {_aio_err}\n' + f'chan._aio_err: {aio_err}\n' + ) + msg: str = ( '`trio`-side reports that the `asyncio`-side ' '{etype_str}\n' # ^NOTE filled in below ) - if isinstance(terr, CancelledError): + if isinstance(_aio_err, CancelledError): msg += ( f'c)>\n' f' |_{task}\n' @@ -372,9 +381,6 @@ def _run_asyncio_task( msg.format(etype_str='errored') ) - assert ( - type(terr) is type(aio_err) - ), '`asyncio` task error mismatch?!?' if aio_err is not None: # import pdbp; pdbp.set_trace() @@ -394,7 +400,7 @@ def _run_asyncio_task( # aio_err.with_traceback(aio_err.__traceback__) # TODO: show when cancellation originated - # from each side more pedantically? + # from each side more pedantically in log-msg? # elif ( # type(aio_err) is CancelledError # and # trio was the cause? @@ -429,6 +435,19 @@ def _run_asyncio_task( return chan +class TrioTaskExited(AsyncioCancelled): + ''' + The `trio`-side task exited without explicitly cancelling the + `asyncio.Task` peer. + + This is very similar to how `trio.ClosedResource` acts as + a "clean shutdown" signal to the consumer side of a mem-chan, + + https://trio.readthedocs.io/en/stable/reference-core.html#clean-shutdown-with-channels + + ''' + + @acm async def translate_aio_errors( chan: LinkedTaskChannel, @@ -455,10 +474,11 @@ async def translate_aio_errors( trio_err: BaseException|None = None try: yield # back to one of the cross-loop apis - except ( - trio.Cancelled, - ) as _trio_err: - trio_err = _trio_err + except trio.Cancelled as taskc: + trio_err = taskc + + # should NEVER be the case that `trio` is cancel-handling + # BEFORE the other side's task-ref was set!? assert chan._aio_task # import pdbp; pdbp.set_trace() # lolevel-debug @@ -483,14 +503,13 @@ async def translate_aio_errors( # ) # raise + # NOTE ALSO SEE the matching note in the `cancel_trio()` asyncio + # task-done-callback. except ( - # NOTE: also see note in the `cancel_trio()` asyncio task - # termination callback trio.ClosedResourceError, # trio.BrokenResourceError, - - ) as _trio_err: - trio_err = _trio_err + ) as cre: + trio_err = cre aio_err = chan._aio_err # import pdbp; pdbp.set_trace() @@ -498,10 +517,21 @@ async def translate_aio_errors( # this channel close, raise our (non-`BaseException`) wrapper # exception (`AsyncioCancelled`) from that source error. if ( - # NOTE, not until it terminates? - aio_task.cancelled() + # aio-side is cancelled? + aio_task.cancelled() # not set until it terminates?? and type(aio_err) is CancelledError + + # TODO, if we want suppression of the + # silent-exit-by-`trio` case? + # -[ ] the parent task can also just catch it though? + # -[ ] OR, offer a `signal_aio_side_on_exit=True` ?? 
+ # + # or + # aio_err is None + # and + # chan._trio_exited + ): raise AsyncioCancelled( f'asyncio`-side cancelled the `trio`-side,\n' @@ -511,6 +541,7 @@ async def translate_aio_errors( f'{trio_err!r}\n' ) from aio_err + # maybe the chan-closure is due to something else? else: raise @@ -552,6 +583,7 @@ async def translate_aio_errors( finally: # record wtv `trio`-side error transpired chan._trio_err = trio_err + ya_trio_exited: bool = chan._trio_exited # NOTE! by default always cancel the `asyncio` task if # we've made it this far and it's not done. @@ -568,26 +600,56 @@ async def translate_aio_errors( # indicating the lifetime of the ``asyncio``-side task # should also be terminated. or ( - chan._trio_exited + ya_trio_exited and not chan._trio_err # XXX CRITICAL, `asyncio.Task.cancel()` is cucked man.. ) ): - # pass - msg: str = ( - f'MANUALLY Cancelling `asyncio`-task: {aio_task.get_name()}!\n\n' - f'**THIS CAN SILENTLY SUPPRESS ERRORS FYI\n\n' - - f'trio-side exited silently!' + report: str = ( + 'trio-side exited silently!' ) - # TODO XXX, figure out the case where calling this makes the - # `test_infected_asyncio.py::test_trio_closes_early_and_channel_exits` - # hang and then don't call it in that case! - # - aio_task.cancel(msg=msg) - log.warning(msg) - # assert not aio_err, 'WTF how did asyncio do this?!' - # import pdbp; pdbp.set_trace() + assert not aio_err, 'WTF how did asyncio do this?!' + + # if the `trio.Task` already exited the `open_channel_from()` + # block we ensure the asyncio-side gets signalled via an + # explicit exception and its `Queue` is shutdown. + if ya_trio_exited: + chan._to_aio.shutdown() + + # pump the other side's task? needed? + await trio.lowlevel.checkpoint() + + if ( + not chan._trio_err + and + (fut := aio_task._fut_waiter) + ): + fut.set_exception( + TrioTaskExited( + f'The peer `asyncio` task is still blocking/running?\n' + f'>>\n' + f'|_{aio_task!r}\n' + ) + ) + else: + # from tractor._state import is_root_process + # if is_root_process(): + # breakpoint() + # import pdbp; pdbp.set_trace() + + aio_taskc_warn: str = ( + f'\n' + f'MANUALLY Cancelling `asyncio`-task: {aio_task.get_name()}!\n\n' + f'**THIS CAN SILENTLY SUPPRESS ERRORS FYI\n\n' + ) + report += aio_taskc_warn + # TODO XXX, figure out the case where calling this makes the + # `test_infected_asyncio.py::test_trio_closes_early_and_channel_exits` + # hang and then don't call it in that case! + # + aio_task.cancel(msg=aio_taskc_warn) + + log.warning(report) # Required to sync with the far end `asyncio`-task to ensure # any error is captured (via monkeypatching the @@ -1077,6 +1139,8 @@ def run_as_asyncio_guest( except ( asyncio.InvalidStateError, # asyncio.CancelledError, + # ^^XXX `.shield()` call above prevents this?? + )as state_err: # XXX be super dupere noisy about abandonment issues! -- 2.34.1 From 72035a20d7bc15fa6d39cc929045cfd0e59949a7 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 9 Jan 2025 08:59:30 -0500 Subject: [PATCH 235/305] Add an inter-leaved-task error test Trying to replicate cases where errors are raised in both `trio` and `asyncio` tasks independently (at least in `.to_asyncio` API terms) with a new `test_trio_prestarted_task_bubbles` that generates 3 cases inside a `@acm` calls stack composing a `trio.Nursery` with a `to_asyncio.open_channel_from()` call where a set of `trio` tasks are started in a loop using `.start()` with various exc raising sequences, - the aio task raising *before* the last `trio` task spawns. 
- the aio task raising just after the last trio task spawns, but before it starts. - after the last trio task `.start()` call returns control to the parent - but (for now) did not error. TODO, still more cases to discover as i'm still fighting a `modden` bug of this sort atm.. Other, - tweak some other tests to have timeouts since some recent hangs were found.. - started mucking with py3.13 and thus adjustments for strict egs in some tests; full patchset to test suite likely coming soon! --- tests/test_infected_asyncio.py | 316 +++++++++++++++++++++++++++------ 1 file changed, 259 insertions(+), 57 deletions(-) diff --git a/tests/test_infected_asyncio.py b/tests/test_infected_asyncio.py index b0a11715..5d88920a 100644 --- a/tests/test_infected_asyncio.py +++ b/tests/test_infected_asyncio.py @@ -109,7 +109,9 @@ async def asyncio_actor( except BaseException as err: if expect_err: - assert isinstance(err, error_type) + assert isinstance(err, error_type), ( + f'{type(err)} is not {error_type}?' + ) raise @@ -181,8 +183,8 @@ def test_trio_cancels_aio(reg_addr): with trio.move_on_after(1): # cancel the nursery shortly after boot - async with tractor.open_nursery() as n: - await n.run_in_actor( + async with tractor.open_nursery() as tn: + await tn.run_in_actor( asyncio_actor, target='aio_sleep_forever', expect_err='trio.Cancelled', @@ -202,22 +204,33 @@ async def trio_ctx( # this will block until the ``asyncio`` task sends a "first" # message. with trio.fail_after(2): - async with ( - trio.open_nursery() as n, + try: + async with ( + trio.open_nursery( + # TODO, for new `trio` / py3.13 + # strict_exception_groups=False, + ) as tn, + tractor.to_asyncio.open_channel_from( + sleep_and_err, + ) as (first, chan), + ): - tractor.to_asyncio.open_channel_from( - sleep_and_err, - ) as (first, chan), - ): + assert first == 'start' - assert first == 'start' + # spawn another asyncio task for the cuck of it. + tn.start_soon( + tractor.to_asyncio.run_task, + aio_sleep_forever, + ) + await trio.sleep_forever() - # spawn another asyncio task for the cuck of it. - n.start_soon( - tractor.to_asyncio.run_task, - aio_sleep_forever, - ) - await trio.sleep_forever() + # TODO, factor this into a `trionics.callapse()`? + except* BaseException as beg: + # await tractor.pause(shield=True) + if len(excs := beg.exceptions) == 1: + raise excs[0] + else: + raise @pytest.mark.parametrize( @@ -236,7 +249,6 @@ def test_context_spawns_aio_task_that_errors( ''' async def main(): - with trio.fail_after(2): async with tractor.open_nursery() as n: p = await n.start_actor( @@ -308,7 +320,9 @@ async def aio_cancel(): await aio_sleep_forever() -def test_aio_cancelled_from_aio_causes_trio_cancelled(reg_addr): +def test_aio_cancelled_from_aio_causes_trio_cancelled( + reg_addr: tuple, +): ''' When the `asyncio.Task` cancels itself the `trio` side cshould also cancel and teardown and relay the cancellation cross-process @@ -405,6 +419,7 @@ async def stream_from_aio( sequence=seq, expect_cancel=raise_err or exit_early, fail_early=aio_raise_err, + ) as (first, chan): assert first is True @@ -423,10 +438,15 @@ async def stream_from_aio( if raise_err: raise Exception elif exit_early: + print('`consume()` breaking early!\n') break + print('returning from `consume()`..\n') + + # run 2 tasks each pulling from + # the inter-task-channel with the 2nd + # using a fan-out `BroadcastReceiver`. if fan_out: - # start second task that get's the same stream value set. 
async with ( # NOTE: this has to come first to avoid @@ -436,11 +456,19 @@ async def stream_from_aio( trio.open_nursery() as n, ): + # start 2nd task that get's broadcast the same + # value set. n.start_soon(consume, br) await consume(chan) else: await consume(chan) + except BaseException as err: + import logging + log = logging.getLogger() + log.exception('aio-subactor errored!\n') + raise err + finally: if ( @@ -461,7 +489,8 @@ async def stream_from_aio( assert not fan_out assert pulled == expect[:51] - print('trio guest mode task completed!') + print('trio guest-mode task completed!') + assert chan._aio_task.done() @pytest.mark.parametrize( @@ -501,19 +530,37 @@ def test_trio_error_cancels_intertask_chan(reg_addr): excinfo.value.boxed_type is Exception -def test_trio_closes_early_and_channel_exits(reg_addr): +def test_trio_closes_early_and_channel_exits( + reg_addr: tuple[str, int], +): + ''' + Check that if the `trio`-task "exits early" on `async for`ing the + inter-task-channel (via a `break`) we exit silently from the + `open_channel_from()` block and get a final `Return[None]` msg. + + ''' async def main(): - async with tractor.open_nursery() as n: - portal = await n.run_in_actor( - stream_from_aio, - exit_early=True, - infect_asyncio=True, - ) - # should raise RAE diectly - await portal.result() + with trio.fail_after(2): + async with tractor.open_nursery( + # debug_mode=True, + # enable_stack_on_sig=True, + ) as n: + portal = await n.run_in_actor( + stream_from_aio, + exit_early=True, + infect_asyncio=True, + ) + # should raise RAE diectly + print('waiting on final infected subactor result..') + res: None = await portal.wait_for_result() + assert res is None + print('infected subactor returned result: {res!r}\n') # should be a quiet exit on a simple channel exit - trio.run(main) + trio.run( + main, + # strict_exception_groups=False, + ) def test_aio_errors_and_channel_propagates_and_closes(reg_addr): @@ -660,6 +707,7 @@ def test_echoserver_detailed_mechanics( ) def test_infected_root_actor( raise_error_mid_stream: bool|Exception, + # conftest wide loglevel: str, debug_mode: bool, @@ -670,36 +718,38 @@ def test_infected_root_actor( ''' async def _trio_main(): + with trio.fail_after(2): + first: str + chan: to_asyncio.LinkedTaskChannel + async with ( + tractor.open_root_actor( + debug_mode=debug_mode, + loglevel=loglevel, + ), + to_asyncio.open_channel_from( + aio_echo_server, + ) as (first, chan), + ): + assert first == 'start' - first: str - chan: to_asyncio.LinkedTaskChannel - async with ( - tractor.open_root_actor( - debug_mode=debug_mode, - loglevel=loglevel, - ), - to_asyncio.open_channel_from( - aio_echo_server, - ) as (first, chan), - ): - assert first == 'start' + for i in range(1000): + await chan.send(i) + out = await chan.receive() + assert out == i + print(f'asyncio echoing {i}') - for i in range(1000): - await chan.send(i) - out = await chan.receive() - assert out == i - print(f'asyncio echoing {i}') + if raise_error_mid_stream and i == 500: + raise raise_error_mid_stream - if raise_error_mid_stream and i == 500: - raise raise_error_mid_stream - - if out is None: - try: - out = await chan.receive() - except trio.EndOfChannel: - break - else: - raise RuntimeError('aio channel never stopped?') + if out is None: + try: + out = await chan.receive() + except trio.EndOfChannel: + break + else: + raise RuntimeError( + 'aio channel never stopped?' 
+ ) if raise_error_mid_stream: with pytest.raises(raise_error_mid_stream): @@ -947,6 +997,158 @@ def test_sigint_closes_lifetime_stack( trio.run(main) +async def sync_and_err( + # just signature placeholders for compat with + # ``to_asyncio.open_channel_from()`` + to_trio: trio.MemorySendChannel, + from_trio: asyncio.Queue, + ev: asyncio.Event, + +): + if to_trio: + to_trio.send_nowait('start') + + await ev.wait() + raise RuntimeError('asyncio-side') + + +@pytest.mark.parametrize( + 'aio_err_trigger', + [ + 'before_start_point', + 'after_trio_task_starts', + 'after_start_point', + ], + ids='aio_err_triggered={}'.format +) +def test_trio_prestarted_task_bubbles( + aio_err_trigger: str, + + # conftest wide + loglevel: str, + debug_mode: bool, +): + + async def pre_started_err( + raise_err: bool = False, + pre_sleep: float|None = None, + aio_trigger: asyncio.Event|None = None, + task_status=trio.TASK_STATUS_IGNORED, + ): + ''' + Maybe pre-started error then sleep. + + ''' + if pre_sleep is not None: + print(f'Sleeping from trio for {pre_sleep!r}s !') + await trio.sleep(pre_sleep) + + # signal aio-task to raise JUST AFTER this task + # starts but has not yet `.started()` + if aio_trigger: + print('Signalling aio-task to raise from `trio`!!') + aio_trigger.set() + + if raise_err: + print('Raising from trio!') + raise TypeError('trio-side') + + task_status.started() + await trio.sleep_forever() + + async def _trio_main(): + # with trio.fail_after(2): + with trio.fail_after(999): + first: str + chan: to_asyncio.LinkedTaskChannel + aio_ev = asyncio.Event() + + async with ( + tractor.open_root_actor( + debug_mode=False, + loglevel=loglevel, + ), + + # where we'll start a sub-task that errors BEFORE + # calling `.started()` such that the error should + # bubble before the guest run terminates! + trio.open_nursery() as tn, + + # THEN start an infect task which should error just + # after the trio-side's task does. + to_asyncio.open_channel_from( + partial( + sync_and_err, + ev=aio_ev, + ) + ) as (first, chan), + ): + + for i in range(5): + pre_sleep: float|None = None + raise_err: bool = False + last_iter: bool = (i == 4) + + if last_iter: + raise_err: bool = True + + # trigger aio task to error on next loop + # tick/checkpoint + if aio_err_trigger == 'before_start_point': + aio_ev.set() + + pre_sleep: float = 0 + + await tn.start( + pre_started_err, + raise_err, + pre_sleep, + (aio_ev if ( + aio_err_trigger == 'after_trio_task_starts' + and + last_iter + ) else None + ), + ) + + if ( + aio_err_trigger == 'after_start_point' + and + last_iter + ): + aio_ev.set() + + with pytest.raises( + expected_exception=ExceptionGroup, + ) as excinfo: + tractor.to_asyncio.run_as_asyncio_guest( + trio_main=_trio_main, + ) + + eg = excinfo.value + rte_eg, rest_eg = eg.split(RuntimeError) + + # ensure the trio-task's error bubbled despite the aio-side + # having (maybe) errored first. + if aio_err_trigger in ( + 'after_trio_task_starts', + 'after_start_point', + ): + assert len(errs := rest_eg.exceptions) == 1 + typerr = errs[0] + assert ( + type(typerr) is TypeError + and + 'trio-side' in typerr.args + ) + + # when aio errors BEFORE (last) trio task is scheduled, we should + # never see anythinb but the aio-side. + else: + assert len(rtes := rte_eg.exceptions) == 1 + assert 'asyncio-side' in rtes[0].args[0] + + # TODO: debug_mode tests once we get support for `asyncio`! 
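A quick aside on the `eg.split()` assertions just above; a standalone
sketch (py3.11+ stdlib only, with a hand-rolled group mirroring the
test's '-side' msgs) of how `BaseExceptionGroup.split()` partitions by
exc-type:

    eg = ExceptionGroup(
        'demo of the split-assertions above',
        [
            RuntimeError('asyncio-side'),
            TypeError('trio-side'),
        ],
    )
    # returns a `(matched, rest)` pair of (sub-)groups
    rte_eg, rest_eg = eg.split(RuntimeError)
    assert [type(e) for e in rte_eg.exceptions] == [RuntimeError]
    assert [type(e) for e in rest_eg.exceptions] == [TypeError]

    # when nothing matches, the matched side is `None`, NOT an empty group
    matched, rest = eg.split(ValueError)
    assert matched is None and rest is not None
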
# # -[ ] need tests to wrap both scripts: -- 2.34.1 From a60837550e7bd32fc0156daf4b95dab6c0cc6646 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 10 Jan 2025 12:42:23 -0500 Subject: [PATCH 236/305] Yield a boxed-maybe-error from `open_crash_handler()` Along the lines of something like `pytest.raises()` where the handled exception can be inspected from the `pdbp` REPL using its `.value` field B) This is super handy in particular for understanding `BaseException[Group]`s without manually adding surrounding handler code to assign the `except[*] Exception as exc_var:` particularly when trying to understand multi-cancelled eg trees. --- tractor/devx/_debug.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 7c178ab1..04df000f 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -317,6 +317,7 @@ class Lock: we_released: bool = False ctx_in_debug: Context|None = cls.ctx_in_debug repl_task: Task|Thread|None = DebugStatus.repl_task + message: str = '' try: if not DebugStatus.is_main_trio_thread(): @@ -444,7 +445,10 @@ class Lock: f'|_{repl_task}\n' ) - log.devx(message) + if message: + log.devx(message) + else: + import pdbp; pdbp.set_trace() return we_released @@ -3168,7 +3172,7 @@ async def maybe_wait_for_debugger( @cm def open_crash_handler( catch: set[BaseException] = { - Exception, + # Exception, BaseException, }, ignore: set[BaseException] = { @@ -3189,10 +3193,20 @@ def open_crash_handler( ''' __tracebackhide__: bool = tb_hide + class BoxedMaybeException(Struct): + value: BaseException|None = None + + # TODO, yield a `outcome.Error`-like boxed type? + # -[~] use `outcome.Value/Error` X-> frozen! + # -[x] write our own..? + # -[ ] consider just wtv is used by `pytest.raises()`? + # + boxed_maybe_exc = BoxedMaybeException() err: BaseException try: - yield + yield boxed_maybe_exc except tuple(catch) as err: + boxed_maybe_exc.value = err if ( type(err) not in ignore and @@ -3210,13 +3224,13 @@ def open_crash_handler( ) except bdb.BdbQuit: __tracebackhide__: bool = False - raise + raise err # XXX NOTE, `pdbp`'s version seems to lose the up-stack # tb-info? # pdbp.xpm() - raise + raise err @cm -- 2.34.1 From 2bd4cc9727dfd3da28c5c65762ee5155d2fbfc2d Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 10 Jan 2025 15:46:00 -0500 Subject: [PATCH 237/305] Add a "raise-from-`finally:`" example test Since i wasted 2 days just to find an example of this inside an `@acm`, figured I better reproduce for the purposes of maybe implementing a warning sys (inside our wip proto `open_taskman()`) when a nursery detects a single `Cancelled` in an eg where the `.__context__` is set to some non-cancel error (which likely means a cancel-causing source exception was suppressed by accident). Left in a buncha commented code using `maybe_open_nursery()` which i thought might be part of the issue but didn't end up being required; will likely remove on a follow up refinement. --- tests/test_trioisms.py | 61 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/tests/test_trioisms.py b/tests/test_trioisms.py index 27dc6c34..be29965e 100644 --- a/tests/test_trioisms.py +++ b/tests/test_trioisms.py @@ -3,6 +3,10 @@ Reminders for oddities in `trio` that we need to stay aware of and/or want to see changed. 
''' +from contextlib import ( + asynccontextmanager as acm, +) + import pytest import trio from trio import TaskStatus @@ -80,3 +84,60 @@ def test_stashed_child_nursery(use_start_soon): with pytest.raises(NameError): trio.run(main) + + +# @pytest.mark.parametrize( +# 'open_tn_outside_acm', +# [True, False] +# # ids='aio_err_triggered={}'.format +# ) +@pytest.mark.parametrize( + 'canc_from_finally', + [True, False] + # ids='aio_err_triggered={}'.format +) +def test_acm_embedded_nursery_propagates_enter_err( + canc_from_finally: bool, + # open_tn_outside_acm: bool, +): + # from tractor.trionics import maybe_open_nursery + + # async def canc_then_checkpoint(tn): + # tn.cancel_scope.cancel() + # await trio.lowlevel.checkpoint() + + @acm + async def wraps_tn_that_always_cancels( + # maybe_tn: trio.Nursery|None = None + ): + # async with maybe_open_nursery(maybe_tn) as tn: + async with trio.open_nursery() as tn: + try: + yield tn + finally: + if canc_from_finally: + # await canc_then_checkpoint(tn) + tn.cancel_scope.cancel() + await trio.lowlevel.checkpoint() + + async def _main(): + # open_nursery = ( + # trio.open_nursery if open_tn_outside_acm + # else nullcontext + # ) + + async with ( + # open_nursery() as tn, + # wraps_tn_that_always_cancels(maybe_tn=tn) as tn + wraps_tn_that_always_cancels() as tn + ): + assert not tn.cancel_scope.cancel_called + assert 0 + + with pytest.raises(ExceptionGroup) as excinfo: + trio.run(_main) + + eg = excinfo.value + assert_eg, rest_eg = eg.split(AssertionError) + + assert len(assert_eg.exceptions) == 1 -- 2.34.1 From 1075ea3687bef7a45576401a7bb521734dffc1f3 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 10 Jan 2025 17:29:11 -0500 Subject: [PATCH 238/305] Impl a proto "unmasker" `@acm` alongside our test Such that the suite verifies the wip `maybe_raise_from_masking_exc()` will raise from a `trio.Cancelled.__context__` since I can't think of any reason a `Cancelled` should ever be raised in-place of a non-`Cancelled` XD Not sure what should be raised instead (or maybe just a `log.warning()` emitted?) but this starts a draft for refinement at the least. Use the new `@pytest.mark.parametrize` explicit tuple-of-params form with an `pytest.param + `.mark.xfail()` for the default behaviour case. --- tests/test_trioisms.py | 115 ++++++++++++++++++++++++++++++----------- 1 file changed, 85 insertions(+), 30 deletions(-) diff --git a/tests/test_trioisms.py b/tests/test_trioisms.py index be29965e..fad99f11 100644 --- a/tests/test_trioisms.py +++ b/tests/test_trioisms.py @@ -86,58 +86,113 @@ def test_stashed_child_nursery(use_start_soon): trio.run(main) -# @pytest.mark.parametrize( -# 'open_tn_outside_acm', -# [True, False] -# # ids='aio_err_triggered={}'.format -# ) @pytest.mark.parametrize( - 'canc_from_finally', - [True, False] - # ids='aio_err_triggered={}'.format + ('unmask_from_canc', 'canc_from_finally'), + [ + (True, False), + (True, True), + pytest.param(False, True, + marks=pytest.mark.xfail(reason="never raises!") + ), + ], + # TODO, ask ronny how to impl this .. XD + # ids='unmask_from_canc={0}, canc_from_finally={1}',#.format, ) def test_acm_embedded_nursery_propagates_enter_err( canc_from_finally: bool, - # open_tn_outside_acm: bool, + unmask_from_canc: bool, ): - # from tractor.trionics import maybe_open_nursery + ''' + Demo how a masking `trio.Cancelled` could be handled by unmasking from the + `.__context__` field when a user (by accident) re-raises from a `finally:`. 
- # async def canc_then_checkpoint(tn): - # tn.cancel_scope.cancel() - # await trio.lowlevel.checkpoint() + ''' + import tractor @acm - async def wraps_tn_that_always_cancels( - # maybe_tn: trio.Nursery|None = None + async def maybe_raise_from_masking_exc( + tn: trio.Nursery, + unmask_from: BaseException|None = trio.Cancelled + + # TODO, maybe offer a collection? + # unmask_from: set[BaseException] = { + # trio.Cancelled, + # }, ): - # async with maybe_open_nursery(maybe_tn) as tn: - async with trio.open_nursery() as tn: + if not unmask_from: + yield + return + + try: + yield + except* unmask_from as be_eg: + + # TODO, if we offer `unmask_from: set` + # for masker_exc_type in unmask_from: + + matches, rest = be_eg.split(unmask_from) + if not matches: + raise + + for exc_match in be_eg.exceptions: + if ( + (exc_ctx := exc_match.__context__) + and + type(exc_ctx) not in { + # trio.Cancelled, # always by default? + unmask_from, + } + ): + exc_ctx.add_note( + f'\n' + f'WARNING: the above error was masked by a {unmask_from!r} !?!\n' + f'Are you always cancelling? Say from a `finally:` ?\n\n' + + f'{tn!r}' + ) + raise exc_ctx from exc_match + + + @acm + async def wraps_tn_that_always_cancels(): + async with ( + trio.open_nursery() as tn, + maybe_raise_from_masking_exc( + tn=tn, + unmask_from=( + trio.Cancelled + if unmask_from_canc + else None + ), + ) + ): try: yield tn finally: if canc_from_finally: - # await canc_then_checkpoint(tn) tn.cancel_scope.cancel() await trio.lowlevel.checkpoint() async def _main(): - # open_nursery = ( - # trio.open_nursery if open_tn_outside_acm - # else nullcontext - # ) + with tractor.devx.open_crash_handler() as bxerr: + assert not bxerr.value - async with ( - # open_nursery() as tn, - # wraps_tn_that_always_cancels(maybe_tn=tn) as tn - wraps_tn_that_always_cancels() as tn - ): - assert not tn.cancel_scope.cancel_called - assert 0 + async with ( + wraps_tn_that_always_cancels() as tn, + ): + assert not tn.cancel_scope.cancel_called + assert 0 + + assert ( + (err := bxerr.value) + and + type(err) is AssertionError + ) with pytest.raises(ExceptionGroup) as excinfo: trio.run(_main) - eg = excinfo.value + eg: ExceptionGroup = excinfo.value assert_eg, rest_eg = eg.split(AssertionError) assert len(assert_eg.exceptions) == 1 -- 2.34.1 From f26d4870008b24e49cf58e1414ff94a5689cfb53 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 10 Jan 2025 17:57:54 -0500 Subject: [PATCH 239/305] Add a `tests/test_root_infect_asyncio` Might as well break apart the specific test set since there are some (minor) subtleties and the orig test mod is already getting pretty big XD Includes both the new "independent"-event-loops test as well as the std usage base case suite. --- tests/test_infected_asyncio.py | 222 +-------------------------- tests/test_root_infect_asyncio.py | 244 ++++++++++++++++++++++++++++++ 2 files changed, 246 insertions(+), 220 deletions(-) create mode 100644 tests/test_root_infect_asyncio.py diff --git a/tests/test_infected_asyncio.py b/tests/test_infected_asyncio.py index 5d88920a..d462f59d 100644 --- a/tests/test_infected_asyncio.py +++ b/tests/test_infected_asyncio.py @@ -5,7 +5,7 @@ The hipster way to force SC onto the stdlib's "async": 'infection mode'. import asyncio import builtins from contextlib import ExitStack -from functools import partial +# from functools import partial import itertools import importlib import os @@ -224,7 +224,7 @@ async def trio_ctx( ) await trio.sleep_forever() - # TODO, factor this into a `trionics.callapse()`? 
+ # TODO, factor this into a `trionics.collapse()`? except* BaseException as beg: # await tractor.pause(shield=True) if len(excs := beg.exceptions) == 1: @@ -696,72 +696,6 @@ def test_echoserver_detailed_mechanics( trio.run(main) -@pytest.mark.parametrize( - 'raise_error_mid_stream', - [ - False, - Exception, - KeyboardInterrupt, - ], - ids='raise_error={}'.format, -) -def test_infected_root_actor( - raise_error_mid_stream: bool|Exception, - - # conftest wide - loglevel: str, - debug_mode: bool, -): - ''' - Verify you can run the `tractor` runtime with `Actor.is_infected_aio() == True` - in the root actor. - - ''' - async def _trio_main(): - with trio.fail_after(2): - first: str - chan: to_asyncio.LinkedTaskChannel - async with ( - tractor.open_root_actor( - debug_mode=debug_mode, - loglevel=loglevel, - ), - to_asyncio.open_channel_from( - aio_echo_server, - ) as (first, chan), - ): - assert first == 'start' - - for i in range(1000): - await chan.send(i) - out = await chan.receive() - assert out == i - print(f'asyncio echoing {i}') - - if raise_error_mid_stream and i == 500: - raise raise_error_mid_stream - - if out is None: - try: - out = await chan.receive() - except trio.EndOfChannel: - break - else: - raise RuntimeError( - 'aio channel never stopped?' - ) - - if raise_error_mid_stream: - with pytest.raises(raise_error_mid_stream): - tractor.to_asyncio.run_as_asyncio_guest( - trio_main=_trio_main, - ) - else: - tractor.to_asyncio.run_as_asyncio_guest( - trio_main=_trio_main, - ) - - @tractor.context async def manage_file( ctx: tractor.Context, @@ -997,158 +931,6 @@ def test_sigint_closes_lifetime_stack( trio.run(main) -async def sync_and_err( - # just signature placeholders for compat with - # ``to_asyncio.open_channel_from()`` - to_trio: trio.MemorySendChannel, - from_trio: asyncio.Queue, - ev: asyncio.Event, - -): - if to_trio: - to_trio.send_nowait('start') - - await ev.wait() - raise RuntimeError('asyncio-side') - - -@pytest.mark.parametrize( - 'aio_err_trigger', - [ - 'before_start_point', - 'after_trio_task_starts', - 'after_start_point', - ], - ids='aio_err_triggered={}'.format -) -def test_trio_prestarted_task_bubbles( - aio_err_trigger: str, - - # conftest wide - loglevel: str, - debug_mode: bool, -): - - async def pre_started_err( - raise_err: bool = False, - pre_sleep: float|None = None, - aio_trigger: asyncio.Event|None = None, - task_status=trio.TASK_STATUS_IGNORED, - ): - ''' - Maybe pre-started error then sleep. - - ''' - if pre_sleep is not None: - print(f'Sleeping from trio for {pre_sleep!r}s !') - await trio.sleep(pre_sleep) - - # signal aio-task to raise JUST AFTER this task - # starts but has not yet `.started()` - if aio_trigger: - print('Signalling aio-task to raise from `trio`!!') - aio_trigger.set() - - if raise_err: - print('Raising from trio!') - raise TypeError('trio-side') - - task_status.started() - await trio.sleep_forever() - - async def _trio_main(): - # with trio.fail_after(2): - with trio.fail_after(999): - first: str - chan: to_asyncio.LinkedTaskChannel - aio_ev = asyncio.Event() - - async with ( - tractor.open_root_actor( - debug_mode=False, - loglevel=loglevel, - ), - - # where we'll start a sub-task that errors BEFORE - # calling `.started()` such that the error should - # bubble before the guest run terminates! - trio.open_nursery() as tn, - - # THEN start an infect task which should error just - # after the trio-side's task does. 
- to_asyncio.open_channel_from( - partial( - sync_and_err, - ev=aio_ev, - ) - ) as (first, chan), - ): - - for i in range(5): - pre_sleep: float|None = None - raise_err: bool = False - last_iter: bool = (i == 4) - - if last_iter: - raise_err: bool = True - - # trigger aio task to error on next loop - # tick/checkpoint - if aio_err_trigger == 'before_start_point': - aio_ev.set() - - pre_sleep: float = 0 - - await tn.start( - pre_started_err, - raise_err, - pre_sleep, - (aio_ev if ( - aio_err_trigger == 'after_trio_task_starts' - and - last_iter - ) else None - ), - ) - - if ( - aio_err_trigger == 'after_start_point' - and - last_iter - ): - aio_ev.set() - - with pytest.raises( - expected_exception=ExceptionGroup, - ) as excinfo: - tractor.to_asyncio.run_as_asyncio_guest( - trio_main=_trio_main, - ) - - eg = excinfo.value - rte_eg, rest_eg = eg.split(RuntimeError) - - # ensure the trio-task's error bubbled despite the aio-side - # having (maybe) errored first. - if aio_err_trigger in ( - 'after_trio_task_starts', - 'after_start_point', - ): - assert len(errs := rest_eg.exceptions) == 1 - typerr = errs[0] - assert ( - type(typerr) is TypeError - and - 'trio-side' in typerr.args - ) - - # when aio errors BEFORE (last) trio task is scheduled, we should - # never see anythinb but the aio-side. - else: - assert len(rtes := rte_eg.exceptions) == 1 - assert 'asyncio-side' in rtes[0].args[0] - - # TODO: debug_mode tests once we get support for `asyncio`! # # -[ ] need tests to wrap both scripts: diff --git a/tests/test_root_infect_asyncio.py b/tests/test_root_infect_asyncio.py new file mode 100644 index 00000000..331b6311 --- /dev/null +++ b/tests/test_root_infect_asyncio.py @@ -0,0 +1,244 @@ +''' +Special attention cases for using "infect `asyncio`" mode from a root +actor; i.e. not using a std `trio.run()` bootstrap. + +''' +import asyncio +from functools import partial + +import pytest +import trio +import tractor +from tractor import ( + to_asyncio, +) +from tests.test_infected_asyncio import ( + aio_echo_server, +) + + +@pytest.mark.parametrize( + 'raise_error_mid_stream', + [ + False, + Exception, + KeyboardInterrupt, + ], + ids='raise_error={}'.format, +) +def test_infected_root_actor( + raise_error_mid_stream: bool|Exception, + + # conftest wide + loglevel: str, + debug_mode: bool, +): + ''' + Verify you can run the `tractor` runtime with `Actor.is_infected_aio() == True` + in the root actor. + + ''' + async def _trio_main(): + with trio.fail_after(2): + first: str + chan: to_asyncio.LinkedTaskChannel + async with ( + tractor.open_root_actor( + debug_mode=debug_mode, + loglevel=loglevel, + ), + to_asyncio.open_channel_from( + aio_echo_server, + ) as (first, chan), + ): + assert first == 'start' + + for i in range(1000): + await chan.send(i) + out = await chan.receive() + assert out == i + print(f'asyncio echoing {i}') + + if raise_error_mid_stream and i == 500: + raise raise_error_mid_stream + + if out is None: + try: + out = await chan.receive() + except trio.EndOfChannel: + break + else: + raise RuntimeError( + 'aio channel never stopped?' 
+ ) + + if raise_error_mid_stream: + with pytest.raises(raise_error_mid_stream): + tractor.to_asyncio.run_as_asyncio_guest( + trio_main=_trio_main, + ) + else: + tractor.to_asyncio.run_as_asyncio_guest( + trio_main=_trio_main, + ) + + + +async def sync_and_err( + # just signature placeholders for compat with + # ``to_asyncio.open_channel_from()`` + to_trio: trio.MemorySendChannel, + from_trio: asyncio.Queue, + ev: asyncio.Event, + +): + if to_trio: + to_trio.send_nowait('start') + + await ev.wait() + raise RuntimeError('asyncio-side') + + +@pytest.mark.parametrize( + 'aio_err_trigger', + [ + 'before_start_point', + 'after_trio_task_starts', + 'after_start_point', + ], + ids='aio_err_triggered={}'.format +) +def test_trio_prestarted_task_bubbles( + aio_err_trigger: str, + + # conftest wide + loglevel: str, + debug_mode: bool, +): + async def pre_started_err( + raise_err: bool = False, + pre_sleep: float|None = None, + aio_trigger: asyncio.Event|None = None, + task_status=trio.TASK_STATUS_IGNORED, + ): + ''' + Maybe pre-started error then sleep. + + ''' + if pre_sleep is not None: + print(f'Sleeping from trio for {pre_sleep!r}s !') + await trio.sleep(pre_sleep) + + # signal aio-task to raise JUST AFTER this task + # starts but has not yet `.started()` + if aio_trigger: + print('Signalling aio-task to raise from `trio`!!') + aio_trigger.set() + + if raise_err: + print('Raising from trio!') + raise TypeError('trio-side') + + task_status.started() + await trio.sleep_forever() + + async def _trio_main(): + # with trio.fail_after(2): + with trio.fail_after(999): + first: str + chan: to_asyncio.LinkedTaskChannel + aio_ev = asyncio.Event() + + async with ( + tractor.open_root_actor( + debug_mode=False, + loglevel=loglevel, + ), + ): + # TODO, tests for this with 3.13 egs? + # from tractor.devx import open_crash_handler + # with open_crash_handler(): + async with ( + # where we'll start a sub-task that errors BEFORE + # calling `.started()` such that the error should + # bubble before the guest run terminates! + trio.open_nursery() as tn, + + # THEN start an infect task which should error just + # after the trio-side's task does. + to_asyncio.open_channel_from( + partial( + sync_and_err, + ev=aio_ev, + ) + ) as (first, chan), + ): + + for i in range(5): + pre_sleep: float|None = None + last_iter: bool = (i == 4) + + # TODO, missing cases? + # -[ ] error as well on + # 'after_start_point' case as well for + # another case? + raise_err: bool = False + + if last_iter: + raise_err: bool = True + + # trigger aio task to error on next loop + # tick/checkpoint + if aio_err_trigger == 'before_start_point': + aio_ev.set() + + pre_sleep: float = 0 + + await tn.start( + pre_started_err, + raise_err, + pre_sleep, + (aio_ev if ( + aio_err_trigger == 'after_trio_task_starts' + and + last_iter + ) else None + ), + ) + + if ( + aio_err_trigger == 'after_start_point' + and + last_iter + ): + aio_ev.set() + + with pytest.raises( + expected_exception=ExceptionGroup, + ) as excinfo: + tractor.to_asyncio.run_as_asyncio_guest( + trio_main=_trio_main, + ) + + eg = excinfo.value + rte_eg, rest_eg = eg.split(RuntimeError) + + # ensure the trio-task's error bubbled despite the aio-side + # having (maybe) errored first. 
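For reference, the `tn.start()` + `task_status.started()` handshake
which `pre_started_err()` above participates in; a minimal standalone
`trio` sketch with hypothetical names:

    import trio

    async def server(
        task_status=trio.TASK_STATUS_IGNORED,
    ):
        # any error raised BEFORE `.started()` propagates out of the
        # parent's `await tn.start()` call; that's the pre-start
        # bubbling case the test above exercises.
        listen_port: int = 8080  # hypothetical init step
        task_status.started(listen_port)
        await trio.sleep_forever()

    async def main():
        async with trio.open_nursery() as tn:
            # blocks until `.started()` (or an init error)
            port = await tn.start(server)
            assert port == 8080
            tn.cancel_scope.cancel()

    trio.run(main)
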
+ if aio_err_trigger in ( + 'after_trio_task_starts', + 'after_start_point', + ): + assert len(errs := rest_eg.exceptions) == 1 + typerr = errs[0] + assert ( + type(typerr) is TypeError + and + 'trio-side' in typerr.args + ) + + # when aio errors BEFORE (last) trio task is scheduled, we should + # never see anythinb but the aio-side. + else: + assert len(rtes := rte_eg.exceptions) == 1 + assert 'asyncio-side' in rtes[0].args[0] -- 2.34.1 From 1ff79f86b71b21f0c9547af1f71096b8d692b801 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 21 Feb 2025 18:39:18 -0500 Subject: [PATCH 240/305] Raise "independent" task errors in an eg The (rare) condition is heavily detailed in new comments in the `cancel_trio()` callback but, more or less the idea here is to be extra pedantic in raising an `Exceptiongroup` of errors from each task (both `asyncio` and `trio`) whenever the 2 tasks raise "independently" - in the sense that it's not obviously one side's task causing an error (or cancellation) in the other. In this case we set the error for each side on the `LinkedTaskChannel` (via new attrs described later). As a synopsis, most of this work was refined out of supporting `infected_aio=True` mode in the **root actor** and in particular as part of getting that to work inside the `modden` daemon which at the time of writing was still using the `i3ipc` lib and thus `asyncio`. Impl deats, - extend the `LinkedTaskChannel` field/API set (and type it), - `._trio_task: trio.Task` for test/user introspection. - also "stage" some ideas for a more refined interface, - `.started()` to deliver the value yielded to the `trio.Task` parent. |_ also includes some todos for how to implement this design underneath. - `._aio_first: Any|None = None` to hold that value ^. - `.wait_aio_complete()` for syncing to the asyncio task. - some detailed logging around "asyncio cancelled trio" case. - Move `AsyncioCancelled` in this module. Styling changes, - generally more explicit var naming. - some todos for getting modern and fancy with typing.. NB, Let it be known this commit msg was written on a friday with the help of various "mr. white" solns. --- tractor/to_asyncio.py | 225 +++++++++++++++++++++++++++++++----------- 1 file changed, 167 insertions(+), 58 deletions(-) diff --git a/tractor/to_asyncio.py b/tractor/to_asyncio.py index 3f8d20d3..75dfb5cb 100644 --- a/tractor/to_asyncio.py +++ b/tractor/to_asyncio.py @@ -34,7 +34,6 @@ from typing import ( import tractor from tractor._exceptions import ( - AsyncioCancelled, is_multi_cancelled, ) from tractor._state import ( @@ -46,6 +45,11 @@ from tractor.log import ( get_logger, StackLevelAdapter, ) +# TODO, wite the equiv of `trio.abc.Channel` but without attrs.. +# -[ ] `trionics.chan_types.ChanStruct` maybe? +# from tractor.msg import ( +# pretty_struct, +# ) from tractor.trionics._broadcast import ( broadcast_receiver, BroadcastReceiver, @@ -66,7 +70,12 @@ __all__ = [ @dataclass -class LinkedTaskChannel(trio.abc.Channel): +class LinkedTaskChannel( + trio.abc.Channel, + + # XXX LAME! meta-base conflict.. 
+ # pretty_struct.Struct, +): ''' A "linked task channel" which allows for two-way synchronized msg passing between a ``trio``-in-guest-mode task and an ``asyncio`` @@ -77,12 +86,14 @@ class LinkedTaskChannel(trio.abc.Channel): _from_aio: trio.MemoryReceiveChannel _to_trio: trio.MemorySendChannel _trio_cs: trio.CancelScope + _trio_task: trio.Task _aio_task_complete: trio.Event _trio_err: BaseException|None = None _trio_exited: bool = False # set after ``asyncio.create_task()`` + # _aio_first: Any|None = None _aio_task: asyncio.Task|None = None _aio_err: BaseException|None = None _broadcaster: BroadcastReceiver|None = None @@ -90,6 +101,25 @@ class LinkedTaskChannel(trio.abc.Channel): async def aclose(self) -> None: await self._from_aio.aclose() + def started( + self, + val: Any = None, + ) -> None: + self._aio_started_val = val + return self._to_trio.send_nowait(val) + + # TODO, mk this side-agnostic? + # + # -[ ] add private meths for both sides and dynamically + # determine which to use based on task-type read at calltime? + # -[ ] `._recv_trio()`: receive to trio<-asyncio + # -[ ] `._send_trio()`: send from trio->asyncio + # -[ ] `._recv_aio()`: send from asyncio->trio + # -[ ] `._send_aio()`: receive to asyncio<-trio + # + # -[ ] pass the instance to the aio side instead of the separate + # per-side chan types? + # async def receive(self) -> Any: ''' Receive a value from the paired `asyncio.Task` with @@ -115,7 +145,16 @@ class LinkedTaskChannel(trio.abc.Channel): ): raise err - async def wait_asyncio_complete(self) -> None: + async def send(self, item: Any) -> None: + ''' + Send a value through to the asyncio task presuming + it defines a ``from_trio`` argument, if it does not + this method will raise an error. + + ''' + self._to_aio.put_nowait(item) + + async def wait_aio_complete(self) -> None: await self._aio_task_complete.wait() def cancel_asyncio_task( @@ -126,15 +165,6 @@ class LinkedTaskChannel(trio.abc.Channel): msg=msg, ) - async def send(self, item: Any) -> None: - ''' - Send a value through to the asyncio task presuming - it defines a ``from_trio`` argument, if it does not - this method will raise an error. - - ''' - self._to_aio.put_nowait(item) - def closed(self) -> bool: return self._from_aio._closed # type: ignore @@ -218,7 +248,8 @@ def _run_asyncio_task( coro = func(**kwargs) - cancel_scope = trio.CancelScope() + trio_task: trio.Task = trio.lowlevel.current_task() + trio_cs = trio.CancelScope() aio_task_complete = trio.Event() aio_err: BaseException|None = None @@ -226,7 +257,8 @@ def _run_asyncio_task( _to_aio=aio_q, # asyncio.Queue _from_aio=from_aio, # recv chan _to_trio=to_trio, # send chan - _trio_cs=cancel_scope, + _trio_cs=trio_cs, + _trio_task=trio_task, _aio_task_complete=aio_task_complete, ) @@ -274,6 +306,9 @@ def _run_asyncio_task( to_trio.send_nowait(result) finally: + # breakpoint() + # import pdbp; pdbp.set_trace() + # if the task was spawned using `open_channel_from()` # then we close the channels on exit. if provide_channels: @@ -281,7 +316,6 @@ def _run_asyncio_task( # a ``trio.EndOfChannel`` to the trio (consumer) side. to_trio.close() - # import pdbp; pdbp.set_trace() aio_task_complete.set() # await asyncio.sleep(0.1) log.info( @@ -325,14 +359,17 @@ def _run_asyncio_task( ) greenback.bestow_portal(task) - def cancel_trio(task: asyncio.Task) -> None: + def cancel_trio( + task: asyncio.Task, + ) -> None: ''' - Cancel the calling `trio` task on error. + Cancel the parent `trio` task on any error raised by the + `asyncio` side. 
''' nonlocal chan - aio_err: BaseException|None = chan._aio_err - task_err: BaseException|None = None + relayed_aio_err: BaseException|None = chan._aio_err + aio_err: BaseException|None = None # only to avoid `asyncio` complaining about uncaptured # task exceptions @@ -343,20 +380,20 @@ def _run_asyncio_task( f'|_{res}\n' ) except BaseException as _aio_err: - task_err: BaseException = _aio_err - + aio_err: BaseException = _aio_err # read again AFTER the `asyncio` side errors in case # it was cancelled due to an error from `trio` (or - # some other out of band exc). - aio_err: BaseException|None = chan._aio_err + # some other out of band exc) and then set to something + # else? + relayed_aio_err: BaseException|None = chan._aio_err # always true right? assert ( - type(_aio_err) is type(aio_err) + type(_aio_err) is type(relayed_aio_err) ), ( f'`asyncio`-side task errors mismatch?!?\n\n' - f'caught: {_aio_err}\n' - f'chan._aio_err: {aio_err}\n' + f'(caught) aio_err: {aio_err}\n' + f'chan._aio_err: {relayed_aio_err}\n' ) msg: str = ( @@ -381,12 +418,13 @@ def _run_asyncio_task( msg.format(etype_str='errored') ) - - if aio_err is not None: + trio_err: BaseException|None = chan._trio_err + if ( + relayed_aio_err + or + trio_err + ): # import pdbp; pdbp.set_trace() - # XXX: uhh is this true? - # assert task_err, f'Asyncio task {task.get_name()} discrepancy!?' - # NOTE: currently mem chan closure may act as a form # of error relay (at least in the `asyncio.CancelledError` # case) since we have no way to directly trigger a `trio` @@ -394,8 +432,6 @@ def _run_asyncio_task( # We might want to change this in the future though. from_aio.close() - if task_err is None: - assert aio_err # wait, wut? # aio_err.with_traceback(aio_err.__traceback__) @@ -404,7 +440,7 @@ def _run_asyncio_task( # elif ( # type(aio_err) is CancelledError # and # trio was the cause? - # cancel_scope.cancel_called + # trio_cs.cancel_called # ): # log.cancel( # 'infected task was cancelled by `trio`-side' @@ -415,26 +451,83 @@ def _run_asyncio_task( # error in case the trio task is blocking on # a checkpoint. if ( - not cancel_scope.cancelled_caught + not trio_cs.cancelled_caught or - not cancel_scope.cancel_called + not trio_cs.cancel_called ): # import pdbp; pdbp.set_trace() - cancel_scope.cancel() + trio_cs.cancel() - if task_err: + # maybe the `trio` task errored independent from the + # `asyncio` one and likely in between + # a guest-run-sched-tick. + # + # The obvious ex. is where one side errors during + # the current tick and then the other side immediately + # errors before its next checkpoint; i.e. the 2 errors + # are "independent". + # + # "Independent" here means in the sense that neither task + # was the explicit cause of the other side's exception + # according to our `tractor.to_asyncio` SC API's error + # relaying mechanism(s); the error pair is *possibly + # due-to* but **not necessarily** inter-related by some + # (subsys) state between the tasks, + # + # NOTE, also see the `test_trio_prestarted_task_bubbles` + # for reproducing detailed edge cases as per the above + # cases. 
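Since `cancel_trio()` is registered below via
`asyncio.Task.add_done_callback()`, here's a standalone sketch of
those done-callback mechanics (toy names, nothing from this patch):

    import asyncio

    async def boom():
        raise RuntimeError('aio-side')

    def on_done(task: asyncio.Task) -> None:
        # done-callbacks receive the completed task; reading
        # `.exception()` also marks the exc "retrieved" so the loop
        # won't warn 'Task exception was never retrieved'.
        if (exc := task.exception()) is not None:
            print(f'task errored: {exc!r}')

    async def main():
        task = asyncio.create_task(boom())
        task.add_done_callback(on_done)
        await asyncio.sleep(0.1)  # let it run and the callback fire

    asyncio.run(main())
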
+ # + if ( + not trio_cs.cancelled_caught + and + (trio_err := chan._trio_err) + and + type(trio_err) not in { + trio.Cancelled, + } + and ( + aio_err + and + type(aio_err) not in { + asyncio.CancelledError + } + ) + ): + eg = ExceptionGroup( + 'Both the `trio` and `asyncio` tasks errored independently!!\n', + (trio_err, aio_err), + ) + chan._trio_err = eg + chan._aio_err = eg + raise eg + + elif aio_err: # XXX raise any `asyncio` side error IFF it doesn't # match the one we just caught from the task above! # (that would indicate something weird/very-wrong # going on?) - if aio_err is not task_err: - # import pdbp; pdbp.set_trace() - raise aio_err from task_err + if aio_err is not relayed_aio_err: + raise aio_err from relayed_aio_err + + raise aio_err task.add_done_callback(cancel_trio) return chan +class AsyncioCancelled(Exception): + ''' + Asyncio cancelled translation (non-base) error + for use with the ``to_asyncio`` module + to be raised in the ``trio`` side task + + NOTE: this should NOT inherit from `asyncio.CancelledError` or + tests should break! + + ''' + + class TrioTaskExited(AsyncioCancelled): ''' The `trio`-side task exited without explicitly cancelling the @@ -483,13 +576,12 @@ async def translate_aio_errors( # import pdbp; pdbp.set_trace() # lolevel-debug - # relay cancel through to called ``asyncio`` task + # relay cancel through to called `asyncio` task chan._aio_err = AsyncioCancelled( f'trio`-side cancelled the `asyncio`-side,\n' f'c)>\n' f' |_{trio_task}\n\n' - f'{trio_err!r}\n' ) @@ -546,6 +638,7 @@ async def translate_aio_errors( raise except BaseException as _trio_err: + # await tractor.pause(shield=True) trio_err = _trio_err log.exception( '`trio`-side task errored?' @@ -619,11 +712,17 @@ async def translate_aio_errors( # pump the other side's task? needed? await trio.lowlevel.checkpoint() + # from tractor._state import is_root_process + # if is_root_process(): + # breakpoint() + if ( not chan._trio_err and (fut := aio_task._fut_waiter) ): + # await trio.lowlevel.checkpoint() + # import pdbp; pdbp.set_trace() fut.set_exception( TrioTaskExited( f'The peer `asyncio` task is still blocking/running?\n' @@ -632,11 +731,6 @@ async def translate_aio_errors( ) ) else: - # from tractor._state import is_root_process - # if is_root_process(): - # breakpoint() - # import pdbp; pdbp.set_trace() - aio_taskc_warn: str = ( f'\n' f'MANUALLY Cancelling `asyncio`-task: {aio_task.get_name()}!\n\n' @@ -663,7 +757,7 @@ async def translate_aio_errors( # it erroed out up there! # if wait_on_aio_task: - # await chan.wait_asyncio_complete() + # await chan.wait_aio_complete() await chan._aio_task_complete.wait() log.info( 'asyncio-task is done and unblocked trio-side!\n' @@ -771,11 +865,22 @@ async def open_channel_from( # sync to a "started()"-like first delivered value from the # ``asyncio`` task. try: - with chan._trio_cs: + with (cs := chan._trio_cs): first = await chan.receive() # deliver stream handle upward yield first, chan + except trio.Cancelled as taskc: + # await tractor.pause(shield=True) # ya it worx ;) + if cs.cancel_called: + log.cancel( + f'trio-side was manually cancelled by aio side\n' + f'|_c>}}{cs!r}?\n' + ) + # TODO, maybe a special `TrioCancelled`??? + + raise taskc + finally: chan._trio_exited = True chan._to_trio.close() @@ -893,12 +998,12 @@ def run_as_asyncio_guest( _sigint_loop_pump_delay: float = 0, ) -> None: -# ^-TODO-^ technically whatever `trio_main` returns.. we should -# try to use func-typevar-params at leaast by 3.13! 
-# -[ ] https://typing.readthedocs.io/en/latest/spec/callables.html#callback-protocols -# -[ ] https://peps.python.org/pep-0646/#using-type-variable-tuples-in-functions -# -[ ] https://typing.readthedocs.io/en/latest/spec/callables.html#unpack-for-keyword-arguments -# -[ ] https://peps.python.org/pep-0718/ + # ^-TODO-^ technically whatever `trio_main` returns.. we should + # try to use func-typevar-params at leaast by 3.13! + # -[ ] https://typing.readthedocs.io/en/latest/spec/callables.html#callback-protocols + # -[ ] https://peps.python.org/pep-0646/#using-type-variable-tuples-in-functions + # -[ ] https://typing.readthedocs.io/en/latest/spec/callables.html#unpack-for-keyword-arguments + # -[ ] https://peps.python.org/pep-0718/ ''' Entry for an "infected ``asyncio`` actor". @@ -957,15 +1062,15 @@ def run_as_asyncio_guest( # force_reload=True, # ) - def trio_done_callback(main_outcome): + def trio_done_callback(main_outcome: Outcome): log.runtime( f'`trio` guest-run finishing with outcome\n' f'>) {main_outcome}\n' f'|_{trio_done_fute}\n' ) + # import pdbp; pdbp.set_trace() if isinstance(main_outcome, Error): - # import pdbp; pdbp.set_trace() error: BaseException = main_outcome.error # show an dedicated `asyncio`-side tb from the error @@ -1165,6 +1270,10 @@ def run_as_asyncio_guest( ) raise AsyncioRuntimeTranslationError(message) from state_err + # XXX, should never get here ;) + # else: + # import pdbp; pdbp.set_trace() + # might as well if it's installed. try: import uvloop -- 2.34.1 From b7aa72465d8cd6d8cf86d26c9dbaba802d5062fc Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 24 Feb 2025 13:08:23 -0500 Subject: [PATCH 241/305] Draft test-doc for "out-of-band" `asyncio.Task`.. Since there's no way to activate `greenback`'s portal in such cases, we should at least have a test verifying our very loud error about the inability to support this usage.. --- tests/conftest.py | 12 ++++++++++++ tests/devx/test_pause_from_non_trio.py | 25 +++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 5ce84425..810b642a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -150,6 +150,18 @@ def pytest_generate_tests(metafunc): metafunc.parametrize("start_method", [spawn_backend], scope='module') +# TODO: a way to let test scripts (like from `examples/`) +# guarantee they won't registry addr collide! +# @pytest.fixture +# def open_test_runtime( +# reg_addr: tuple, +# ) -> AsyncContextManager: +# return partial( +# tractor.open_nursery, +# registry_addrs=[reg_addr], +# ) + + def sig_prog(proc, sig): "Kill the actor-process with ``sig``." proc.send_signal(sig) diff --git a/tests/devx/test_pause_from_non_trio.py b/tests/devx/test_pause_from_non_trio.py index f3fd15ad..3a7140e6 100644 --- a/tests/devx/test_pause_from_non_trio.py +++ b/tests/devx/test_pause_from_non_trio.py @@ -218,10 +218,9 @@ def expect_any_of( ) return expected_patts - # yield child -def test_pause_from_asyncio_task( +def test_sync_pause_from_aio_task( spawn, ctlc: bool # ^TODO, fix for `asyncio`!! @@ -327,3 +326,25 @@ def test_pause_from_asyncio_task( child.sendline('c') child.expect(EOF) + + +def test_sync_pause_from_non_greenbacked_aio_task(): + ''' + Where the `breakpoint()` caller task is NOT spawned by + `tractor.to_asyncio` and thus never activates + a `greenback.ensure_portal()` beforehand, presumably bc the task + was started by some lib/dep as in often seen in the field. 
+ + Ensure sync pausing works when the pause is in, + + - the root actor running in infected-mode? + |_ since we don't need any IPC to acquire the debug lock? + |_ is there some way to handle this like the non-main-thread case? + + All other cases need to error out appropriately right? + + - for any subactor we can't avoid needing the repl lock.. + |_ is there a way to hook into `asyncio.ensure_future(obj)`? + + ''' + pass -- 2.34.1 From 97b3b98893b0d655eb97138f871070b5e109e51b Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 24 Feb 2025 20:46:03 -0500 Subject: [PATCH 242/305] Bump various (dev) deps and prefer sys python Since it turns out there's a few gotchas moving to python 3.13, - we need to pin to new(er) `trio` which now flips to strict exception groups (something to be handled in a follow up patch). - since we're now using `uv` we should (at least for now) prefer the system `python` (over astral's distis) since they compile for `libedit` in terms of what the (new) `readline.backend: str` will read as; this will break our tab-completion and vi-mode settings in the `pdbp` REPL without a user configuring a `~/.editrc` appropriately. - go back to using latest `pdbp` (not a local dev version) since it should work fine presuming the previous bullet is addressed. Lock bumps, - for now use latest `trio==0.29.0` (which i gotta feeling might have broken some existing attempts at strict-eg handling i've tried..) - update to latest `xonsh`, `pdbp` and its dep `tabcompleter` Other cleaning, - put back in various deps "comments" from `poetry` content. - drop the `xonsh-vox` and `xontrib-vox` dev deps; no `vox` support with `uv` rn anyway.. --- pyproject.toml | 49 +++++++++++++++++++++++++++++---------------- uv.lock | 54 +++++++++++++++----------------------------------- 2 files changed, 48 insertions(+), 55 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9372685e..b1792340 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,24 +32,21 @@ classifiers = [ "Topic :: System :: Distributed Computing", ] dependencies = [ -# trio runtime and friends + # trio runtime and friends # (poetry) proper range specs, # https://packaging.python.org/en/latest/discussions/install-requires-vs-requirements/#id5 # TODO, for 3.13 we must go go `0.27` which means we have to # disable strict egs or port to handling them internally! - # trio='^0.27' - "trio>=0.24,<0.25", + "trio>0.27", "tricycle>=0.4.1,<0.5", "trio-typing>=0.10.0,<0.11", - "wrapt>=1.16.0,<2", "colorlog>=6.8.2,<7", - -# built-in multi-actor `pdb` REPL - "pdbp>=1.5.0,<2", - -# typed IPC msging -# TODO, get back on release once 3.13 support is out! + # built-in multi-actor `pdb` REPL + "pdbp>=1.6,<2", # windows only (from `pdbp`) + "tabcompleter>=1.4.0", + # typed IPC msging + # TODO, get back on release once 3.13 support is out! "msgspec", ] @@ -65,30 +62,46 @@ dev = [ # `tractor.devx` tooling "greenback>=1.2.1,<2", "stackscope>=0.2.2,<0.3", - - # xonsh usage/integration (namely as @goodboy's sh of choice Bp) - "xonsh>=0.19.1", - "xontrib-vox>=0.0.1,<0.0.2", - "prompt-toolkit>=3.0.43,<4", - "xonsh-vox-tabcomplete>=0.5,<0.6", "pyperclip>=1.9.0", + "prompt-toolkit>=3.0.50", + "xonsh>=0.19.2", ] +# ------ dependency-groups ------ + [tool.uv.sources] msgspec = { git = "https://github.com/jcrist/msgspec.git" } +# XXX NOTE, only for @goodboy's hacking on `pprint(sort_dicts=False)` +# for the `pp` alias.. 
+# pdbp = { path = "../pdbp", editable = true } + # ------ tool.uv.sources ------ # TODO, distributed (multi-host) extensions # linux kernel networking # 'pyroute2 +# ------ tool.uv.sources ------ + +[tool.uv] +# XXX NOTE, prefer the sys python bc apparently the distis from +# `astral` are built in a way that breaks `pdbp`+`tabcompleter`'s +# likely due to linking against `libedit` over `readline`.. +# |_https://docs.astral.sh/uv/concepts/python-versions/#managed-python-distributions +# |_https://gregoryszorc.com/docs/python-build-standalone/main/quirks.html#use-of-libedit-on-linux +# +# https://docs.astral.sh/uv/reference/settings/#python-preference +python-preference = 'system' + +# ------ tool.uv ------ + [tool.hatch.build.targets.sdist] include = ["tractor"] [tool.hatch.build.targets.wheel] include = ["tractor"] -# ------ dependency-groups ------ +# ------ tool.hatch ------ [tool.towncrier] package = "tractor" @@ -138,3 +151,5 @@ log_cli = false # TODO: maybe some of these layout choices? # https://docs.pytest.org/en/8.0.x/explanation/goodpractices.html#choosing-a-test-layout-import-rules # pythonpath = "src" + +# ------ tool.pytest ------ diff --git a/uv.lock b/uv.lock index 97b2e166..277152c5 100644 --- a/uv.lock +++ b/uv.lock @@ -330,6 +330,7 @@ dependencies = [ { name = "colorlog" }, { name = "msgspec" }, { name = "pdbp" }, + { name = "tabcompleter" }, { name = "tricycle" }, { name = "trio" }, { name = "trio-typing" }, @@ -345,17 +346,16 @@ dev = [ { name = "pytest" }, { name = "stackscope" }, { name = "xonsh" }, - { name = "xonsh-vox-tabcomplete" }, - { name = "xontrib-vox" }, ] [package.metadata] requires-dist = [ { name = "colorlog", specifier = ">=6.8.2,<7" }, { name = "msgspec", git = "https://github.com/jcrist/msgspec.git" }, - { name = "pdbp", specifier = ">=1.5.0,<2" }, + { name = "pdbp", specifier = ">=1.6,<2" }, + { name = "tabcompleter", specifier = ">=1.4.0" }, { name = "tricycle", specifier = ">=0.4.1,<0.5" }, - { name = "trio", specifier = ">=0.24,<0.25" }, + { name = "trio", specifier = ">0.27" }, { name = "trio-typing", specifier = ">=0.10.0,<0.11" }, { name = "wrapt", specifier = ">=1.16.0,<2" }, ] @@ -364,13 +364,11 @@ requires-dist = [ dev = [ { name = "greenback", specifier = ">=1.2.1,<2" }, { name = "pexpect", specifier = ">=4.9.0,<5" }, - { name = "prompt-toolkit", specifier = ">=3.0.43,<4" }, + { name = "prompt-toolkit", specifier = ">=3.0.50" }, { name = "pyperclip", specifier = ">=1.9.0" }, { name = "pytest", specifier = ">=8.2.0,<9" }, { name = "stackscope", specifier = ">=0.2.2,<0.3" }, - { name = "xonsh", specifier = ">=0.19.1" }, - { name = "xonsh-vox-tabcomplete", specifier = ">=0.5,<0.6" }, - { name = "xontrib-vox", specifier = ">=0.0.1,<0.0.2" }, + { name = "xonsh", specifier = ">=0.19.2" }, ] [[package]] @@ -387,7 +385,7 @@ wheels = [ [[package]] name = "trio" -version = "0.24.0" +version = "0.29.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, @@ -397,9 +395,9 @@ dependencies = [ { name = "sniffio" }, { name = "sortedcontainers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8a/f3/07c152213222c615fe2391b8e1fea0f5af83599219050a549c20fcbd9ba2/trio-0.24.0.tar.gz", hash = "sha256:ffa09a74a6bf81b84f8613909fb0beaee84757450183a7a2e0b47b455c0cac5d", size = 545131 } +sdist = { url = "https://files.pythonhosted.org/packages/a1/47/f62e62a1a6f37909aed0bf8f5d5411e06fa03846cfcb64540cd1180ccc9f/trio-0.29.0.tar.gz", hash = "sha256:ea0d3967159fc130acb6939a0be0e558e364fee26b5deeecc893a6b08c361bdf", size = 
588952 } wheels = [ - { url = "https://files.pythonhosted.org/packages/14/fb/9299cf74953f473a15accfdbe2c15218e766bae8c796f2567c83bae03e98/trio-0.24.0-py3-none-any.whl", hash = "sha256:c3bd3a4e3e3025cd9a2241eae75637c43fe0b9e88b4c97b9161a55b9e54cd72c", size = 460205 }, + { url = "https://files.pythonhosted.org/packages/c9/55/c4d9bea8b3d7937901958f65124123512419ab0eb73695e5f382521abbfb/trio-0.29.0-py3-none-any.whl", hash = "sha256:d8c463f1a9cc776ff63e331aba44c125f423a5a13c684307e828d930e625ba66", size = 492920 }, ] [[package]] @@ -492,35 +490,15 @@ wheels = [ [[package]] name = "xonsh" -version = "0.19.1" +version = "0.19.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/98/6e/b54a0b2685535995ee50f655103c463f9d339455c9b08c4bce3e03e7bb17/xonsh-0.19.1.tar.gz", hash = "sha256:5d3de649c909f6d14bc69232219bcbdb8152c830e91ddf17ad169c672397fb97", size = 796468 } +sdist = { url = "https://files.pythonhosted.org/packages/68/4e/56e95a5e607eb3b0da37396f87cde70588efc8ef819ab16f02d5b8378dc4/xonsh-0.19.2.tar.gz", hash = "sha256:cfdd0680d954a2c3aefd6caddcc7143a3d06aa417ed18365a08219bb71b960b0", size = 799960 } wheels = [ - { url = "https://files.pythonhosted.org/packages/8c/e6/db44068c5725af9678e37980ae9503165393d51b80dc8517fa4ec74af1cf/xonsh-0.19.1-py310-none-any.whl", hash = "sha256:83eb6610ed3535f8542abd80af9554fb7e2805b0b3f96e445f98d4b5cf1f7046", size = 640686 }, - { url = "https://files.pythonhosted.org/packages/77/4e/e487e82349866b245c559433c9ba626026a2e66bd17d7f9ac1045082f146/xonsh-0.19.1-py311-none-any.whl", hash = "sha256:c176e515b0260ab803963d1f0924f1e32f1064aa6fd5d791aa0cf6cda3a924ae", size = 640680 }, - { url = "https://files.pythonhosted.org/packages/5d/88/09060815548219b8f6953a06c247cb5c92d03cbdf7a02a980bda1b5754db/xonsh-0.19.1-py312-none-any.whl", hash = "sha256:fe1266c86b117aced3bdc4d5972420bda715864435d0bd3722d63451e8001036", size = 640604 }, - { url = "https://files.pythonhosted.org/packages/83/ff/7873cb8184cffeafddbf861712831c2baa2e9dbecdbfd33b1228f0db0019/xonsh-0.19.1-py313-none-any.whl", hash = "sha256:3f158b6fc0bba954e0b989004d4261bafc4bd94c68c2abd75b825da23e5a869c", size = 641166 }, - { url = "https://files.pythonhosted.org/packages/cc/03/b9f8dd338df0a330011d104e63d4d0acd8bbbc1e990ff049487b6bdf585d/xonsh-0.19.1-py39-none-any.whl", hash = "sha256:a900a6eb87d881a7ef90b1ac8522ba3699582f0bcb1e9abd863d32f6d63faf04", size = 632912 }, -] - -[[package]] -name = "xonsh-vox-tabcomplete" -version = "0.5" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ab/fd/af0c2ee6c067c2a4dc64ec03598c94de1f6ec5984b3116af917f3add4a16/xonsh_vox_tabcomplete-0.5-py3-none-any.whl", hash = "sha256:9701b198180f167071234e77eab87b7befa97c1873b088d0b3fbbe6d6d8dcaad", size = 14381 }, -] - -[[package]] -name = "xontrib-vox" -version = "0.0.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "xonsh" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/6c/ac/a5db68a1f2e4036f7ff4c8546b1cbe29edee2ff40e0ff931836745988b79/xontrib-vox-0.0.1.tar.gz", hash = "sha256:c1f0b155992b4b0ebe6dcfd651084a8707ade7372f7e456c484d2a85339d9907", size = 16504 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/23/58/dcdf11849c8340033da00669527ce75d8292a4e8d82605c082ed236a081a/xontrib_vox-0.0.1-py3-none-any.whl", hash = "sha256:df2bbb815832db5b04d46684f540eac967ee40ef265add2662a95d6947d04c70", size = 13467 }, + { url = 
"https://files.pythonhosted.org/packages/6c/13/281094759df87b23b3c02dc4a16603ab08ea54d7f6acfeb69f3341137c7a/xonsh-0.19.2-py310-none-any.whl", hash = "sha256:ec7f163fd3a4943782aa34069d4e72793328c916a5975949dbec8536cbfc089b", size = 642301 }, + { url = "https://files.pythonhosted.org/packages/29/41/a51e4c3918fe9a293b150cb949b1b8c6d45eb17dfed480dcb76ea43df4e7/xonsh-0.19.2-py311-none-any.whl", hash = "sha256:53c45f7a767901f2f518f9b8dd60fc653e0498e56e89825e1710bb0859985049", size = 642286 }, + { url = "https://files.pythonhosted.org/packages/0a/93/9a77b731f492fac27c577dea2afb5a2bcc2a6a1c79be0c86c95498060270/xonsh-0.19.2-py312-none-any.whl", hash = "sha256:b24c619aa52b59eae4d35c4195dba9b19a2c548fb5c42c6f85f2b8ccb96807b5", size = 642386 }, + { url = "https://files.pythonhosted.org/packages/be/75/070324769c1ff88d971ce040f4f486339be98e0a365c8dd9991eb654265b/xonsh-0.19.2-py313-none-any.whl", hash = "sha256:c53ef6c19f781fbc399ed1b382b5c2aac2125010679a3b61d643978273c27df0", size = 642873 }, + { url = "https://files.pythonhosted.org/packages/fa/cb/2c7ccec54f5b0e73fdf7650e8336582ff0347d9001c5ef8271dc00c034fe/xonsh-0.19.2-py39-none-any.whl", hash = "sha256:bcc0225dc3847f1ed2f175dac6122fbcc54cea67d9c2dc2753d9615e2a5ff284", size = 634602 }, ] [[package]] -- 2.34.1 From 8573cd32639a5510e0d8dc6a142c7d5719ec5cf8 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 25 Feb 2025 11:16:01 -0500 Subject: [PATCH 243/305] Tweak some test asserts to better `is` style --- tests/test_cancellation.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_cancellation.py b/tests/test_cancellation.py index 92540ed4..ece4d3c7 100644 --- a/tests/test_cancellation.py +++ b/tests/test_cancellation.py @@ -130,7 +130,7 @@ def test_multierror( try: await portal2.result() except tractor.RemoteActorError as err: - assert err.boxed_type == AssertionError + assert err.boxed_type is AssertionError print("Look Maa that first actor failed hard, hehh") raise @@ -182,7 +182,7 @@ def test_multierror_fast_nursery(reg_addr, start_method, num_subactors, delay): for exc in exceptions: assert isinstance(exc, tractor.RemoteActorError) - assert exc.boxed_type == AssertionError + assert exc.boxed_type is AssertionError async def do_nothing(): @@ -504,7 +504,9 @@ def test_cancel_via_SIGINT_other_task( if is_win(): # smh timeout += 1 - async def spawn_and_sleep_forever(task_status=trio.TASK_STATUS_IGNORED): + async def spawn_and_sleep_forever( + task_status=trio.TASK_STATUS_IGNORED + ): async with tractor.open_nursery() as tn: for i in range(3): await tn.run_in_actor( -- 2.34.1 From 9a44c677280c4ba3aa49987623f776bc7c2649df Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 25 Feb 2025 11:20:07 -0500 Subject: [PATCH 244/305] Drop `asyncio`-canc error from `._exceptions` --- tractor/_exceptions.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 89ea21ad..b4386db0 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -981,18 +981,6 @@ class MessagingError(Exception): ''' -class AsyncioCancelled(Exception): - ''' - Asyncio cancelled translation (non-base) error - for use with the ``to_asyncio`` module - to be raised in the ``trio`` side task - - NOTE: this should NOT inherit from `asyncio.CancelledError` or - tests should break! 
- - ''' - - def pack_error( exc: BaseException|RemoteActorError, @@ -1172,7 +1160,7 @@ def is_multi_cancelled( trio.Cancelled in ignore_nested # XXX always count-in `trio`'s native signal ): - ignore_nested |= {trio.Cancelled} + ignore_nested.update({trio.Cancelled}) if isinstance(exc, BaseExceptionGroup): matched_exc: BaseExceptionGroup|None = exc.subgroup( -- 2.34.1 From c6ef88a4b2409b4745db5c185d6266fe32110220 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 25 Feb 2025 11:20:57 -0500 Subject: [PATCH 245/305] Clean up some imports in `._clustering` --- tractor/_clustering.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tractor/_clustering.py b/tractor/_clustering.py index 93562fe8..46224d6f 100644 --- a/tractor/_clustering.py +++ b/tractor/_clustering.py @@ -19,10 +19,13 @@ Actor cluster helpers. ''' from __future__ import annotations - -from contextlib import asynccontextmanager as acm +from contextlib import ( + asynccontextmanager as acm, +) from multiprocessing import cpu_count -from typing import AsyncGenerator, Optional +from typing import ( + AsyncGenerator, +) import trio import tractor -- 2.34.1 From de4c33d15812fec7ed4b35e139df599bcc8ac5e6 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 25 Feb 2025 19:37:30 -0500 Subject: [PATCH 246/305] Flip to `strict_exception_groups=False` in core tns Since it'll likely need a bit of detailing to get the test suite running identically with strict egs (exception groups), i've opted to just flip the switch on a few core nursery scopes for now until as such a time i can focus enough to port the matching internals.. Xp --- tractor/_context.py | 5 ++++- tractor/_root.py | 17 +++++++++++++---- tractor/_rpc.py | 8 ++++++-- tractor/_supervise.py | 10 ++++++++-- 4 files changed, 31 insertions(+), 9 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index d4cad88e..1c904b55 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -1982,7 +1982,10 @@ async def open_context_from_portal( ctxc_from_callee: ContextCancelled|None = None try: async with ( - trio.open_nursery() as tn, + trio.open_nursery( + strict_exception_groups=False, + ) as tn, + msgops.maybe_limit_plds( ctx=ctx, spec=ctx_meta.get('pld_spec'), diff --git a/tractor/_root.py b/tractor/_root.py index e10b02ef..ed71f69e 100644 --- a/tractor/_root.py +++ b/tractor/_root.py @@ -362,7 +362,10 @@ async def open_root_actor( ) # start the actor runtime in a new task - async with trio.open_nursery() as nursery: + async with trio.open_nursery( + strict_exception_groups=False, + # ^XXX^ TODO? instead unpack any RAE as per "loose" style? + ) as nursery: # ``_runtime.async_main()`` creates an internal nursery # and blocks here until any underlying actor(-process) @@ -457,12 +460,19 @@ def run_daemon( start_method: str | None = None, debug_mode: bool = False, + + # TODO, support `infected_aio=True` mode by, + # - calling the appropriate entrypoint-func from `.to_asyncio` + # - maybe init-ing `greenback` as done above in + # `open_root_actor()`. + **kwargs ) -> None: ''' - Spawn daemon actor which will respond to RPC; the main task simply - starts the runtime and then sleeps forever. + Spawn a root (daemon) actor which will respond to RPC; the main + task simply starts the runtime and then blocks via embedded + `trio.sleep_forever()`. 
This is a very minimal convenience wrapper around starting a "run-until-cancelled" root actor which can be started with a set @@ -475,7 +485,6 @@ def run_daemon( importlib.import_module(path) async def _main(): - async with open_root_actor( registry_addrs=registry_addrs, name=name, diff --git a/tractor/_rpc.py b/tractor/_rpc.py index a77c2af7..e170024c 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -620,7 +620,11 @@ async def _invoke( tn: trio.Nursery rpc_ctx_cs: CancelScope async with ( - trio.open_nursery() as tn, + trio.open_nursery( + strict_exception_groups=False, + # ^XXX^ TODO? instead unpack any RAE as per "loose" style? + + ) as tn, msgops.maybe_limit_plds( ctx=ctx, spec=ctx_meta.get('pld_spec'), @@ -733,8 +737,8 @@ async def _invoke( # XXX: do we ever trigger this block any more? except ( BaseExceptionGroup, - trio.Cancelled, BaseException, + trio.Cancelled, ) as scope_error: if ( diff --git a/tractor/_supervise.py b/tractor/_supervise.py index de268078..b07498b0 100644 --- a/tractor/_supervise.py +++ b/tractor/_supervise.py @@ -395,7 +395,10 @@ async def _open_and_supervise_one_cancels_all_nursery( # `ActorNursery.start_actor()`). # errors from this daemon actor nursery bubble up to caller - async with trio.open_nursery() as da_nursery: + async with trio.open_nursery( + strict_exception_groups=False, + # ^XXX^ TODO? instead unpack any RAE as per "loose" style? + ) as da_nursery: try: # This is the inner level "run in actor" nursery. It is # awaited first since actors spawned in this way (using @@ -405,7 +408,10 @@ async def _open_and_supervise_one_cancels_all_nursery( # immediately raised for handling by a supervisor strategy. # As such if the strategy propagates any error(s) upwards # the above "daemon actor" nursery will be notified. - async with trio.open_nursery() as ria_nursery: + async with trio.open_nursery( + strict_exception_groups=False, + # ^XXX^ TODO? instead unpack any RAE as per "loose" style? + ) as ria_nursery: an = ActorNursery( actor, -- 2.34.1 From 4de48972aa74ee8df847124366dd14cb523b1821 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 25 Feb 2025 20:14:38 -0500 Subject: [PATCH 247/305] Unset `$PYTHON_COLORS` for test debugger suite.. Since obvi all our `pexpect` patterns aren't going to match with a heck-ton of terminal color escape sequences in the output XD --- tests/devx/conftest.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tests/devx/conftest.py b/tests/devx/conftest.py index e1ad2ea3..ae594c7c 100644 --- a/tests/devx/conftest.py +++ b/tests/devx/conftest.py @@ -30,7 +30,7 @@ from conftest import ( @pytest.fixture def spawn( start_method, - testdir: pytest.Testdir, + testdir: pytest.Pytester, reg_addr: tuple[str, int], ) -> Callable[[str], None]: @@ -44,16 +44,32 @@ def spawn( '`pexpect` based tests only supported on `trio` backend' ) + def unset_colors(): + ''' + Python 3.13 introduced colored tracebacks that break patt + matching, + + https://docs.python.org/3/using/cmdline.html#envvar-PYTHON_COLORS + https://docs.python.org/3/using/cmdline.html#using-on-controlling-color + + ''' + import os + os.environ['PYTHON_COLORS'] = '0' + def _spawn( cmd: str, **mkcmd_kwargs, ): + unset_colors() return testdir.spawn( cmd=mk_cmd( cmd, **mkcmd_kwargs, ), expect_timeout=3, + # preexec_fn=unset_colors, + # ^TODO? get `pytest` core to expose underlying + # `pexpect.spawn()` stuff? ) # such that test-dep can pass input script name. 
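
A rough standalone sketch of the same trick for reference; the script
path and REPL prompt pattern here are illustrative assumptions (NOT
part of this patch), but `pexpect`'s `spawn()/expect()` calls and the
`$PYTHON_COLORS` env-var are the real APIs in play. With colors
disabled the child emits no ANSI escape codes, so the literal patterns
keep matching:

    import os
    import pexpect

    # py3.13+ colorizes child tracebacks by default which breaks
    # literal pattern matching,
    # https://docs.python.org/3/using/cmdline.html#envvar-PYTHON_COLORS
    env: dict[str, str] = os.environ | {'PYTHON_COLORS': '0'}

    child = pexpect.spawn(
        'python examples/debugging/some_bp_script.py',  # hypothetical script
        env=env,
        timeout=3,
    )
    child.expect(r'\(Pdb\+\)')  # assuming `pdbp`'s default prompt
    child.sendline('c')
    child.expect(pexpect.EOF)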
-- 2.34.1 From 1f951a94f372159e8eff05724a3f93f961b10091 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 25 Feb 2025 20:15:59 -0500 Subject: [PATCH 248/305] Another `is` fix.. --- tests/test_inter_peer_cancellation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_inter_peer_cancellation.py b/tests/test_inter_peer_cancellation.py index 7bf9a2bd..bac9a791 100644 --- a/tests/test_inter_peer_cancellation.py +++ b/tests/test_inter_peer_cancellation.py @@ -170,7 +170,7 @@ def test_do_not_swallow_error_before_started_by_remote_contextcancelled( trio.run(main) rae = excinfo.value - assert rae.boxed_type == TypeError + assert rae.boxed_type is TypeError @tractor.context -- 2.34.1 From 747f89c3ef4e5fdadb8ad6299fd97e2ab3777685 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 26 Feb 2025 13:04:37 -0500 Subject: [PATCH 249/305] Expose `hide_tb: bool` from `.open_nursery()` Such that it gets passed through to `.open_root_actor()` in the `implicit_runtime==True` case - useful for debugging cases where `.devx._debug` APIs might be used to avoid REPL clobbering in subactors. --- tractor/_root.py | 8 +++++++- tractor/_supervise.py | 21 ++++++++++++++------- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/tractor/_root.py b/tractor/_root.py index ed71f69e..2a9beaa3 100644 --- a/tractor/_root.py +++ b/tractor/_root.py @@ -111,8 +111,8 @@ async def open_root_actor( Runtime init entry point for ``tractor``. ''' - __tracebackhide__: bool = hide_tb _debug.hide_runtime_frames() + __tracebackhide__: bool = hide_tb # TODO: stick this in a `@cm` defined in `devx._debug`? # @@ -390,6 +390,12 @@ async def open_root_actor( BaseExceptionGroup, ) as err: + # TODO, in beginning to handle the subsubactor with + # crashed grandparent cases.. + # + # was_locked: bool = await _debug.maybe_wait_for_debugger( + # child_in_debug=True, + # ) # XXX NOTE XXX see equiv note inside # `._runtime.Actor._stream_handler()` where in the # non-root or root-that-opened-this-mahually case we diff --git a/tractor/_supervise.py b/tractor/_supervise.py index b07498b0..4ecc1a29 100644 --- a/tractor/_supervise.py +++ b/tractor/_supervise.py @@ -402,7 +402,7 @@ async def _open_and_supervise_one_cancels_all_nursery( try: # This is the inner level "run in actor" nursery. It is # awaited first since actors spawned in this way (using - # ``ActorNusery.run_in_actor()``) are expected to only + # `ActorNusery.run_in_actor()`) are expected to only # return a single result and then complete (i.e. be canclled # gracefully). Errors collected from these actors are # immediately raised for handling by a supervisor strategy. @@ -478,8 +478,8 @@ async def _open_and_supervise_one_cancels_all_nursery( ContextCancelled, }: log.cancel( - 'Actor-nursery caught remote cancellation\n\n' - + 'Actor-nursery caught remote cancellation\n' + '\n' f'{inner_err.tb_str}' ) else: @@ -571,7 +571,9 @@ async def _open_and_supervise_one_cancels_all_nursery( @acm # @api_frame async def open_nursery( + hide_tb: bool = False, **kwargs, + # ^TODO, paramspec for `open_root_actor()` ) -> typing.AsyncGenerator[ActorNursery, None]: ''' @@ -589,7 +591,7 @@ async def open_nursery( which cancellation scopes correspond to each spawned subactor set. 
''' - __tracebackhide__: bool = True + __tracebackhide__: bool = hide_tb implicit_runtime: bool = False actor: Actor = current_actor(err_on_no_runtime=False) an: ActorNursery|None = None @@ -605,7 +607,10 @@ async def open_nursery( # mark us for teardown on exit implicit_runtime: bool = True - async with open_root_actor(**kwargs) as actor: + async with open_root_actor( + hide_tb=hide_tb, + **kwargs, + ) as actor: assert actor is current_actor() try: @@ -643,8 +648,10 @@ async def open_nursery( # show frame on any internal runtime-scope error if ( an - and not an.cancelled - and an._scope_error + and + not an.cancelled + and + an._scope_error ): __tracebackhide__: bool = False -- 2.34.1 From 18528dde33c869f3973d554fa32f33999184a542 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 26 Feb 2025 13:16:15 -0500 Subject: [PATCH 250/305] Log format tweaks for sclang reprs A space here, a newline there.. --- tractor/_context.py | 5 +++-- tractor/_entry.py | 2 +- tractor/_rpc.py | 4 ++-- tractor/_runtime.py | 7 +++++-- tractor/_spawn.py | 7 ++++--- 5 files changed, 15 insertions(+), 10 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 1c904b55..eb66aade 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -950,7 +950,7 @@ class Context: # f'Context.cancel() => {self.chan.uid}\n' f'c)=> {self.chan.uid}\n' # f'{self.chan.uid}\n' - f' |_ @{self.dst_maddr}\n' + f' |_ @{self.dst_maddr}\n' f' >> {self.repr_rpc}\n' # f' >> {self._nsf}() -> {codec}[dict]:\n\n' # TODO: pull msg-type from spec re #320 @@ -1003,7 +1003,8 @@ class Context: ) else: log.cancel( - 'Timed out on cancel request of remote task?\n' + f'Timed out on cancel request of remote task?\n' + f'\n' f'{reminfo}' ) diff --git a/tractor/_entry.py b/tractor/_entry.py index 19dcb9f6..8156d25f 100644 --- a/tractor/_entry.py +++ b/tractor/_entry.py @@ -238,7 +238,7 @@ def _trio_main( nest_from_op( input_op='>(', # see syntax ideas above tree_str=actor_info, - back_from_op=1, + back_from_op=2, # since "complete" ) ) logmeth = log.info diff --git a/tractor/_rpc.py b/tractor/_rpc.py index e170024c..9e50c5de 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -851,8 +851,8 @@ async def try_ship_error_to_remote( log.critical( 'IPC transport failure -> ' f'failed to ship error to {remote_descr}!\n\n' - f'X=> {channel.uid}\n\n' - + f'{type(msg)!r}[{msg.boxed_type}] X=> {channel.uid}\n' + f'\n' # TODO: use `.msg.preetty_struct` for this! f'{msg}\n' ) diff --git a/tractor/_runtime.py b/tractor/_runtime.py index 7a00d613..fef92e66 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -1283,7 +1283,8 @@ class Actor: msg: str = ( f'Actor-runtime cancel request from {requester_type}\n\n' f'<=c) {requesting_uid}\n' - f' |_{self}\n' + f' |_{self}\n' + f'\n' ) # TODO: what happens here when we self-cancel tho? @@ -1303,13 +1304,15 @@ class Actor: lock_req_ctx.has_outcome ): msg += ( - '-> Cancelling active debugger request..\n' + f'\n' + f'-> Cancelling active debugger request..\n' f'|_{_debug.Lock.repr()}\n\n' f'|_{lock_req_ctx}\n\n' ) # lock_req_ctx._scope.cancel() # TODO: wrap this in a method-API.. 
debug_req.req_cs.cancel() + # if lock_req_ctx: # self-cancel **all** ongoing RPC tasks await self.cancel_rpc_tasks( diff --git a/tractor/_spawn.py b/tractor/_spawn.py index 562c7e5b..3159508d 100644 --- a/tractor/_spawn.py +++ b/tractor/_spawn.py @@ -327,9 +327,10 @@ async def soft_kill( uid: tuple[str, str] = portal.channel.uid try: log.cancel( - 'Soft killing sub-actor via portal request\n' - f'c)> {portal.chan.uid}\n' - f' |_{proc}\n' + f'Soft killing sub-actor via portal request\n' + f'\n' + f'(c=> {portal.chan.uid}\n' + f' |_{proc}\n' ) # wait on sub-proc to signal termination await wait_func(proc) -- 2.34.1 From a25f093ba5c82302e3361f76735f131e6aceedee Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 26 Feb 2025 13:49:14 -0500 Subject: [PATCH 251/305] Disable tb colors in `._testing.mk_cmd()` Unset the appropriate cpython osenv var such that our `pexpect` script runs in the test suite can maintain original matching logic. --- tractor/_testing/__init__.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/tractor/_testing/__init__.py b/tractor/_testing/__init__.py index 1f6624e9..43507c33 100644 --- a/tractor/_testing/__init__.py +++ b/tractor/_testing/__init__.py @@ -19,7 +19,10 @@ Various helpers/utils for auditing your `tractor` app and/or the core runtime. ''' -from contextlib import asynccontextmanager as acm +from contextlib import ( + asynccontextmanager as acm, +) +import os import pathlib import tractor @@ -59,7 +62,12 @@ def mk_cmd( exs_subpath: str = 'debugging', ) -> str: ''' - Generate a shell command suitable to pass to ``pexpect.spawn()``. + Generate a shell command suitable to pass to `pexpect.spawn()` + which runs the script as a python program's entrypoint. + + In particular ensure we disable the new tb coloring via unsetting + `$PYTHON_COLORS` so that `pexpect` can pattern match without + color-escape-codes. ''' script_path: pathlib.Path = ( @@ -67,10 +75,15 @@ def mk_cmd( / exs_subpath / f'{ex_name}.py' ) - return ' '.join([ + py_cmd: str = ' '.join([ 'python', str(script_path) ]) + # XXX, required for py 3.13+ + # https://docs.python.org/3/using/cmdline.html#using-on-controlling-color + # https://docs.python.org/3/using/cmdline.html#envvar-PYTHON_COLORS + os.environ['PYTHON_COLORS'] = '0' + return py_cmd @acm -- 2.34.1 From 3a9a15ceb2542966acb73bbe7409dd2a538d06f8 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 26 Feb 2025 18:06:06 -0500 Subject: [PATCH 252/305] A couple more loose-egs flag flips Namely inside, - `ActorNursery.open_portal()` which uses `.trionics.maybe_open_nursery()` and is now adjusted to pass-through `**kwargs` for at least this flag. - inside the `.trionics.gather_contexts()`. --- tractor/_portal.py | 4 ++++ tractor/trionics/_broadcast.py | 2 +- tractor/trionics/_mngrs.py | 13 ++++++++++--- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/tractor/_portal.py b/tractor/_portal.py index f5a66836..7fbf69b2 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -533,6 +533,10 @@ async def open_portal( async with maybe_open_nursery( tn, shield=shield, + strict_exception_groups=False, + # ^XXX^ TODO? soo roll our own then ?? + # -> since we kinda want the "if only one `.exception` then + # just raise that" interface? ) as tn: if not channel.connected(): diff --git a/tractor/trionics/_broadcast.py b/tractor/trionics/_broadcast.py index 154b037d..2286e70d 100644 --- a/tractor/trionics/_broadcast.py +++ b/tractor/trionics/_broadcast.py @@ -15,7 +15,7 @@ # along with this program. 
If not, see <https://www.gnu.org/licenses/>.

 '''
-``tokio`` style broadcast channel.
+`tokio` style broadcast channel.

 https://docs.rs/tokio/1.11.0/tokio/sync/broadcast/index.html

 '''
diff --git a/tractor/trionics/_mngrs.py b/tractor/trionics/_mngrs.py
index fd224d65..9a5ed156 100644
--- a/tractor/trionics/_mngrs.py
+++ b/tractor/trionics/_mngrs.py
@@ -57,6 +57,8 @@ async def maybe_open_nursery(
     shield: bool = False,
     lib: ModuleType = trio,

+    **kwargs,  # proxy thru
+
 ) -> AsyncGenerator[trio.Nursery, Any]:
     '''
     Create a new nursery if None provided.
@@ -67,7 +69,7 @@ async def maybe_open_nursery(
     if nursery is not None:
         yield nursery
     else:
-        async with lib.open_nursery() as nursery:
+        async with lib.open_nursery(**kwargs) as nursery:
             nursery.cancel_scope.shield = shield
             yield nursery
@@ -143,9 +145,14 @@ async def gather_contexts(
             'Use a non-lazy iterator or sequence type intead!'
         )

-    async with trio.open_nursery() as n:
+    async with trio.open_nursery(
+        strict_exception_groups=False,
+        # ^XXX^ TODO? soo roll our own then ??
+        # -> since we kinda want the "if only one `.exception` then
+        # just raise that" interface?
+    ) as tn:
         for mngr in mngrs:
-            n.start_soon(
+            tn.start_soon(
                 _enter_and_wait,
                 mngr,
                 unwrapped,
-- 
2.34.1

From cd1628e3a32b0ecfbf6f39f8ed0efb906b75b32f Mon Sep 17 00:00:00 2001
From: Tyler Goodlet 
Date: Wed, 26 Feb 2025 18:21:19 -0500
Subject: [PATCH 253/305] Handle egs on failed `request_root_stdio_lock()`

Namely when the subactor fails to lock the root, in which case we try
to be very verbose about how/what failed in logging as well as ensure
we cancel the employed IPC ctx.

Implement the outer `BaseException` handler to handle both styles,
- match on an eg (or the prior std cancel excs) only raising a lone
  sub-exc for the former.
- always `as _req_err:` and assign to a new func-global `req_err`
  to enable the above matching.

Other,
- raise `DebugStateError` on `status.subactor_uid != actor_uid`.
- fix a `_repl_fail_report` ref error due to making silly assumptions
  about the `_repl_fail_msg` global; now copy from global as default.
- various log-fmt and logic expression styling tweaks.
- ignore `trio.Cancelled` by default in `open_crash_handler()`.
---
 tractor/devx/_debug.py | 137 +++++++++++++++++++++++++----------------
 1 file changed, 84 insertions(+), 53 deletions(-)

diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py
index 04df000f..884c5aea 100644
--- a/tractor/devx/_debug.py
+++ b/tractor/devx/_debug.py
@@ -317,8 +317,6 @@ class Lock:
         we_released: bool = False
         ctx_in_debug: Context|None = cls.ctx_in_debug
         repl_task: Task|Thread|None = DebugStatus.repl_task
-        message: str = ''
-
         try:
             if not DebugStatus.is_main_trio_thread():
                 thread: threading.Thread = threading.current_thread()
@@ -333,6 +331,10 @@ class Lock:
                     return False

             task: Task = current_task()
+            message: str = (
+                'TTY NOT RELEASED on behalf of caller\n'
+                f'|_{task}\n'
+            )

             # sanity check that if we're the root actor
             # the lock is marked as such.
@@ -347,11 +349,6 @@ class Lock:
             else:
                 assert DebugStatus.repl_task is not task

-            message: str = (
-                'TTY lock was NOT released on behalf of caller\n'
-                f'|_{task}\n'
-            )
-
             lock: trio.StrictFIFOLock = cls._debug_lock
             owner: Task = lock.statistics().owner
             if (
@@ -366,23 +363,21 @@
                 # correct task, greenback-spawned-task and/or thread
                 # being set to the `.repl_task` such that the above
                 # condition matches and we actually release the lock.
+                #
                 # This is particular of note from `.pause_from_sync()`!
- ): cls._debug_lock.release() we_released: bool = True if repl_task: message: str = ( - 'Lock released on behalf of root-actor-local REPL owner\n' + 'TTY released on behalf of root-actor-local REPL owner\n' f'|_{repl_task}\n' ) else: message: str = ( - 'TTY lock released by us on behalf of remote peer?\n' - f'|_ctx_in_debug: {ctx_in_debug}\n\n' + 'TTY released by us on behalf of remote peer?\n' + f'{ctx_in_debug}\n' ) - # mk_pdb().set_trace() - # elif owner: except RuntimeError as rte: log.exception( @@ -400,7 +395,8 @@ class Lock: req_handler_finished: trio.Event|None = Lock.req_handler_finished if ( not lock_stats.owner - and req_handler_finished is None + and + req_handler_finished is None ): message += ( '-> No new task holds the TTY lock!\n\n' @@ -418,8 +414,8 @@ class Lock: repl_task ) message += ( - f'A non-caller task still owns this lock on behalf of ' - f'`{behalf_of_task}`\n' + f'A non-caller task still owns this lock on behalf of\n' + f'{behalf_of_task}\n' f'lock owner task: {lock_stats.owner}\n' ) @@ -447,8 +443,6 @@ class Lock: if message: log.devx(message) - else: - import pdbp; pdbp.set_trace() return we_released @@ -668,10 +662,11 @@ async def lock_stdio_for_peer( fail_reason: str = ( f'on behalf of peer\n\n' f'x)<=\n' - f' |_{subactor_task_uid!r}@{ctx.chan.uid!r}\n\n' - + f' |_{subactor_task_uid!r}@{ctx.chan.uid!r}\n' + f'\n' 'Forcing `Lock.release()` due to acquire failure!\n\n' - f'x)=> {ctx}\n' + f'x)=>\n' + f' {ctx}' ) if isinstance(req_err, trio.Cancelled): fail_reason = ( @@ -1179,7 +1174,7 @@ async def request_root_stdio_lock( log.devx( 'Initing stdio-lock request task with root actor' ) - # TODO: likely we can implement this mutex more generally as + # TODO: can we implement this mutex more generally as # a `._sync.Lock`? # -[ ] simply add the wrapping needed for the debugger specifics? # - the `__pld_spec__` impl and maybe better APIs for the client @@ -1190,6 +1185,7 @@ async def request_root_stdio_lock( # - https://docs.python.org/3.8/library/multiprocessing.html#multiprocessing.RLock DebugStatus.req_finished = trio.Event() DebugStatus.req_task = current_task() + req_err: BaseException|None = None try: from tractor._discovery import get_root # NOTE: we need this to ensure that this task exits @@ -1212,6 +1208,7 @@ async def request_root_stdio_lock( # ) DebugStatus.req_cs = req_cs req_ctx: Context|None = None + ctx_eg: BaseExceptionGroup|None = None try: # TODO: merge into single async with ? 
async with get_root() as portal: @@ -1242,7 +1239,12 @@ async def request_root_stdio_lock( ) # try: - assert status.subactor_uid == actor_uid + if (locker := status.subactor_uid) != actor_uid: + raise DebugStateError( + f'Root actor locked by another peer !?\n' + f'locker: {locker!r}\n' + f'actor_uid: {actor_uid}\n' + ) assert status.cid # except AttributeError: # log.exception('failed pldspec asserts!') @@ -1279,10 +1281,11 @@ async def request_root_stdio_lock( f'Exitting {req_ctx.side!r}-side of locking req_ctx\n' ) - except ( + except* ( tractor.ContextCancelled, trio.Cancelled, - ): + ) as _taskc_eg: + ctx_eg = _taskc_eg log.cancel( 'Debug lock request was CANCELLED?\n\n' f'<=c) {req_ctx}\n' @@ -1291,21 +1294,23 @@ async def request_root_stdio_lock( ) raise - except ( + except* ( BaseException, - ) as ctx_err: + ) as _ctx_eg: + ctx_eg = _ctx_eg message: str = ( - 'Failed during debug request dialog with root actor?\n\n' + 'Failed during debug request dialog with root actor?\n' ) if (req_ctx := DebugStatus.req_ctx): message += ( - f'<=x) {req_ctx}\n\n' + f'<=x)\n' + f' |_{req_ctx}\n' f'Cancelling IPC ctx!\n' ) try: await req_ctx.cancel() except trio.ClosedResourceError as terr: - ctx_err.add_note( + ctx_eg.add_note( # f'Failed with {type(terr)!r} x)> `req_ctx.cancel()` ' f'Failed with `req_ctx.cancel()` bool: actor: Actor = current_actor() if not is_root_process(): - raise RuntimeError('This is a root-actor only API!') + raise InternalError('This is a root-actor only API!') if ( (ctx := Lock.ctx_in_debug) @@ -2143,11 +2172,12 @@ async def _pause( # `_enter_repl_sync()` into a common @cm? except BaseException as _pause_err: pause_err: BaseException = _pause_err + _repl_fail_report: str|None = _repl_fail_msg if isinstance(pause_err, bdb.BdbQuit): log.devx( 'REPL for pdb was explicitly quit!\n' ) - _repl_fail_msg = None + _repl_fail_report = None # when the actor is mid-runtime cancellation the # `Actor._service_n` might get closed before we can spawn @@ -2167,16 +2197,16 @@ async def _pause( return elif isinstance(pause_err, trio.Cancelled): - _repl_fail_msg = ( + _repl_fail_report += ( 'You called `tractor.pause()` from an already cancelled scope!\n\n' 'Consider `await tractor.pause(shield=True)` to make it work B)\n' ) else: - _repl_fail_msg += f'on behalf of {repl_task} ??\n' + _repl_fail_report += f'on behalf of {repl_task} ??\n' - if _repl_fail_msg: - log.exception(_repl_fail_msg) + if _repl_fail_report: + log.exception(_repl_fail_report) if not actor.is_infected_aio(): DebugStatus.release(cancel_req_task=True) @@ -3051,7 +3081,8 @@ async def maybe_wait_for_debugger( if ( not debug_mode() - and not child_in_debug + and + not child_in_debug ): return False @@ -3109,7 +3140,7 @@ async def maybe_wait_for_debugger( logmeth( msg + - '\nRoot is waiting on tty lock to release from\n\n' + '\n^^ Root is waiting on tty lock release.. 
^^\n' # f'{caller_frame_info}\n' ) @@ -3172,11 +3203,11 @@ async def maybe_wait_for_debugger( @cm def open_crash_handler( catch: set[BaseException] = { - # Exception, BaseException, }, ignore: set[BaseException] = { KeyboardInterrupt, + trio.Cancelled, }, tb_hide: bool = True, ): -- 2.34.1 From 04bc7cbfa466cb85e0d33fcd9b1cc0e46e5460b1 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 26 Feb 2025 18:37:43 -0500 Subject: [PATCH 254/305] Another loose-egs flag in `test_child_manages_service_nursery` --- tests/test_child_manages_service_nursery.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_child_manages_service_nursery.py b/tests/test_child_manages_service_nursery.py index 21fb3920..956fccd2 100644 --- a/tests/test_child_manages_service_nursery.py +++ b/tests/test_child_manages_service_nursery.py @@ -117,7 +117,9 @@ async def open_actor_local_nursery( ctx: tractor.Context, ): global _nursery - async with trio.open_nursery() as n: + async with trio.open_nursery( + strict_exception_groups=False, + ) as n: _nursery = n await ctx.started() await trio.sleep(10) -- 2.34.1 From 266d8e0febe6a263616640a2403d7ca3fe928cec Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 3 Mar 2025 11:14:32 -0500 Subject: [PATCH 255/305] Expose `._state.debug_mode()` predicate at top level --- tractor/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tractor/__init__.py b/tractor/__init__.py index a27a3b59..6ddbf199 100644 --- a/tractor/__init__.py +++ b/tractor/__init__.py @@ -44,6 +44,7 @@ from ._state import ( current_actor as current_actor, is_root_process as is_root_process, current_ipc_ctx as current_ipc_ctx, + debug_mode as debug_mode ) from ._exceptions import ( ContextCancelled as ContextCancelled, @@ -66,3 +67,4 @@ from ._root import ( from ._ipc import Channel as Channel from ._portal import Portal as Portal from ._runtime import Actor as Actor +from . import hilevel as hilevel -- 2.34.1 From 08fa266de48d62edb485f58a13b5df5030f076cc Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 1 Mar 2025 21:25:05 -0500 Subject: [PATCH 256/305] Add per-side graceful-exit/cancel excs-as-signals Such that any combination of task terminations/exits can be explicitly handled and "dual side independent" crash cases re-raised in egs. The main error-or-exit impl changes include, - use of new per-side "signaling exceptions": - TrioTaskExited|TrioCancelled for signalling aio. - AsyncioTaskExited|AsyncioCancelled for signalling trio. - NOT overloading the `LinkedTaskChannel._trio/aio_err` fields for err-as-signal relay and instead add a new pair of `._trio/aio_to_raise` maybe-exc-attrs which allow each side's task to specify what it would want the other side to raise to signal its/a termination outcome: - `._trio_to_raise: AsyncioTaskExited|AsyncioCancelled` to signal, |_ the aio task having returned while the trio side was still reading from the `asyncio.Queue` or is just not `.done()`. |_ the aio task being self or trio-request cancelled where a `asyncio.CancelledError` is raised and caught but NOT relayed as is back to trio; instead signal a "more explicit" exc type. - `._aio_to_raise: TrioTaskExited|TrioCancelled` to signal, |_ the trio task having returned while the aio side was still reading from the mem chan and indicating that the trio side might not care any more about future streamed values (like the `Stop/EndOfChannel` equivs for ipc `Context`s). 
|_ when the trio task is cancelled we do a
      `asyncio.Future.set_exception(TrioTaskExited())` to indicate to
      the aio side verbosely that it should cancel due to the trio
      parent.
- `_aio/trio_err` are now left to only capturing the **actual**
  per-side task excs for introspection / other side's handling logic.
- supporting "graceful exits" depending on API in use from
  `translate_aio_errors()` such that if either side exits but the
  other side isn't expected to consume the final `return`ed value, we
  just exit silently, which required:
  - adding a `suppress_graceful_exits: bool` flag.
  - adjusting the `maybe_raise_aio_side_err()` logic to use that flag
    and suppress only on certain combos of `._trio_to_raise/._trio_err`.
  - prefer to raise `._trio_to_raise` when the aio-side is the src and
    vice versa.
- filling out pedantic logging for cancellation cases indicating which
  side is the cause.
- add a `LinkedTaskChannel._aio_result` modelled after our
  `Context._result` and a similar `.wait_for_result()` interface which
  allows maybe accessing the aio task's final return value if desired
  when using the `open_channel_from()` API.
- rename `cancel_trio()` done handler -> `signal_trio_when_done()`

Also some fairly major test suite updates,
- add a `delay: int` producing fixture which delivers a much larger
  timeout whenever `debug_mode` is set so that the REPL can be used
  without a surrounding cancel firing.
- add a new `test_aio_exits_early_relays_AsyncioTaskExited` including
  a paired `exit_early: bool` flag to `push_from_aio_task()`.
- adjust `test_trio_closes_early_causes_aio_checkpoint_raise` to expect
  a `to_asyncio.TrioTaskExited`.
---
 tests/test_infected_asyncio.py | 237 ++++++++---
 tractor/_exceptions.py         |  52 ++-
 tractor/to_asyncio.py          | 740 ++++++++++++++++++++++++---------
 3 files changed, 775 insertions(+), 254 deletions(-)

diff --git a/tests/test_infected_asyncio.py b/tests/test_infected_asyncio.py
index d462f59d..77877568 100644
--- a/tests/test_infected_asyncio.py
+++ b/tests/test_infected_asyncio.py
@@ -32,6 +32,17 @@ from tractor.trionics import BroadcastReceiver
 from tractor._testing import expect_ctxc


+@pytest.fixture(
+    scope='module',
+    # autouse=True,
+)
+def delay(debug_mode: bool) -> int:
+    if debug_mode:
+        return 999
+    else:
+        return 1
+
+
 async def sleep_and_err(
     sleep_for: float = 0.1,

@@ -59,20 +70,24 @@ async def trio_cancels_single_aio_task():
         await tractor.to_asyncio.run_task(aio_sleep_forever)


-def test_trio_cancels_aio_on_actor_side(reg_addr):
+def test_trio_cancels_aio_on_actor_side(
+    reg_addr: tuple[str, int],
+    delay: int,
+):
     '''
     Spawn an infected actor that is cancelled by the ``trio`` side
     task using std cancel scope apis.

     '''
     async def main():
-        async with tractor.open_nursery(
-            registry_addrs=[reg_addr]
-        ) as n:
-            await n.run_in_actor(
-                trio_cancels_single_aio_task,
-                infect_asyncio=True,
-            )
+        with trio.fail_after(1 + delay):
+            async with tractor.open_nursery(
+                registry_addrs=[reg_addr]
+            ) as n:
+                await n.run_in_actor(
+                    trio_cancels_single_aio_task,
+                    infect_asyncio=True,
+                )

     trio.run(main)

@@ -116,7 +131,9 @@ async def asyncio_actor(
         raise


-def test_aio_simple_error(reg_addr):
+def test_aio_simple_error(
+    reg_addr: tuple[str, int],
+):
     '''
     Verify a simple remote asyncio error propagates
     back through trio to the parent actor.
@@ -153,7 +170,9 @@ def test_aio_simple_error(reg_addr): assert err.boxed_type is AssertionError -def test_tractor_cancels_aio(reg_addr): +def test_tractor_cancels_aio( + reg_addr: tuple[str, int], +): ''' Verify we can cancel a spawned asyncio task gracefully. @@ -172,7 +191,9 @@ def test_tractor_cancels_aio(reg_addr): trio.run(main) -def test_trio_cancels_aio(reg_addr): +def test_trio_cancels_aio( + reg_addr: tuple[str, int], +): ''' Much like the above test with ``tractor.Portal.cancel_actor()`` except we just use a standard ``trio`` cancellation api. @@ -203,7 +224,8 @@ async def trio_ctx( # this will block until the ``asyncio`` task sends a "first" # message. - with trio.fail_after(2): + delay: int = 999 if tractor.debug_mode() else 1 + with trio.fail_after(1 + delay): try: async with ( trio.open_nursery( @@ -239,7 +261,8 @@ async def trio_ctx( ids='parent_actor_cancels_child={}'.format ) def test_context_spawns_aio_task_that_errors( - reg_addr, + reg_addr: tuple[str, int], + delay: int, parent_cancels: bool, ): ''' @@ -249,7 +272,7 @@ def test_context_spawns_aio_task_that_errors( ''' async def main(): - with trio.fail_after(2): + with trio.fail_after(1 + delay): async with tractor.open_nursery() as n: p = await n.start_actor( 'aio_daemon', @@ -322,11 +345,12 @@ async def aio_cancel(): def test_aio_cancelled_from_aio_causes_trio_cancelled( reg_addr: tuple, + delay: int, ): ''' - When the `asyncio.Task` cancels itself the `trio` side cshould + When the `asyncio.Task` cancels itself the `trio` side should also cancel and teardown and relay the cancellation cross-process - to the caller (parent). + to the parent caller. ''' async def main(): @@ -342,7 +366,7 @@ def test_aio_cancelled_from_aio_causes_trio_cancelled( # NOTE: normally the `an.__aexit__()` waits on the # portal's result but we do it explicitly here # to avoid indent levels. - with trio.fail_after(1): + with trio.fail_after(1 + delay): await p.wait_for_result() with pytest.raises( @@ -353,11 +377,10 @@ def test_aio_cancelled_from_aio_causes_trio_cancelled( # might get multiple `trio.Cancelled`s as well inside an inception err: RemoteActorError|ExceptionGroup = excinfo.value if isinstance(err, ExceptionGroup): - err = next(itertools.dropwhile( - lambda exc: not isinstance(exc, tractor.RemoteActorError), - err.exceptions - )) - assert err + excs = err.exceptions + assert len(excs) == 1 + final_exc = excs[0] + assert isinstance(final_exc, tractor.RemoteActorError) # relayed boxed error should be our `trio`-task's # cancel-signal-proxy-equivalent of `asyncio.CancelledError`. @@ -370,15 +393,18 @@ async def no_to_trio_in_args(): async def push_from_aio_task( - sequence: Iterable, to_trio: trio.abc.SendChannel, expect_cancel: False, fail_early: bool, + exit_early: bool, ) -> None: try: + # print('trying breakpoint') + # breakpoint() + # sync caller ctx manager to_trio.send_nowait(True) @@ -387,10 +413,27 @@ async def push_from_aio_task( to_trio.send_nowait(i) await asyncio.sleep(0.001) - if i == 50 and fail_early: - raise Exception + if ( + i == 50 + ): + if fail_early: + print('Raising exc from aio side!') + raise Exception - print('asyncio streamer complete!') + if exit_early: + # TODO? really you could enforce the same + # SC-proto we use for actors here with asyncio + # such that a Return[None] msg would be + # implicitly delivered to the trio side? + # + # XXX => this might be the end-all soln for + # converting any-inter-task system (regardless + # of maybe-remote runtime or language) to be + # SC-compat no? 
+ print(f'asyncio breaking early @ {i!r}') + break + + print('asyncio streaming complete!') except asyncio.CancelledError: if not expect_cancel: @@ -402,9 +445,10 @@ async def push_from_aio_task( async def stream_from_aio( - exit_early: bool = False, - raise_err: bool = False, + trio_exit_early: bool = False, + trio_raise_err: bool = False, aio_raise_err: bool = False, + aio_exit_early: bool = False, fan_out: bool = False, ) -> None: @@ -417,8 +461,17 @@ async def stream_from_aio( async with to_asyncio.open_channel_from( push_from_aio_task, sequence=seq, - expect_cancel=raise_err or exit_early, + expect_cancel=trio_raise_err or trio_exit_early, fail_early=aio_raise_err, + exit_early=aio_exit_early, + + # such that we can test exit early cases + # for each side explicitly. + suppress_graceful_exits=(not( + aio_exit_early + or + trio_exit_early + )) ) as (first, chan): @@ -435,9 +488,9 @@ async def stream_from_aio( pulled.append(value) if value == 50: - if raise_err: + if trio_raise_err: raise Exception - elif exit_early: + elif trio_exit_early: print('`consume()` breaking early!\n') break @@ -471,10 +524,14 @@ async def stream_from_aio( finally: - if ( - not raise_err and - not exit_early and - not aio_raise_err + if not ( + trio_raise_err + or + trio_exit_early + or + aio_raise_err + or + aio_exit_early ): if fan_out: # we get double the pulled values in the @@ -484,6 +541,7 @@ async def stream_from_aio( assert list(sorted(pulled)) == expect else: + # await tractor.pause() assert pulled == expect else: assert not fan_out @@ -497,7 +555,10 @@ async def stream_from_aio( 'fan_out', [False, True], ids='fan_out_w_chan_subscribe={}'.format ) -def test_basic_interloop_channel_stream(reg_addr, fan_out): +def test_basic_interloop_channel_stream( + reg_addr: tuple[str, int], + fan_out: bool, +): async def main(): async with tractor.open_nursery() as n: portal = await n.run_in_actor( @@ -517,7 +578,7 @@ def test_trio_error_cancels_intertask_chan(reg_addr): async with tractor.open_nursery() as n: portal = await n.run_in_actor( stream_from_aio, - raise_err=True, + trio_raise_err=True, infect_asyncio=True, ) # should trigger remote actor error @@ -530,42 +591,114 @@ def test_trio_error_cancels_intertask_chan(reg_addr): excinfo.value.boxed_type is Exception -def test_trio_closes_early_and_channel_exits( +def test_trio_closes_early_causes_aio_checkpoint_raise( reg_addr: tuple[str, int], + delay: int, ): ''' - Check that if the `trio`-task "exits early" on `async for`ing the - inter-task-channel (via a `break`) we exit silently from the - `open_channel_from()` block and get a final `Return[None]` msg. + Check that if the `trio`-task "exits early and silently" (in this + case during `async for`-ing the inter-task-channel via + a `break`-from-loop), we raise `TrioTaskExited` on the + `asyncio`-side which also then bubbles up through the + `open_channel_from()` block indicating that the `asyncio.Task` + hit a ran another checkpoint despite the `trio.Task` exit. 
''' async def main(): - with trio.fail_after(2): + with trio.fail_after(1 + delay): async with tractor.open_nursery( # debug_mode=True, # enable_stack_on_sig=True, ) as n: portal = await n.run_in_actor( stream_from_aio, - exit_early=True, + trio_exit_early=True, infect_asyncio=True, ) # should raise RAE diectly print('waiting on final infected subactor result..') res: None = await portal.wait_for_result() assert res is None - print('infected subactor returned result: {res!r}\n') + print(f'infected subactor returned result: {res!r}\n') # should be a quiet exit on a simple channel exit - trio.run( - main, - # strict_exception_groups=False, - ) + with pytest.raises(RemoteActorError) as excinfo: + trio.run(main) + + # ensure remote error is an explicit `AsyncioCancelled` sub-type + # which indicates to the aio task that the trio side exited + # silently WITHOUT raising a `trio.Cancelled` (which would + # normally be raised instead as a `AsyncioCancelled`). + excinfo.value.boxed_type is to_asyncio.TrioTaskExited -def test_aio_errors_and_channel_propagates_and_closes(reg_addr): +def test_aio_exits_early_relays_AsyncioTaskExited( + # TODO, parametrize the 3 possible trio side conditions: + # - trio blocking on receive, aio exits early + # - trio cancelled AND aio exits early on its next tick + # - trio errors AND aio exits early on its next tick + reg_addr: tuple[str, int], + debug_mode: bool, + delay: int, +): + ''' + Check that if the `asyncio`-task "exits early and silently" (in this + case during `push_from_aio_task()` pushing to the `InterLoopTaskChannel` + it `break`s from the loop), we raise `AsyncioTaskExited` on the + `trio`-side which then DOES NOT BUBBLE up through the + `open_channel_from()` block UNLESS, + + - the trio.Task also errored/cancelled, in which case we wrap + both errors in an eg + - the trio.Task was blocking on rxing a value from the + `InterLoopTaskChannel`. + + ''' async def main(): - async with tractor.open_nursery() as n: + with trio.fail_after(1 + delay): + async with tractor.open_nursery( + debug_mode=debug_mode, + # enable_stack_on_sig=True, + ) as an: + portal = await an.run_in_actor( + stream_from_aio, + infect_asyncio=True, + trio_exit_early=False, + aio_exit_early=True, + ) + # should raise RAE diectly + print('waiting on final infected subactor result..') + res: None = await portal.wait_for_result() + assert res is None + print(f'infected subactor returned result: {res!r}\n') + + # should be a quiet exit on a simple channel exit + with pytest.raises(RemoteActorError) as excinfo: + trio.run(main) + + exc = excinfo.value + + # TODO, wow bug! + # -[ ] bp handler not replaced!?!? + # breakpoint() + + # import pdbp; pdbp.set_trace() + + # ensure remote error is an explicit `AsyncioCancelled` sub-type + # which indicates to the aio task that the trio side exited + # silently WITHOUT raising a `trio.Cancelled` (which would + # normally be raised instead as a `AsyncioCancelled`). 
+ assert exc.boxed_type is to_asyncio.AsyncioTaskExited + + +def test_aio_errors_and_channel_propagates_and_closes( + reg_addr: tuple[str, int], + debug_mode: bool, +): + async def main(): + async with tractor.open_nursery( + debug_mode=debug_mode, + ) as n: portal = await n.run_in_actor( stream_from_aio, aio_raise_err=True, @@ -852,6 +985,8 @@ def test_sigint_closes_lifetime_stack( ''' async def main(): + + delay = 999 if tractor.debug_mode() else 1 try: an: tractor.ActorNursery async with tractor.open_nursery( @@ -902,7 +1037,7 @@ def test_sigint_closes_lifetime_stack( if wait_for_ctx: print('waiting for ctx outcome in parent..') try: - with trio.fail_after(1): + with trio.fail_after(1 + delay): await ctx.wait_for_result() except tractor.ContextCancelled as ctxc: assert ctxc.canceller == ctx.chan.uid diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index b4386db0..3382be10 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -82,6 +82,39 @@ class InternalError(RuntimeError): ''' +class AsyncioCancelled(Exception): + ''' + Asyncio cancelled translation (non-base) error + for use with the ``to_asyncio`` module + to be raised in the ``trio`` side task + + NOTE: this should NOT inherit from `asyncio.CancelledError` or + tests should break! + + ''' + + +class AsyncioTaskExited(Exception): + ''' + asyncio.Task "exited" translation error for use with the + `to_asyncio` APIs to be raised in the `trio` side task indicating + on `.run_task()`/`.open_channel_from()` exit that the aio side + exited early/silently. + + ''' + +class TrioTaskExited(AsyncioCancelled): + ''' + The `trio`-side task exited without explicitly cancelling the + `asyncio.Task` peer. + + This is very similar to how `trio.ClosedResource` acts as + a "clean shutdown" signal to the consumer side of a mem-chan, + + https://trio.readthedocs.io/en/stable/reference-core.html#clean-shutdown-with-channels + + ''' + # NOTE: more or less should be close to these: # 'boxed_type', @@ -127,8 +160,8 @@ _body_fields: list[str] = list( def get_err_type(type_name: str) -> BaseException|None: ''' - Look up an exception type by name from the set of locally - known namespaces: + Look up an exception type by name from the set of locally known + namespaces: - `builtins` - `tractor._exceptions` @@ -358,6 +391,13 @@ class RemoteActorError(Exception): self._ipc_msg.src_type_str ) + if not self._src_type: + raise TypeError( + f'Failed to lookup src error type with ' + f'`tractor._exceptions.get_err_type()` :\n' + f'{self.src_type_str}' + ) + return self._src_type @property @@ -652,16 +692,10 @@ class RemoteActorError(Exception): failing actor's remote env. ''' - src_type_ref: Type[BaseException] = self.src_type - if not src_type_ref: - raise TypeError( - 'Failed to lookup src error type:\n' - f'{self.src_type_str}' - ) - # TODO: better tb insertion and all the fancier dunder # metadata stuff as per `.__context__` etc. and friends: # https://github.com/python-trio/trio/issues/611 + src_type_ref: Type[BaseException] = self.src_type return src_type_ref(self.tb_str) # TODO: local recontruction of nested inception for a given diff --git a/tractor/to_asyncio.py b/tractor/to_asyncio.py index 75dfb5cb..f65cc7ef 100644 --- a/tractor/to_asyncio.py +++ b/tractor/to_asyncio.py @@ -20,7 +20,12 @@ Infection apis for ``asyncio`` loops running ``trio`` using guest mode. 
''' from __future__ import annotations import asyncio -from asyncio.exceptions import CancelledError +from asyncio.exceptions import ( + CancelledError, +) +from asyncio import ( + QueueShutDown, +) from contextlib import asynccontextmanager as acm from dataclasses import dataclass import inspect @@ -34,12 +39,18 @@ from typing import ( import tractor from tractor._exceptions import ( + InternalError, is_multi_cancelled, + TrioTaskExited, + TrioCancelled, + AsyncioTaskExited, + AsyncioCancelled, ) from tractor._state import ( debug_mode, _runtime_vars, ) +from tractor._context import Unresolved from tractor.devx import _debug from tractor.log import ( get_logger, @@ -69,6 +80,21 @@ __all__ = [ ] +# TODO, generally speaking we can generalize this abstraction, a "SC linked +# parent->child task pair", as the same "supervision scope primitive" +# **that is** our `._context.Context` with the only difference being +# in how the tasks conduct msg-passing comms. +# +# For `LinkedTaskChannel` we are passing the equivalent of (once you +# include all the recently added `._trio/aio_to_raise` +# exd-as-signals) our SC-dialog-proto over each asyncIO framework's +# mem-chan impl, +# +# verus in `Context` +# +# We are doing the same thing but msg-passing comms happens over an +# IPC transport between tasks in different memory domains. + @dataclass class LinkedTaskChannel( trio.abc.Channel, @@ -84,18 +110,85 @@ class LinkedTaskChannel( ''' _to_aio: asyncio.Queue _from_aio: trio.MemoryReceiveChannel + _to_trio: trio.MemorySendChannel _trio_cs: trio.CancelScope _trio_task: trio.Task _aio_task_complete: trio.Event + _suppress_graceful_exits: bool = True + _trio_err: BaseException|None = None + _trio_to_raise: ( + AsyncioTaskExited| # aio task exits while trio ongoing + AsyncioCancelled| # aio task is (self-)cancelled + BaseException| + None + ) = None _trio_exited: bool = False - # set after ``asyncio.create_task()`` - # _aio_first: Any|None = None + # set after `asyncio.create_task()` _aio_task: asyncio.Task|None = None _aio_err: BaseException|None = None + _aio_to_raise: ( + TrioTaskExited| # trio task exits while aio ongoing + BaseException| + None + ) = None + # _aio_first: Any|None = None # TODO? + _aio_result: Any|Unresolved = Unresolved + + def _final_result_is_set(self) -> bool: + return self._aio_result is not Unresolved + + # TODO? equiv from `Context`? + # @property + # def has_outcome(self) -> bool: + # return ( + # bool(self.maybe_error) + # or + # self._final_result_is_set() + # ) + + async def wait_for_result( + self, + hide_tb: bool = True, + + ) -> Any: + ''' + Wait for the `asyncio.Task.result()` from `trio` + + ''' + __tracebackhide__: bool = hide_tb + assert self._portal, ( + '`Context.wait_for_result()` can not be called from callee side!' 
+ ) + if self._final_result_is_set(): + return self._aio_result + + async with translate_aio_errors( + chan=self, + wait_aio_task=False, + ): + await self._aio_task_complete.wait() + + if ( + not self._final_result_is_set() + ): + if (trio_to_raise := self._trio_to_raise): + raise trio_to_raise from self._aio_err + + elif aio_err := self._aio_err: + raise aio_err + + else: + raise InternalError( + f'Asyncio-task has no result or error set !?\n' + f'{self._aio_task}' + ) + + return self._aio_result + _broadcaster: BroadcastReceiver|None = None async def aclose(self) -> None: @@ -137,7 +230,9 @@ class LinkedTaskChannel( return await self._from_aio.receive() except BaseException as err: async with translate_aio_errors( - self, + chan=self, + # NOTE, determined by `open_channel_from()` input arg + suppress_graceful_exits=self._suppress_graceful_exits, # XXX: obviously this will deadlock if an on-going stream is # being procesed. @@ -154,8 +249,9 @@ class LinkedTaskChannel( ''' self._to_aio.put_nowait(item) - async def wait_aio_complete(self) -> None: - await self._aio_task_complete.wait() + # TODO? needed? + # async def wait_aio_complete(self) -> None: + # await self._aio_task_complete.wait() def cancel_asyncio_task( self, @@ -208,6 +304,7 @@ def _run_asyncio_task( *, qsize: int = 1, provide_channels: bool = False, + suppress_graceful_exits: bool = True, hide_tb: bool = False, **kwargs, @@ -260,6 +357,7 @@ def _run_asyncio_task( _trio_cs=trio_cs, _trio_task=trio_task, _aio_task_complete=aio_task_complete, + _suppress_graceful_exits=suppress_graceful_exits, ) async def wait_on_coro_final_result( @@ -269,17 +367,16 @@ def _run_asyncio_task( ) -> None: ''' - Await `coro` and relay result back to `trio`. - - This can only be run as an `asyncio.Task`! + Await input `coro` as/in an `asyncio.Task` and deliver final + `return`-ed result back to `trio`. ''' - nonlocal aio_err nonlocal chan orig = result = id(coro) try: - result = await coro + result: Any = await coro + chan._aio_result = result except BaseException as aio_err: chan._aio_err = aio_err if isinstance(aio_err, CancelledError): @@ -291,7 +388,6 @@ def _run_asyncio_task( '`asyncio` task errored\n' ) raise - else: if ( result != orig @@ -306,22 +402,46 @@ def _run_asyncio_task( to_trio.send_nowait(result) finally: - # breakpoint() - # import pdbp; pdbp.set_trace() - # if the task was spawned using `open_channel_from()` # then we close the channels on exit. if provide_channels: + # breakpoint() # TODO! why no work!? + # import pdbp; pdbp.set_trace() + + # IFF there is a blocked trio waiter, we set the + # aio-side error to be an explicit "exited early" + # (much like a `Return` in our SC IPC proto) for the + # `.open_channel_from()` case where the parent trio + # task might not wait directly for a final returned + # result (i.e. the trio side might be waiting on + # a streamed value) - this is a signal that the + # asyncio.Task has returned early! + # + # TODO, solve other cases where trio side might, + # - raise Cancelled but aio side exits on next tick. + # - raise error but aio side exits on next tick. + # - raise error and aio side errors "independently" + # on next tick (SEE draft HANDLER BELOW). + stats: trio.MemoryChannelStatistics = to_trio.statistics() + if ( + stats.tasks_waiting_receive + and + not chan._aio_err + ): + chan._trio_to_raise = AsyncioTaskExited( + f'Task existed with final result\n' + f'{result!r}\n' + ) + # only close the sender side which will relay - # a ``trio.EndOfChannel`` to the trio (consumer) side. 
+ # a `trio.EndOfChannel` to the trio (consumer) side. to_trio.close() aio_task_complete.set() - # await asyncio.sleep(0.1) - log.info( - f'`asyncio` task terminated\n' - f'x)>\n' - f' |_{task}\n' + log.runtime( + f'`asyncio` task completed\n' + f')>\n' + f' |_{task}\n' ) # start the asyncio task we submitted from trio @@ -331,6 +451,7 @@ def _run_asyncio_task( f'{coro!r}' ) + # schedule the (bg) `asyncio.Task` task: asyncio.Task = asyncio.create_task( wait_on_coro_final_result( to_trio, @@ -359,16 +480,36 @@ def _run_asyncio_task( ) greenback.bestow_portal(task) - def cancel_trio( + def signal_trio_when_done( task: asyncio.Task, ) -> None: ''' - Cancel the parent `trio` task on any error raised by the - `asyncio` side. + Maybe-cancel, relay-and-raise an error to, OR pack a final + `return`-value for the parent (in SC terms) `trio.Task` on + completion of the `asyncio.Task`. + + Note for certain "edge" scheduling-race-conditions we allow + the aio side to dictate dedicated `tractor`-defined excs to + be raised in the `trio` parent task; the intention is to + indicate those races in a VERY pedantic manner! ''' nonlocal chan - relayed_aio_err: BaseException|None = chan._aio_err + trio_err: BaseException|None = chan._trio_err + + # XXX, since the original error we read from the asyncio.Task + # might change between BEFORE and AFTER we here call + # `asyncio.Task.result()` + # + # -> THIS is DUE TO US in `translate_aio_errors()`! + # + # => for example we might set a special exc + # (`AsyncioCancelled|AsyncioTaskExited`) meant to be raised + # in trio (and maybe absorbed depending on the called API) + # BEFORE this done-callback is invoked by `asyncio`'s + # runtime. + trio_to_raise: BaseException|None = chan._trio_to_raise + orig_aio_err: BaseException|None = chan._aio_err aio_err: BaseException|None = None # only to avoid `asyncio` complaining about uncaptured @@ -376,24 +517,45 @@ def _run_asyncio_task( try: res: Any = task.result() log.info( - '`trio` received final result from {task}\n' - f'|_{res}\n' + f'`trio` received final result from `asyncio` task,\n' + f')> {res}\n' + f' |_{task}\n' ) + if not chan._aio_result: + chan._aio_result = res + + # ?TODO, should we also raise `AsyncioTaskExited[res]` + # in any case where trio is NOT blocking on the + # `._to_trio` chan? + # + # -> ?NO RIGHT? since the + # `open_channel_from().__aexit__()` should detect this + # and then set any final `res` from above as a field + # that can optionally be read by the trio-paren-task as + # needed (just like in our + # `Context.wait_for_result()/.result` API yah? + # + # if provide_channels: + except BaseException as _aio_err: aio_err: BaseException = _aio_err - # read again AFTER the `asyncio` side errors in case + + # READ AGAIN, AFTER the `asyncio` side errors, in case # it was cancelled due to an error from `trio` (or # some other out of band exc) and then set to something # else? - relayed_aio_err: BaseException|None = chan._aio_err + curr_aio_err: BaseException|None = chan._aio_err # always true right? 
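+        # (largely yes: `Task.result()` re-raises either the exact
+        # exc instance the coro body raised, i.e. the one which
+        # `wait_on_coro_final_result()` stored in `chan._aio_err`,
+        # or a freshly-made `CancelledError` for a cancelled task,
+        # which is why only the `type()`s are compared below.)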
assert ( - type(_aio_err) is type(relayed_aio_err) + type(aio_err) + is type(orig_aio_err) + is type(curr_aio_err) ), ( f'`asyncio`-side task errors mismatch?!?\n\n' f'(caught) aio_err: {aio_err}\n' - f'chan._aio_err: {relayed_aio_err}\n' + f'ORIG chan._aio_err: {orig_aio_err}\n' + f'chan._aio_err: {curr_aio_err}\n' ) msg: str = ( @@ -401,7 +563,7 @@ def _run_asyncio_task( '{etype_str}\n' # ^NOTE filled in below ) - if isinstance(_aio_err, CancelledError): + if isinstance(aio_err, CancelledError): msg += ( f'c)>\n' f' |_{task}\n' @@ -409,6 +571,28 @@ def _run_asyncio_task( log.cancel( msg.format(etype_str='cancelled') ) + + # XXX when the asyncio.Task exits early (before the trio + # side) we relay through an exc-as-signal which is + # normally suppressed unless the trio.Task also errors + # + # ?TODO, is this even needed (does it happen) now? + elif isinstance(aio_err, QueueShutDown): + # import pdbp; pdbp.set_trace() + trio_err = AsyncioTaskExited( + 'Task exited before `trio` side' + ) + if not chan._trio_err: + chan._trio_err = trio_err + + msg += ( + f')>\n' + f' |_{task}\n' + ) + log.info( + msg.format(etype_str='exited') + ) + else: msg += ( f'x)>\n' @@ -418,13 +602,20 @@ def _run_asyncio_task( msg.format(etype_str='errored') ) + # is trio the src of the aio task's exc-as-outcome? trio_err: BaseException|None = chan._trio_err + curr_aio_err: BaseException|None = chan._aio_err if ( - relayed_aio_err + curr_aio_err or trio_err + or + trio_to_raise ): - # import pdbp; pdbp.set_trace() + # XXX, if not already, ALWAYs cancel the trio-side on an + # aio-side error or early return. In the case where the trio task is + # blocking on a checkpoint or `asyncio.Queue.get()`. + # NOTE: currently mem chan closure may act as a form # of error relay (at least in the `asyncio.CancelledError` # case) since we have no way to directly trigger a `trio` @@ -432,30 +623,18 @@ def _run_asyncio_task( # We might want to change this in the future though. from_aio.close() - # wait, wut? - # aio_err.with_traceback(aio_err.__traceback__) - - # TODO: show when cancellation originated - # from each side more pedantically in log-msg? - # elif ( - # type(aio_err) is CancelledError - # and # trio was the cause? - # trio_cs.cancel_called - # ): - # log.cancel( - # 'infected task was cancelled by `trio`-side' - # ) - # raise aio_err from task_err - - # XXX: if not already, alway cancel the scope on a task - # error in case the trio task is blocking on - # a checkpoint. if ( not trio_cs.cancelled_caught or not trio_cs.cancel_called ): - # import pdbp; pdbp.set_trace() + log.cancel( + f'Cancelling `trio` side due to aio-side src exc\n' + f'{curr_aio_err}\n' + f'\n' + f'(c>\n' + f' |_{trio_task}\n' + ) trio_cs.cancel() # maybe the `trio` task errored independent from the @@ -478,28 +657,36 @@ def _run_asyncio_task( # for reproducing detailed edge cases as per the above # cases. 
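+        # (when both sides really do error independently the
+        # combined outcome constructed just below is a py3.11
+        # `ExceptionGroup` which a caller can dissect per-source
+        # via `except*`; a rough sketch, not verbatim from this
+        # module,
+        #
+        #   try:
+        #       raise ExceptionGroup(
+        #           'Both the `trio` and `asyncio` tasks errored independently!!\n',
+        #           (trio_to_raise or trio_err, aio_to_raise or aio_err),
+        #       )
+        #   except* AsyncioTaskExited:
+        #       ...  # aio-side exited early while trio still ran
+        #   except* Exception:
+        #       ...  # remaining member(s), e.g. the trio-side err
+        # )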
# + trio_to_raise: AsyncioCancelled|AsyncioTaskExited = chan._trio_to_raise + aio_to_raise: TrioTaskExited|TrioCancelled = chan._aio_to_raise if ( + not chan._aio_result + and not trio_cs.cancelled_caught - and - (trio_err := chan._trio_err) - and - type(trio_err) not in { - trio.Cancelled, - } and ( - aio_err - and - type(aio_err) not in { + (aio_err and type(aio_err) not in { asyncio.CancelledError - } + }) + or + aio_to_raise + ) + and ( + ((trio_err := chan._trio_err) and type(trio_err) not in { + trio.Cancelled, + }) + or + trio_to_raise ) ): eg = ExceptionGroup( 'Both the `trio` and `asyncio` tasks errored independently!!\n', - (trio_err, aio_err), + ( + trio_to_raise or trio_err, + aio_to_raise or aio_err, + ), ) - chan._trio_err = eg - chan._aio_err = eg + # chan._trio_err = eg + # chan._aio_err = eg raise eg elif aio_err: @@ -507,45 +694,34 @@ def _run_asyncio_task( # match the one we just caught from the task above! # (that would indicate something weird/very-wrong # going on?) - if aio_err is not relayed_aio_err: - raise aio_err from relayed_aio_err + if ( + aio_err is not trio_to_raise + and ( + not suppress_graceful_exits + and ( + chan._aio_result is not Unresolved + and + isinstance(trio_to_raise, AsyncioTaskExited) + ) + ) + ): + # raise aio_err from relayed_aio_err + raise trio_to_raise from curr_aio_err raise aio_err - task.add_done_callback(cancel_trio) + task.add_done_callback(signal_trio_when_done) return chan -class AsyncioCancelled(Exception): - ''' - Asyncio cancelled translation (non-base) error - for use with the ``to_asyncio`` module - to be raised in the ``trio`` side task - - NOTE: this should NOT inherit from `asyncio.CancelledError` or - tests should break! - - ''' - - -class TrioTaskExited(AsyncioCancelled): - ''' - The `trio`-side task exited without explicitly cancelling the - `asyncio.Task` peer. - - This is very similar to how `trio.ClosedResource` acts as - a "clean shutdown" signal to the consumer side of a mem-chan, - - https://trio.readthedocs.io/en/stable/reference-core.html#clean-shutdown-with-channels - - ''' - - @acm async def translate_aio_errors( chan: LinkedTaskChannel, wait_on_aio_task: bool = False, cancel_aio_task_on_trio_exit: bool = True, + suppress_graceful_exits: bool = True, + + hide_tb: bool = True, ) -> AsyncIterator[None]: ''' @@ -558,17 +734,20 @@ async def translate_aio_errors( appropriately translates errors and cancels into ``trio`` land. ''' + __tracebackhide__: bool = hide_tb + trio_task = trio.lowlevel.current_task() - - aio_err: BaseException|None = None - + aio_err: BaseException|None = chan._aio_err aio_task: asyncio.Task = chan._aio_task + aio_done_before_trio: bool = aio_task.done() assert aio_task trio_err: BaseException|None = None + to_raise_trio: BaseException|None = None try: yield # back to one of the cross-loop apis except trio.Cancelled as taskc: trio_err = taskc + chan._trio_err = trio_err # should NEVER be the case that `trio` is cancel-handling # BEFORE the other side's task-ref was set!? 
@@ -577,12 +756,12 @@ async def translate_aio_errors( # import pdbp; pdbp.set_trace() # lolevel-debug # relay cancel through to called `asyncio` task - chan._aio_err = AsyncioCancelled( + chan._aio_to_raise = TrioCancelled( f'trio`-side cancelled the `asyncio`-side,\n' f'c)>\n' - f' |_{trio_task}\n\n' - - f'{trio_err!r}\n' + f' |_{trio_task}\n' + f'\n' + f'trio src exc: {trio_err!r}\n' ) # XXX NOTE XXX seems like we can get all sorts of unreliable @@ -595,22 +774,32 @@ async def translate_aio_errors( # ) # raise + # XXX always passthrough EoC since this translator is often + # called from `LinkedTaskChannel.receive()` which we want + # passthrough and further we have no special meaning for it in + # terms of relaying errors or signals from the aio side! + except trio.EndOfChannel: + raise + # NOTE ALSO SEE the matching note in the `cancel_trio()` asyncio # task-done-callback. + # + # when the aio side is (possibly self-)cancelled it will close + # the `chan._to_trio` and thus trigger the trio side to raise + # a dedicated `AsyncioCancelled` except ( trio.ClosedResourceError, - # trio.BrokenResourceError, ) as cre: - trio_err = cre + chan._trio_err = cre aio_err = chan._aio_err - # import pdbp; pdbp.set_trace() # XXX if an underlying `asyncio.CancelledError` triggered # this channel close, raise our (non-`BaseException`) wrapper # exception (`AsyncioCancelled`) from that source error. if ( # aio-side is cancelled? - aio_task.cancelled() # not set until it terminates?? + # |_ first not set until it terminates?? + aio_task.cancelled() and type(aio_err) is CancelledError @@ -618,32 +807,26 @@ async def translate_aio_errors( # silent-exit-by-`trio` case? # -[ ] the parent task can also just catch it though? # -[ ] OR, offer a `signal_aio_side_on_exit=True` ?? - # - # or - # aio_err is None - # and - # chan._trio_exited - ): - raise AsyncioCancelled( + # await tractor.pause(shield=True) + chan._trio_to_raise = AsyncioCancelled( f'asyncio`-side cancelled the `trio`-side,\n' f'c(>\n' f' |_{aio_task}\n\n' - f'{trio_err!r}\n' - ) from aio_err + f'(triggered on the `trio`-side by a {cre!r})\n' + ) + # TODO?? needed or does this just get reraised in the + # `finally:` block below? + # raise to_raise_trio from aio_err # maybe the chan-closure is due to something else? else: - raise + raise cre except BaseException as _trio_err: - # await tractor.pause(shield=True) - trio_err = _trio_err - log.exception( - '`trio`-side task errored?' - ) - + trio_err = chan._trio_err = trio_err + # await tractor.pause(shield=True) # workx! entered: bool = await _debug._maybe_enter_pm( trio_err, api_frame=inspect.currentframe(), @@ -653,89 +836,177 @@ async def translate_aio_errors( and not is_multi_cancelled(trio_err) ): - log.exception('actor crashed\n') + log.exception( + '`trio`-side task errored?' + ) + # __tracebackhide__: bool = False - aio_taskc = AsyncioCancelled( - f'`trio`-side task errored!\n' - f'{trio_err}' - ) #from trio_err + # TODO, just a log msg here indicating the scope closed + # and that the trio-side expects that and what the final + # result from the aio side was? + # + # if isinstance(chan._aio_err, AsyncioTaskExited): + # await tractor.pause(shield=True) - try: - aio_task.set_exception(aio_taskc) - except ( - asyncio.InvalidStateError, - RuntimeError, - # ^XXX, uhh bc apparently we can't use `.set_exception()` - # any more XD .. ?? + # if aio side is still active cancel it due to the trio-side + # error! + # ?TODO, mk `AsyncioCancelled[typeof(trio_err)]` embed the + # current exc? 
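+        # (NB on `Task.set_exception()`: modern CPython
+        # `asyncio.Task`s explicitly do NOT support the
+        # `Future.set_exception()`/`.set_result()` API, eg.
+        #
+        #   task = asyncio.create_task(asyncio.sleep(1))
+        #   task.set_exception(RuntimeError('boom'))
+        #   # ^ RuntimeError: Task does not support set_exception
+        #   #   operation
+        #
+        # which is presumably why the (now commented-out) try/except
+        # guard below existed and why the `finally:` block further
+        # down instead injects excs via the private
+        # `Task._fut_waiter` future.)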
+ if ( + # not aio_task.cancelled() + # and + not aio_task.done() # TODO? only need this one? + + # XXX LOL, so if it's not set it's an error !? + # yet another good jerb by `ascyncio`.. + # and + # not aio_task.exception() ): + aio_taskc = TrioCancelled( + f'The `trio`-side task crashed!\n' + f'{trio_err}' + ) + aio_task.set_exception(aio_taskc) wait_on_aio_task = False - - # import pdbp; pdbp.set_trace() - # raise aio_taskc from trio_err + # try: + # aio_task.set_exception(aio_taskc) + # except ( + # asyncio.InvalidStateError, + # RuntimeError, + # # ^XXX, uhh bc apparently we can't use `.set_exception()` + # # any more XD .. ?? + # ): + # wait_on_aio_task = False finally: # record wtv `trio`-side error transpired - chan._trio_err = trio_err - ya_trio_exited: bool = chan._trio_exited + if trio_err: + if chan._trio_err is not trio_err: + await tractor.pause(shield=True) - # NOTE! by default always cancel the `asyncio` task if + # assert chan._trio_err is trio_err + + ya_trio_exited: bool = chan._trio_exited + graceful_trio_exit: bool = ( + ya_trio_exited + and + not chan._trio_err # XXX CRITICAL, `asyncio.Task.cancel()` is cucked man.. + ) + + # XXX NOTE! XXX by default always cancel the `asyncio` task if # we've made it this far and it's not done. # TODO, how to detect if there's an out-of-band error that # caused the exit? if ( - cancel_aio_task_on_trio_exit - and not aio_task.done() - and - aio_err + and ( + cancel_aio_task_on_trio_exit + # and + # chan._aio_err # TODO, if it's not .done() is this possible? - # or the trio side has exited it's surrounding cancel scope - # indicating the lifetime of the ``asyncio``-side task - # should also be terminated. - or ( - ya_trio_exited - and - not chan._trio_err # XXX CRITICAL, `asyncio.Task.cancel()` is cucked man.. + # did the `.open_channel_from()` parent caller already + # (gracefully) exit scope before this translator was + # invoked? + # => since we couple the lifetime of the `asyncio.Task` + # to the `trio` parent task, it should should also be + # terminated via either, + # + # 1. raising an explicit `TrioTaskExited|TrioCancelled` + # in task via `asyncio.Task._fut_waiter.set_exception()` + # + # 2. or (worst case) by cancelling the aio task using + # the std-but-never-working `asyncio.Task.cancel()` + # (which i can't figure out why that nor + # `Task.set_exception()` seem to never ever do the + # rignt thing! XD). + or + graceful_trio_exit ) ): report: str = ( 'trio-side exited silently!' ) - assert not aio_err, 'WTF how did asyncio do this?!' + assert not chan._aio_err, ( + 'WTF why duz asyncio have err but not dun?!' + ) - # if the `trio.Task` already exited the `open_channel_from()` - # block we ensure the asyncio-side gets signalled via an - # explicit exception and its `Queue` is shutdown. + # if the `trio.Task` terminated without raising + # `trio.Cancelled` (curently handled above) there's + # 2 posibilities, + # + # i. it raised a `trio_err` + # ii. it did a "silent exit" where the + # `open_channel_from().__aexit__()` phase ran without + # any raise or taskc (task cancel) and no final result + # was collected (yet) from the aio side. + # + # SO, ensure the asyncio-side is notified and terminated + # by a dedicated exc-as-signal which distinguishes + # various aio-task-state at termination cases. + # + # Consequently if the aio task doesn't absorb said + # exc-as-signal, the trio side should then see the same exc + # propagate up through the .open_channel_from() call to + # the parent task. 
+ # + # if the `trio.Task` already exited (only can happen for + # the `open_channel_from()` use case) block due to to + # either plain ol' graceful `__aexit__()` or due to taskc + # or an error, we ensure the aio-side gets signalled via + # an explicit exception and its `Queue` is shutdown. if ya_trio_exited: + # raise `QueueShutDown` on next `Queue.get()` call on + # aio side. chan._to_aio.shutdown() - # pump the other side's task? needed? + # pump this event-loop (well `Runner` but ya) + # + # TODO? is this actually needed? + # -[ ] theory is this let's the aio side error on + # next tick and then we sync task states from + # here onward? await trio.lowlevel.checkpoint() - # from tractor._state import is_root_process - # if is_root_process(): - # breakpoint() - + # TODO? factor the next 2 branches into a func like + # `try_terminate_aio_task()` and use it for the taskc + # case above as well? + fut: asyncio.Future|None = aio_task._fut_waiter if ( - not chan._trio_err + fut and - (fut := aio_task._fut_waiter) + not fut.done() ): - # await trio.lowlevel.checkpoint() - # import pdbp; pdbp.set_trace() - fut.set_exception( - TrioTaskExited( - f'The peer `asyncio` task is still blocking/running?\n' - f'>>\n' - f'|_{aio_task!r}\n' + # await tractor.pause() + if graceful_trio_exit: + fut.set_exception( + TrioTaskExited( + f'the `trio.Task` gracefully exited but ' + f'its `asyncio` peer is not done?\n' + f')>\n' + f' |_{trio_task}\n' + f'\n' + f'>>\n' + f' |_{aio_task!r}\n' + ) + ) + + # TODO? should this need to exist given the equiv + # `TrioCancelled` equivalent in the be handler + # above?? + else: + fut.set_exception( + TrioTaskExited( + f'The `trio`-side task crashed!\n' + f'{trio_err}' + ) ) - ) else: aio_taskc_warn: str = ( f'\n' f'MANUALLY Cancelling `asyncio`-task: {aio_task.get_name()}!\n\n' f'**THIS CAN SILENTLY SUPPRESS ERRORS FYI\n\n' ) + # await tractor.pause() report += aio_taskc_warn # TODO XXX, figure out the case where calling this makes the # `test_infected_asyncio.py::test_trio_closes_early_and_channel_exits` @@ -745,19 +1016,17 @@ async def translate_aio_errors( log.warning(report) - # Required to sync with the far end `asyncio`-task to ensure - # any error is captured (via monkeypatching the - # `channel._aio_err`) before calling ``maybe_raise_aio_err()`` - # below! + # sync with the `asyncio.Task`'s completion to ensure any + # error is captured and relayed (via + # `channel._aio_err/._trio_to_raise`) BEFORE calling + # `maybe_raise_aio_side_err()` below! # - # XXX NOTE XXX the `task.set_exception(aio_taskc)` call above - # MUST NOT EXCEPT or this WILL HANG!! - # - # so if you get a hang maybe step through and figure out why - # it erroed out up there! + # XXX WARNING NOTE + # the `task.set_exception(aio_taskc)` call above MUST NOT + # EXCEPT or this WILL HANG!! SO, if you get a hang maybe step + # through and figure out why it erroed out up there! # if wait_on_aio_task: - # await chan.wait_aio_complete() await chan._aio_task_complete.wait() log.info( 'asyncio-task is done and unblocked trio-side!\n' @@ -767,6 +1036,8 @@ async def translate_aio_errors( # -[ ] make this a channel method, OR # -[ ] just put back inline below? # + # await tractor.pause(shield=True) + # TODO, go back to inlining this.. 
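+    # (refresher on the py3.13 `asyncio.Queue.shutdown()` machinery
+    # used above: after `.shutdown()` any blocked or subsequent
+    # `.get()` raises `asyncio.QueueShutDown` once the queue is
+    # drained, which is how the aio-side child detects a trio-side
+    # exit on its next recv-attempt; a minimal sketch,
+    #
+    #   q: asyncio.Queue = asyncio.Queue()
+    #   q.shutdown()
+    #   try:
+    #       await q.get()
+    #   except asyncio.QueueShutDown:
+    #       ...  # peer closed, terminate gracefully
+    # )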
def maybe_raise_aio_side_err( trio_err: Exception, ) -> None: @@ -778,31 +1049,86 @@ async def translate_aio_errors( ''' aio_err: BaseException|None = chan._aio_err + trio_to_raise: ( + AsyncioCancelled| + AsyncioTaskExited| + None + ) = chan._trio_to_raise + + if not suppress_graceful_exits: + raise trio_to_raise from (aio_err or trio_err) + + if trio_to_raise: + # import pdbp; pdbp.set_trace() + match ( + trio_to_raise, + trio_err, + ): + case ( + AsyncioTaskExited(), + trio.Cancelled()|None, + ): + log.info( + 'Ignoring aio exit signal since trio also exited!' + ) + return + + case ( + AsyncioCancelled(), + trio.Cancelled(), + ): + if not aio_done_before_trio: + log.info( + 'Ignoring aio cancelled signal since trio was also cancelled!' + ) + return + case _: + raise trio_to_raise from (aio_err or trio_err) # Check if the asyncio-side is the cause of the trio-side # error. - if ( + elif ( aio_err is not None and type(aio_err) is not AsyncioCancelled + # and ( + # type(aio_err) is not AsyncioTaskExited + # and + # not ya_trio_exited + # and + # not trio_err + # ) - # not isinstance(aio_err, CancelledError) - # type(aio_err) is not CancelledError + # TODO, case where trio_err is not None and + # aio_err is AsyncioTaskExited => raise eg! + # -[ ] maybe use a match bc this get's real + # complex fast XD + # + # or + # type(aio_err) is not AsyncioTaskExited + # and + # trio_err + # ) ): # always raise from any captured asyncio error if trio_err: raise trio_err from aio_err + # XXX NOTE! above in the `trio.ClosedResourceError` + # handler we specifically set the + # `aio_err = AsyncioCancelled` such that it is raised + # as that special exc here! raise aio_err if trio_err: raise trio_err + # await tractor.pause() # NOTE: if any ``asyncio`` error was caught, raise it here inline # here in the ``trio`` task # if trio_err: maybe_raise_aio_side_err( - trio_err=trio_err + trio_err=to_raise_trio or trio_err ) @@ -829,20 +1155,24 @@ async def run_task( async with translate_aio_errors( chan, wait_on_aio_task=True, + suppress_graceful_exits=chan._suppress_graceful_exits, ): # return single value that is the output from the - # ``asyncio`` function-as-task. Expect the mem chan api to - # do the job of handling cross-framework cancellations + # ``asyncio`` function-as-task. Expect the mem chan api + # to do the job of handling cross-framework cancellations # / errors via closure and translation in the - # ``translate_aio_errors()`` in the above ctx mngr. - return await chan.receive() + # `translate_aio_errors()` in the above ctx mngr. + + return await chan._from_aio.receive() + # return await chan.receive() @acm async def open_channel_from( target: Callable[..., Any], - **kwargs, + suppress_graceful_exits: bool = True, + **target_kwargs, ) -> AsyncIterator[Any]: ''' @@ -854,13 +1184,15 @@ async def open_channel_from( target, qsize=2**8, provide_channels=True, - **kwargs, + suppress_graceful_exits=suppress_graceful_exits, + **target_kwargs, ) # TODO, tuple form here? async with chan._from_aio: async with translate_aio_errors( chan, wait_on_aio_task=True, + suppress_graceful_exits=suppress_graceful_exits, ): # sync to a "started()"-like first delivered value from the # ``asyncio`` task. 
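# (for reference, parent-side usage of the above API reads roughly
# like the following sketch; `aio_child()` and its payloads are
# illustrative only, see the test-suite's `trio_to_aio_echo_server`
# for a real-world case,
#
#   async def aio_child(
#       to_trio: trio.MemorySendChannel,
#       from_trio: asyncio.Queue,
#   ) -> None:
#       to_trio.send_nowait('start')  # the "started" value
#       while True:
#           msg = await from_trio.get()
#           to_trio.send_nowait(msg * 2)
#
#   async with to_asyncio.open_channel_from(aio_child) as (first, chan):
#       assert first == 'start'
#       await chan.send(2)
#       assert (await chan.receive()) == 4
# )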
@@ -873,17 +1205,37 @@ async def open_channel_from( except trio.Cancelled as taskc: # await tractor.pause(shield=True) # ya it worx ;) if cs.cancel_called: - log.cancel( - f'trio-side was manually cancelled by aio side\n' - f'|_c>}}{cs!r}?\n' - ) + if isinstance(chan._trio_to_raise, AsyncioCancelled): + log.cancel( + f'trio-side was manually cancelled by aio side\n' + f'|_c>}}{cs!r}?\n' + ) # TODO, maybe a special `TrioCancelled`??? raise taskc finally: chan._trio_exited = True - chan._to_trio.close() + + # when the aio side is still ongoing but trio exits + # early we signal with a special exc (kinda like + # a `Return`-msg for IPC ctxs) + aio_task: asyncio.Task = chan._aio_task + if not aio_task.done(): + fut: asyncio.Future|None = aio_task._fut_waiter + if fut: + fut.set_exception( + TrioTaskExited( + f'but the child `asyncio` task is still running?\n' + f'>>\n' + f' |_{aio_task!r}\n' + ) + ) + else: + # XXX SHOULD NEVER HAPPEN! + await tractor.pause() + else: + chan._to_trio.close() class AsyncioRuntimeTranslationError(RuntimeError): -- 2.34.1 From 985c5a4af724a163b1e52b947bf7741695438659 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 3 Mar 2025 12:11:50 -0500 Subject: [PATCH 257/305] More `debug_mode` test support, better nursery var names --- tests/test_infected_asyncio.py | 64 ++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/tests/test_infected_asyncio.py b/tests/test_infected_asyncio.py index 77877568..8726ad80 100644 --- a/tests/test_infected_asyncio.py +++ b/tests/test_infected_asyncio.py @@ -34,7 +34,6 @@ from tractor._testing import expect_ctxc @pytest.fixture( scope='module', - # autouse=True, ) def delay(debug_mode: bool) -> int: if debug_mode: @@ -73,6 +72,7 @@ async def trio_cancels_single_aio_task(): def test_trio_cancels_aio_on_actor_side( reg_addr: tuple[str, int], delay: int, + debug_mode: bool, ): ''' Spawn an infected actor that is cancelled by the ``trio`` side @@ -82,9 +82,10 @@ def test_trio_cancels_aio_on_actor_side( async def main(): with trio.fail_after(1 + delay): async with tractor.open_nursery( - registry_addrs=[reg_addr] - ) as n: - await n.run_in_actor( + registry_addrs=[reg_addr], + debug_mode=debug_mode, + ) as an: + await an.run_in_actor( trio_cancels_single_aio_task, infect_asyncio=True, ) @@ -133,6 +134,7 @@ async def asyncio_actor( def test_aio_simple_error( reg_addr: tuple[str, int], + debug_mode: bool, ): ''' Verify a simple remote asyncio error propagates back through trio @@ -142,9 +144,10 @@ def test_aio_simple_error( ''' async def main(): async with tractor.open_nursery( - registry_addrs=[reg_addr] - ) as n: - await n.run_in_actor( + registry_addrs=[reg_addr], + debug_mode=debug_mode, + ) as an: + await an.run_in_actor( asyncio_actor, target='sleep_and_err', expect_err='AssertionError', @@ -172,14 +175,17 @@ def test_aio_simple_error( def test_tractor_cancels_aio( reg_addr: tuple[str, int], + debug_mode: bool, ): ''' Verify we can cancel a spawned asyncio task gracefully. 
''' async def main(): - async with tractor.open_nursery() as n: - portal = await n.run_in_actor( + async with tractor.open_nursery( + debug_mode=debug_mode, + ) as an: + portal = await an.run_in_actor( asyncio_actor, target='aio_sleep_forever', expect_err='trio.Cancelled', @@ -264,6 +270,7 @@ def test_context_spawns_aio_task_that_errors( reg_addr: tuple[str, int], delay: int, parent_cancels: bool, + debug_mode: bool, ): ''' Verify that spawning a task via an intertask channel ctx mngr that @@ -273,12 +280,12 @@ def test_context_spawns_aio_task_that_errors( ''' async def main(): with trio.fail_after(1 + delay): - async with tractor.open_nursery() as n: - p = await n.start_actor( + async with tractor.open_nursery() as an: + p = await an.start_actor( 'aio_daemon', enable_modules=[__name__], infect_asyncio=True, - # debug_mode=True, + debug_mode=debug_mode, loglevel='cancel', ) async with ( @@ -507,11 +514,11 @@ async def stream_from_aio( # tasks are joined.. chan.subscribe() as br, - trio.open_nursery() as n, + trio.open_nursery() as tn, ): # start 2nd task that get's broadcast the same # value set. - n.start_soon(consume, br) + tn.start_soon(consume, br) await consume(chan) else: @@ -560,8 +567,8 @@ def test_basic_interloop_channel_stream( fan_out: bool, ): async def main(): - async with tractor.open_nursery() as n: - portal = await n.run_in_actor( + async with tractor.open_nursery() as an: + portal = await an.run_in_actor( stream_from_aio, infect_asyncio=True, fan_out=fan_out, @@ -575,8 +582,8 @@ def test_basic_interloop_channel_stream( # TODO: parametrize the above test and avoid the duplication here? def test_trio_error_cancels_intertask_chan(reg_addr): async def main(): - async with tractor.open_nursery() as n: - portal = await n.run_in_actor( + async with tractor.open_nursery() as an: + portal = await an.run_in_actor( stream_from_aio, trio_raise_err=True, infect_asyncio=True, @@ -594,6 +601,7 @@ def test_trio_error_cancels_intertask_chan(reg_addr): def test_trio_closes_early_causes_aio_checkpoint_raise( reg_addr: tuple[str, int], delay: int, + debug_mode: bool, ): ''' Check that if the `trio`-task "exits early and silently" (in this @@ -607,10 +615,10 @@ def test_trio_closes_early_causes_aio_checkpoint_raise( async def main(): with trio.fail_after(1 + delay): async with tractor.open_nursery( - # debug_mode=True, + debug_mode=debug_mode, # enable_stack_on_sig=True, - ) as n: - portal = await n.run_in_actor( + ) as an: + portal = await an.run_in_actor( stream_from_aio, trio_exit_early=True, infect_asyncio=True, @@ -698,8 +706,8 @@ def test_aio_errors_and_channel_propagates_and_closes( async def main(): async with tractor.open_nursery( debug_mode=debug_mode, - ) as n: - portal = await n.run_in_actor( + ) as an: + portal = await an.run_in_actor( stream_from_aio, aio_raise_err=True, infect_asyncio=True, @@ -774,13 +782,15 @@ async def trio_to_aio_echo_server( ids='raise_error={}'.format, ) def test_echoserver_detailed_mechanics( - reg_addr, + reg_addr: tuple[str, int], + debug_mode: bool, raise_error_mid_stream, ): - async def main(): - async with tractor.open_nursery() as n: - p = await n.start_actor( + async with tractor.open_nursery( + debug_mode=debug_mode, + ) as an: + p = await an.start_actor( 'aio_server', enable_modules=[__name__], infect_asyncio=True, -- 2.34.1 From ea0643eab6d95c735c8b74ed46f411d11f367af0 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 3 Mar 2025 12:13:25 -0500 Subject: [PATCH 258/305] Add equiv of `AsyncioCancelled` for aio side Such that a `TrioCancelled` 
is raised in the aio task via `.set_exception()` to explicitly indicate and allow that task to handle a taskc request from the parent `trio.Task`. --- tractor/_exceptions.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 3382be10..f90df5fe 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -103,7 +103,16 @@ class AsyncioTaskExited(Exception): ''' -class TrioTaskExited(AsyncioCancelled): +class TrioCancelled(Exception): + ''' + Trio cancelled translation (non-base) error + for use with the `to_asyncio` module + to be raised in the `asyncio.Task` to indicate + that the `trio` side raised `Cancelled` or an error. + + ''' + +class TrioTaskExited(Exception): ''' The `trio`-side task exited without explicitly cancelling the `asyncio.Task` peer. @@ -406,6 +415,9 @@ class RemoteActorError(Exception): String-name of the (last hop's) boxed error type. ''' + # TODO, maybe support also serializing the + # `ExceptionGroup.exeptions: list[BaseException]` set under + # certain conditions? bt: Type[BaseException] = self.boxed_type if bt: return str(bt.__name__) @@ -821,8 +833,11 @@ class MsgTypeError( ''' if ( (_bad_msg := self.msgdata.get('_bad_msg')) - and - isinstance(_bad_msg, PayloadMsg) + and ( + isinstance(_bad_msg, PayloadMsg) + or + isinstance(_bad_msg, msgtypes.Start) + ) ): return _bad_msg -- 2.34.1 From 0f103f49d40e4ea7066df97b295bcd84db79574d Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 3 Mar 2025 12:17:51 -0500 Subject: [PATCH 259/305] Moar sclang log fmting tweaks --- tractor/_ipc.py | 12 ++++++------ tractor/_rpc.py | 2 +- tractor/_state.py | 1 + tractor/_streaming.py | 2 +- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/tractor/_ipc.py b/tractor/_ipc.py index a1cb0359..83186147 100644 --- a/tractor/_ipc.py +++ b/tractor/_ipc.py @@ -255,8 +255,8 @@ class MsgpackTCPStream(MsgTransport): raise TransportClosed( message=( f'IPC transport already closed by peer\n' - f'x)> {type(trans_err)}\n' - f' |_{self}\n' + f'x]> {type(trans_err)}\n' + f' |_{self}\n' ), loglevel=loglevel, ) from trans_err @@ -273,8 +273,8 @@ class MsgpackTCPStream(MsgTransport): raise TransportClosed( message=( f'IPC transport already manually closed locally?\n' - f'x)> {type(closure_err)} \n' - f' |_{self}\n' + f'x]> {type(closure_err)} \n' + f' |_{self}\n' ), loglevel='error', raise_on_report=( @@ -289,8 +289,8 @@ class MsgpackTCPStream(MsgTransport): raise TransportClosed( message=( f'IPC transport already gracefully closed\n' - f')>\n' - f'|_{self}\n' + f']>\n' + f' |_{self}\n' ), loglevel='transport', # cause=??? # handy or no? diff --git a/tractor/_rpc.py b/tractor/_rpc.py index 9e50c5de..086cfff6 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -851,7 +851,7 @@ async def try_ship_error_to_remote( log.critical( 'IPC transport failure -> ' f'failed to ship error to {remote_descr}!\n\n' - f'{type(msg)!r}[{msg.boxed_type}] X=> {channel.uid}\n' + f'{type(msg)!r}[{msg.boxed_type_str}] X=> {channel.uid}\n' f'\n' # TODO: use `.msg.preetty_struct` for this! f'{msg}\n' diff --git a/tractor/_state.py b/tractor/_state.py index a87ad36b..79c8bdea 100644 --- a/tractor/_state.py +++ b/tractor/_state.py @@ -108,6 +108,7 @@ def is_main_process() -> bool: return mp.current_process().name == 'MainProcess' +# TODO, more verby name? 
def debug_mode() -> bool: ''' Bool determining if "debug mode" is on which enables diff --git a/tractor/_streaming.py b/tractor/_streaming.py index bc87164e..58e9b069 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -376,7 +376,7 @@ class MsgStream(trio.abc.Channel): f'Stream self-closed by {self._ctx.side!r}-side before EoC\n' # } bc a stream is a "scope"/msging-phase inside an IPC f'x}}>\n' - f'|_{self}\n' + f' |_{self}\n' ) log.cancel(message) self._eoc = trio.EndOfChannel(message) -- 2.34.1 From 2008372693e462354493227bbfe3b3be8251b712 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 3 Mar 2025 12:18:10 -0500 Subject: [PATCH 260/305] Hide `open_nursery()` frame by def --- tractor/_supervise.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tractor/_supervise.py b/tractor/_supervise.py index 4ecc1a29..bc6bc983 100644 --- a/tractor/_supervise.py +++ b/tractor/_supervise.py @@ -571,7 +571,7 @@ async def _open_and_supervise_one_cancels_all_nursery( @acm # @api_frame async def open_nursery( - hide_tb: bool = False, + hide_tb: bool = True, **kwargs, # ^TODO, paramspec for `open_root_actor()` -- 2.34.1 From 36bca2844db9603dc21f09631731051d669ec1ff Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 3 Mar 2025 12:19:11 -0500 Subject: [PATCH 261/305] Fix `roundtripped` ref error in `validate_payload_msg()` --- tractor/msg/_ops.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index 2faadb9f..615ad0c8 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -796,6 +796,7 @@ def validate_payload_msg( __tracebackhide__: bool = hide_tb codec: MsgCodec = current_codec() msg_bytes: bytes = codec.encode(pld_msg) + roundtripped: Started|None = None try: roundtripped: Started = codec.decode(msg_bytes) ctx: Context = getattr(ipc, 'ctx', ipc) @@ -832,9 +833,13 @@ def validate_payload_msg( verb_header='Trying to send ', is_invalid_payload=True, ) - except BaseException: + except BaseException as _be: + if not roundtripped: + raise verr + + be = _be __tracebackhide__: bool = False - raise + raise be if not raise_mte: return mte -- 2.34.1 From e8b78ae27a2aec69bffa367e54f756493e53f698 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 3 Mar 2025 12:20:33 -0500 Subject: [PATCH 262/305] Go to loose egs in `Actor` root & service nurseries (for now..) --- tractor/_runtime.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tractor/_runtime.py b/tractor/_runtime.py index fef92e66..e7faaedf 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -1721,11 +1721,15 @@ async def async_main( # parent is kept alive as a resilient service until # cancellation steps have (mostly) occurred in # a deterministic way. - async with trio.open_nursery() as root_nursery: + async with trio.open_nursery( + strict_exception_groups=False, + ) as root_nursery: actor._root_n = root_nursery assert actor._root_n - async with trio.open_nursery() as service_nursery: + async with trio.open_nursery( + strict_exception_groups=False, + ) as service_nursery: # This nursery is used to handle all inbound # connections to us such that if the TCP server # is killed, connections can continue to process -- 2.34.1 From 7fb6e28307e2e6c64a9f618f6af0a316c025176c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 3 Mar 2025 12:24:29 -0500 Subject: [PATCH 263/305] Various test tweaks related to 3.13 egs Including changes like, - loose eg flagging in various test emedded `trio.open_nursery()`s. 
- changes to eg handling (like using `except*`). - added `debug_mode` integration to tests that needed some REPLin in order to figure out appropriate updates. --- .../ipc_failure_during_stream.py | 8 +++++--- examples/debugging/multi_daemon_subactors.py | 8 +++++--- .../debugging/multi_subactor_root_errors.py | 2 +- examples/full_fledged_streaming_service.py | 2 +- tests/devx/test_debugger.py | 9 ++++++--- tests/test_advanced_faults.py | 12 ++++-------- tests/test_advanced_streaming.py | 10 +++++++++- tests/test_cancellation.py | 10 +++++++++- tests/test_caps_based_msging.py | 8 ++++---- tests/test_child_manages_service_nursery.py | 18 +++++++++--------- tests/test_discovery.py | 4 +++- tests/test_trioisms.py | 5 ++++- 12 files changed, 60 insertions(+), 36 deletions(-) diff --git a/examples/advanced_faults/ipc_failure_during_stream.py b/examples/advanced_faults/ipc_failure_during_stream.py index 60b28c3e..950d5a6f 100644 --- a/examples/advanced_faults/ipc_failure_during_stream.py +++ b/examples/advanced_faults/ipc_failure_during_stream.py @@ -62,7 +62,9 @@ async def recv_and_spawn_net_killers( await ctx.started() async with ( ctx.open_stream() as stream, - trio.open_nursery() as n, + trio.open_nursery( + strict_exception_groups=False, + ) as tn, ): async for i in stream: print(f'child echoing {i}') @@ -77,11 +79,11 @@ async def recv_and_spawn_net_killers( i >= break_ipc_after ): broke_ipc = True - n.start_soon( + tn.start_soon( iter_ipc_stream, stream, ) - n.start_soon( + tn.start_soon( partial( break_ipc_then_error, stream=stream, diff --git a/examples/debugging/multi_daemon_subactors.py b/examples/debugging/multi_daemon_subactors.py index 7844ccef..844a228a 100644 --- a/examples/debugging/multi_daemon_subactors.py +++ b/examples/debugging/multi_daemon_subactors.py @@ -21,11 +21,13 @@ async def name_error(): async def main(): - """Test breakpoint in a streaming actor. - """ + ''' + Test breakpoint in a streaming actor. 
+ + ''' async with tractor.open_nursery( debug_mode=True, - # loglevel='cancel', + loglevel='cancel', # loglevel='devx', ) as n: diff --git a/examples/debugging/multi_subactor_root_errors.py b/examples/debugging/multi_subactor_root_errors.py index 640f2223..31bb7dd1 100644 --- a/examples/debugging/multi_subactor_root_errors.py +++ b/examples/debugging/multi_subactor_root_errors.py @@ -40,7 +40,7 @@ async def main(): """ async with tractor.open_nursery( debug_mode=True, - # loglevel='cancel', + loglevel='devx', ) as n: # spawn both actors diff --git a/examples/full_fledged_streaming_service.py b/examples/full_fledged_streaming_service.py index be4c372e..d859f647 100644 --- a/examples/full_fledged_streaming_service.py +++ b/examples/full_fledged_streaming_service.py @@ -91,7 +91,7 @@ async def main() -> list[int]: an: ActorNursery async with tractor.open_nursery( loglevel='cancel', - debug_mode=True, + # debug_mode=True, ) as an: seed = int(1e3) diff --git a/tests/devx/test_debugger.py b/tests/devx/test_debugger.py index 254b92a1..b63c405c 100644 --- a/tests/devx/test_debugger.py +++ b/tests/devx/test_debugger.py @@ -310,10 +310,13 @@ def test_subactor_breakpoint( child.expect(EOF) assert in_prompt_msg( - child, - ['RemoteActorError:', + child, [ + 'MessagingError:', + 'RemoteActorError:', "('breakpoint_forever'", - 'bdb.BdbQuit',] + 'bdb.BdbQuit', + ], + pause_on_false=True, ) diff --git a/tests/test_advanced_faults.py b/tests/test_advanced_faults.py index a4d17791..85bac932 100644 --- a/tests/test_advanced_faults.py +++ b/tests/test_advanced_faults.py @@ -3,7 +3,6 @@ Sketchy network blackoutz, ugly byzantine gens, puedes eschuchar la cancelacion?.. ''' -import itertools from functools import partial from types import ModuleType @@ -230,13 +229,10 @@ def test_ipc_channel_break_during_stream( # get raw instance from pytest wrapper value = excinfo.value if isinstance(value, ExceptionGroup): - value = next( - itertools.dropwhile( - lambda exc: not isinstance(exc, expect_final_exc), - value.exceptions, - ) - ) - assert value + excs = value.exceptions + assert len(excs) == 1 + final_exc = excs[0] + assert isinstance(final_exc, expect_final_exc) @tractor.context diff --git a/tests/test_advanced_streaming.py b/tests/test_advanced_streaming.py index 3134b9c2..64f24167 100644 --- a/tests/test_advanced_streaming.py +++ b/tests/test_advanced_streaming.py @@ -307,7 +307,15 @@ async def inf_streamer( async with ( ctx.open_stream() as stream, - trio.open_nursery() as tn, + + # XXX TODO, INTERESTING CASE!! + # - if we don't collapse the eg then the embedded + # `trio.EndOfChannel` doesn't propagate directly to the above + # .open_stream() parent, resulting in it also raising instead + # of gracefully absorbing as normal.. so how to handle? 
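+        # (for context: under the modern strict-eg default even a
+        # lone exc escaping a nursery gets wrapped, i.e. roughly,
+        #
+        #   async with trio.open_nursery() as tn:  # strict egs
+        #       tn.start_soon(raises_eoc)  # any EoC-raising task
+        #   # -> an `ExceptionGroup([EndOfChannel])` propagates,
+        #   # NOT the bare EoC which `.open_stream().__aexit__()`
+        #   # knows how to gracefully absorb.)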
+ trio.open_nursery( + strict_exception_groups=False, + ) as tn, ): async def close_stream_on_sentinel(): async for msg in stream: diff --git a/tests/test_cancellation.py b/tests/test_cancellation.py index ece4d3c7..ca14ae4b 100644 --- a/tests/test_cancellation.py +++ b/tests/test_cancellation.py @@ -519,7 +519,9 @@ def test_cancel_via_SIGINT_other_task( async def main(): # should never timeout since SIGINT should cancel the current program with trio.fail_after(timeout): - async with trio.open_nursery() as n: + async with trio.open_nursery( + strict_exception_groups=False, + ) as n: await n.start(spawn_and_sleep_forever) if 'mp' in spawn_backend: time.sleep(0.1) @@ -612,6 +614,12 @@ def test_fast_graceful_cancel_when_spawn_task_in_soft_proc_wait_for_daemon( nurse.start_soon(delayed_kbi) await p.run(do_nuthin) + + # need to explicitly re-raise the lone kbi..now + except* KeyboardInterrupt as kbi_eg: + assert (len(excs := kbi_eg.exceptions) == 1) + raise excs[0] + finally: duration = time.time() - start if duration > timeout: diff --git a/tests/test_caps_based_msging.py b/tests/test_caps_based_msging.py index 6064c2cf..ba2bb101 100644 --- a/tests/test_caps_based_msging.py +++ b/tests/test_caps_based_msging.py @@ -874,13 +874,13 @@ def chk_pld_type( return roundtrip -def test_limit_msgspec(): - +def test_limit_msgspec( + debug_mode: bool, +): async def main(): async with tractor.open_root_actor( - debug_mode=True + debug_mode=debug_mode, ): - # ensure we can round-trip a boxing `PayloadMsg` assert chk_pld_type( payload_spec=Any, diff --git a/tests/test_child_manages_service_nursery.py b/tests/test_child_manages_service_nursery.py index 956fccd2..540e9b2e 100644 --- a/tests/test_child_manages_service_nursery.py +++ b/tests/test_child_manages_service_nursery.py @@ -95,8 +95,8 @@ async def trio_main( # stash a "service nursery" as "actor local" (aka a Python global) global _nursery - n = _nursery - assert n + tn = _nursery + assert tn async def consume_stream(): async with wrapper_mngr() as stream: @@ -104,10 +104,10 @@ async def trio_main( print(msg) # run 2 tasks to ensure broadcaster chan use - n.start_soon(consume_stream) - n.start_soon(consume_stream) + tn.start_soon(consume_stream) + tn.start_soon(consume_stream) - n.start_soon(trio_sleep_and_err) + tn.start_soon(trio_sleep_and_err) await trio.sleep_forever() @@ -119,8 +119,8 @@ async def open_actor_local_nursery( global _nursery async with trio.open_nursery( strict_exception_groups=False, - ) as n: - _nursery = n + ) as tn: + _nursery = tn await ctx.started() await trio.sleep(10) # await trio.sleep(1) @@ -134,7 +134,7 @@ async def open_actor_local_nursery( # never yields back.. aka a scenario where the # ``tractor.context`` task IS NOT in the service n's cancel # scope. - n.cancel_scope.cancel() + tn.cancel_scope.cancel() @pytest.mark.parametrize( @@ -159,7 +159,7 @@ def test_actor_managed_trio_nursery_task_error_cancels_aio( async with tractor.open_nursery() as n: p = await n.start_actor( 'nursery_mngr', - infect_asyncio=asyncio_mode, + infect_asyncio=asyncio_mode, # TODO, is this enabling debug mode? 
enable_modules=[__name__], ) async with ( diff --git a/tests/test_discovery.py b/tests/test_discovery.py index 508fdbe1..8d014ce3 100644 --- a/tests/test_discovery.py +++ b/tests/test_discovery.py @@ -181,7 +181,9 @@ async def spawn_and_check_registry( try: async with tractor.open_nursery() as n: - async with trio.open_nursery() as trion: + async with trio.open_nursery( + strict_exception_groups=False, + ) as trion: portals = {} for i in range(3): diff --git a/tests/test_trioisms.py b/tests/test_trioisms.py index fad99f11..449ddcc2 100644 --- a/tests/test_trioisms.py +++ b/tests/test_trioisms.py @@ -101,6 +101,7 @@ def test_stashed_child_nursery(use_start_soon): def test_acm_embedded_nursery_propagates_enter_err( canc_from_finally: bool, unmask_from_canc: bool, + debug_mode: bool, ): ''' Demo how a masking `trio.Cancelled` could be handled by unmasking from the @@ -174,7 +175,9 @@ def test_acm_embedded_nursery_propagates_enter_err( await trio.lowlevel.checkpoint() async def _main(): - with tractor.devx.open_crash_handler() as bxerr: + with tractor.devx.maybe_open_crash_handler( + pdb=debug_mode, + ) as bxerr: assert not bxerr.value async with ( -- 2.34.1 From 4bbb1c363af5aee8f43da39c5b31e0ef5b6d9b08 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 3 Mar 2025 12:32:25 -0500 Subject: [PATCH 264/305] Add (masked) meta-debug-fixture for determining if `debug_mode` is set in harness.. --- tests/conftest.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 810b642a..674767ff 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -75,7 +75,10 @@ def pytest_configure(config): @pytest.fixture(scope='session') def debug_mode(request): - return request.config.option.tractor_debug_mode + debug_mode: bool = request.config.option.tractor_debug_mode + # if debug_mode: + # breakpoint() + return debug_mode @pytest.fixture(scope='session', autouse=True) @@ -92,6 +95,12 @@ def spawn_backend(request) -> str: return request.config.option.spawn_backend +# @pytest.fixture(scope='function', autouse=True) +# def debug_enabled(request) -> str: +# from tractor import _state +# if _state._runtime_vars['_debug_mode']: +# breakpoint() + _ci_env: bool = os.environ.get('CI', False) -- 2.34.1 From 1143dc2862d56a479cb376442d69b61b58c1941f Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 3 Mar 2025 13:57:54 -0500 Subject: [PATCH 265/305] Another couple loose-ifies for discovery and advanced fault suites --- tests/test_advanced_faults.py | 11 ++++++----- tests/test_discovery.py | 4 +++- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/test_advanced_faults.py b/tests/test_advanced_faults.py index 85bac932..de8a0e1c 100644 --- a/tests/test_advanced_faults.py +++ b/tests/test_advanced_faults.py @@ -255,15 +255,16 @@ async def break_ipc_after_started( def test_stream_closed_right_after_ipc_break_and_zombie_lord_engages(): ''' - Verify that is a subactor's IPC goes down just after bringing up a stream - the parent can trigger a SIGINT and the child will be reaped out-of-IPC by - the localhost process supervision machinery: aka "zombie lord". + Verify that is a subactor's IPC goes down just after bringing up + a stream the parent can trigger a SIGINT and the child will be + reaped out-of-IPC by the localhost process supervision machinery: + aka "zombie lord". 
''' async def main(): with trio.fail_after(3): - async with tractor.open_nursery() as n: - portal = await n.start_actor( + async with tractor.open_nursery() as an: + portal = await an.start_actor( 'ipc_breaker', enable_modules=[__name__], ) diff --git a/tests/test_discovery.py b/tests/test_discovery.py index 8d014ce3..87455983 100644 --- a/tests/test_discovery.py +++ b/tests/test_discovery.py @@ -318,7 +318,9 @@ async def close_chans_before_nursery( async with portal2.open_stream_from( stream_forever ) as agen2: - async with trio.open_nursery() as n: + async with trio.open_nursery( + strict_exception_groups=False, + ) as n: n.start_soon(streamer, agen1) n.start_soon(cancel, use_signal, .5) try: -- 2.34.1 From b84088c3645607a0f165b9af6ab1512d8357caab Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 3 Mar 2025 17:55:07 -0500 Subject: [PATCH 266/305] Fix docs tests with yet another loosie-goosie So the KBI propagates up to the actor nursery scope and also avoid running any `examples/multihost/` subdir scripts. --- examples/quick_cluster.py | 19 +++++++++++-------- tests/test_docs_examples.py | 37 +++++++++++++++++++++++-------------- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/examples/quick_cluster.py b/examples/quick_cluster.py index ca692a90..2378a3cf 100644 --- a/examples/quick_cluster.py +++ b/examples/quick_cluster.py @@ -3,20 +3,18 @@ import trio import tractor -async def sleepy_jane(): - uid = tractor.current_actor().uid +async def sleepy_jane() -> None: + uid: tuple = tractor.current_actor().uid print(f'Yo i am actor {uid}') await trio.sleep_forever() async def main(): ''' - Spawn a flat actor cluster, with one process per - detected core. + Spawn a flat actor cluster, with one process per detected core. ''' portal_map: dict[str, tractor.Portal] - results: dict[str, str] # look at this hip new syntax! 
async with ( @@ -25,11 +23,16 @@ async def main(): modules=[__name__] ) as portal_map, - trio.open_nursery() as n, + trio.open_nursery( + strict_exception_groups=False, + ) as tn, ): for (name, portal) in portal_map.items(): - n.start_soon(portal.run, sleepy_jane) + tn.start_soon( + portal.run, + sleepy_jane, + ) await trio.sleep(0.5) @@ -41,4 +44,4 @@ if __name__ == '__main__': try: trio.run(main) except KeyboardInterrupt: - pass + print('trio cancelled by KBI') diff --git a/tests/test_docs_examples.py b/tests/test_docs_examples.py index fdf54bca..cc4904f8 100644 --- a/tests/test_docs_examples.py +++ b/tests/test_docs_examples.py @@ -19,7 +19,7 @@ from tractor._testing import ( @pytest.fixture def run_example_in_subproc( loglevel: str, - testdir: pytest.Testdir, + testdir: pytest.Pytester, reg_addr: tuple[str, int], ): @@ -81,28 +81,36 @@ def run_example_in_subproc( # walk yields: (dirpath, dirnames, filenames) [ - (p[0], f) for p in os.walk(examples_dir()) for f in p[2] + (p[0], f) + for p in os.walk(examples_dir()) + for f in p[2] - if '__' not in f - and f[0] != '_' - and 'debugging' not in p[0] - and 'integration' not in p[0] - and 'advanced_faults' not in p[0] - and 'multihost' not in p[0] + if ( + '__' not in f + and f[0] != '_' + and 'debugging' not in p[0] + and 'integration' not in p[0] + and 'advanced_faults' not in p[0] + and 'multihost' not in p[0] + ) ], - ids=lambda t: t[1], ) -def test_example(run_example_in_subproc, example_script): - """Load and run scripts from this repo's ``examples/`` dir as a user +def test_example( + run_example_in_subproc, + example_script, +): + ''' + Load and run scripts from this repo's ``examples/`` dir as a user would copy and pasing them into their editor. On windows a little more "finessing" is done to make ``multiprocessing`` play nice: we copy the ``__main__.py`` into the test directory and invoke the script as a module with ``python -m test_example``. - """ - ex_file = os.path.join(*example_script) + + ''' + ex_file: str = os.path.join(*example_script) if 'rpc_bidir_streaming' in ex_file and sys.version_info < (3, 9): pytest.skip("2-way streaming example requires py3.9 async with syntax") @@ -128,7 +136,8 @@ def test_example(run_example_in_subproc, example_script): # shouldn't eventually once we figure out what's # a better way to be explicit about aio side # cancels? - and 'asyncio.exceptions.CancelledError' not in last_error + and + 'asyncio.exceptions.CancelledError' not in last_error ): raise Exception(errmsg) -- 2.34.1 From d874513448ab6a50082a4d47b003e3d4f585a0a9 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 3 Mar 2025 18:01:16 -0500 Subject: [PATCH 267/305] Draft some eg collapsing helpers Inside a new `.trionics._beg` and exposed from the subpkg ns in anticipation of the `strict_exception_groups=False` being removed by `trio` in py 3.15. Notes, - mk an embedded single-exc "extractor" using a `BaseExceptionGroup.exceptions` length check, when 1 return the lone child. - use the above in a new `@acm`, async bc it's most likely to be composed in an `async with` tuple-style sequence block, called `collapse_eg()` which acts a one line "absorber" for when the above mentioned flag is no logner supported by `trio.open_nursery()`. All untested atm fwiw.. but soon to be used in our test suite(s) likely! 
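As a (hypothetical, given the untested status) usage sketch, the
intended composition reads something like,

    async with (
        collapse_eg(),
        trio.open_nursery() as tn,
    ):
        tn.start_soon(crashing_task)  # any lone-erroring child

where a single child task error should then propagate bare to the
caller, mimicking the old `strict_exception_groups=False` behaviour,
instead of arriving wrapped in a `BaseExceptionGroup`.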
--- tractor/trionics/__init__.py | 3 ++ tractor/trionics/_beg.py | 59 ++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 tractor/trionics/_beg.py diff --git a/tractor/trionics/__init__.py b/tractor/trionics/__init__.py index c51b7c51..df9b6f26 100644 --- a/tractor/trionics/__init__.py +++ b/tractor/trionics/__init__.py @@ -29,3 +29,6 @@ from ._broadcast import ( BroadcastReceiver as BroadcastReceiver, Lagged as Lagged, ) +from ._beg import ( + collapse_eg as collapse_eg, +) diff --git a/tractor/trionics/_beg.py b/tractor/trionics/_beg.py new file mode 100644 index 00000000..37b14238 --- /dev/null +++ b/tractor/trionics/_beg.py @@ -0,0 +1,59 @@ +# tractor: structured concurrent "actors". +# Copyright 2018-eternity Tyler Goodlet. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +`BaseExceptionGroup` related utils and helpers pertaining to +first-class-`trio` from a historical perspective B) + +''' +from contextlib import ( + # bontextmanager as cm, + asynccontextmanager as acm, +) + + +def maybe_collapse_eg( + beg: BaseExceptionGroup, +) -> BaseException: + ''' + If the input beg can collapse to a single non-eg sub-exception, + return it instead. + + ''' + if len(excs := beg.exceptions) == 1: + return excs[0] + + return beg + + +@acm +async def collapse_eg(): + ''' + If `BaseExceptionGroup` raised in the body scope is + "collapse-able" (in the same way that + `trio.open_nursery(strict_exception_groups=False)` works) then + only raise the lone emedded non-eg in in place. + + ''' + try: + yield + except* BaseException as beg: + if ( + exc := maybe_collapse_eg(beg) + ) is not beg: + raise exc + + raise beg -- 2.34.1 From 058f8f4ef8d85c51c3ad6e7498dbcfe1731405d4 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 3 Mar 2025 18:30:05 -0500 Subject: [PATCH 268/305] Use `collapse_eg()` in broadcaster suite Around the test embedded `trio.open_nursery()` calls as expected. Also tidy up the various nursery var names. --- tests/test_task_broadcasting.py | 29 ++++++++++++++++++----------- tractor/trionics/_beg.py | 1 - 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/tests/test_task_broadcasting.py b/tests/test_task_broadcasting.py index 4a2209eb..b57d63f8 100644 --- a/tests/test_task_broadcasting.py +++ b/tests/test_task_broadcasting.py @@ -2,7 +2,9 @@ Broadcast channels for fan-out to local tasks. 
""" -from contextlib import asynccontextmanager +from contextlib import ( + asynccontextmanager as acm, +) from functools import partial from itertools import cycle import time @@ -15,6 +17,7 @@ import tractor from tractor.trionics import ( broadcast_receiver, Lagged, + collapse_eg, ) @@ -62,7 +65,7 @@ async def ensure_sequence( break -@asynccontextmanager +@acm async def open_sequence_streamer( sequence: list[int], @@ -74,9 +77,9 @@ async def open_sequence_streamer( async with tractor.open_nursery( arbiter_addr=reg_addr, start_method=start_method, - ) as tn: + ) as an: - portal = await tn.start_actor( + portal = await an.start_actor( 'sequence_echoer', enable_modules=[__name__], ) @@ -155,9 +158,12 @@ def test_consumer_and_parent_maybe_lag( ) as stream: try: - async with trio.open_nursery() as n: + async with ( + collapse_eg(), + trio.open_nursery() as tn, + ): - n.start_soon( + tn.start_soon( ensure_sequence, stream, sequence.copy(), @@ -230,8 +236,8 @@ def test_faster_task_to_recv_is_cancelled_by_slower( ) as stream: - async with trio.open_nursery() as n: - n.start_soon( + async with trio.open_nursery() as tn: + tn.start_soon( ensure_sequence, stream, sequence.copy(), @@ -253,7 +259,7 @@ def test_faster_task_to_recv_is_cancelled_by_slower( continue print('cancelling faster subtask') - n.cancel_scope.cancel() + tn.cancel_scope.cancel() try: value = await stream.receive() @@ -371,13 +377,13 @@ def test_ensure_slow_consumers_lag_out( f'on {lags}:{value}') return - async with trio.open_nursery() as nursery: + async with trio.open_nursery() as tn: for i in range(1, num_laggers): task_name = f'sub_{i}' laggers[task_name] = 0 - nursery.start_soon( + tn.start_soon( partial( sub_and_print, delay=i*0.001, @@ -497,6 +503,7 @@ def test_no_raise_on_lag(): # internals when the no raise flag is set. loglevel='warning', ), + collapse_eg(), trio.open_nursery() as n, ): n.start_soon(slow) diff --git a/tractor/trionics/_beg.py b/tractor/trionics/_beg.py index 37b14238..843b9f70 100644 --- a/tractor/trionics/_beg.py +++ b/tractor/trionics/_beg.py @@ -20,7 +20,6 @@ first-class-`trio` from a historical perspective B) ''' from contextlib import ( - # bontextmanager as cm, asynccontextmanager as acm, ) -- 2.34.1 From adcb0272e5e4a818bbbc0fc4c00c2b653d487bc8 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 3 Mar 2025 18:53:13 -0500 Subject: [PATCH 269/305] Match `maybe_open_crash_handler()` to non-maybe version Such that it will deliver a `BoxedMaybeException` to the caller regardless whether `pdb` is set, and proxy through all `**kwargs`. --- tractor/devx/_debug.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/tractor/devx/_debug.py b/tractor/devx/_debug.py index 884c5aea..c6ca1d89 100644 --- a/tractor/devx/_debug.py +++ b/tractor/devx/_debug.py @@ -2287,6 +2287,13 @@ def _set_trace( repl.set_trace(frame=caller_frame) +# XXX TODO! XXX, ensure `pytest -s` doesn't just +# hang on this being called in a test.. XD +# -[ ] maybe something in our test suite or is there +# some way we can detect output capture is enabled +# from the process itself? +# |_ronny: ? +# async def pause( *, hide_tb: bool = True, @@ -3194,6 +3201,15 @@ async def maybe_wait_for_debugger( return False +class BoxedMaybeException(Struct): + ''' + Box a maybe-exception for post-crash introspection usage + from the body of a `open_crash_handler()` scope. + + ''' + value: BaseException|None = None + + # TODO: better naming and what additionals? # - [ ] optional runtime plugging? 
# - [ ] detection for sync vs. async code? @@ -3224,9 +3240,6 @@ def open_crash_handler( ''' __tracebackhide__: bool = tb_hide - class BoxedMaybeException(Struct): - value: BaseException|None = None - # TODO, yield a `outcome.Error`-like boxed type? # -[~] use `outcome.Value/Error` X-> frozen! # -[x] write our own..? @@ -3268,6 +3281,8 @@ def open_crash_handler( def maybe_open_crash_handler( pdb: bool = False, tb_hide: bool = True, + + **kwargs, ): ''' Same as `open_crash_handler()` but with bool input flag @@ -3278,9 +3293,11 @@ def maybe_open_crash_handler( ''' __tracebackhide__: bool = tb_hide - rtctx = nullcontext + rtctx = nullcontext( + enter_result=BoxedMaybeException() + ) if pdb: - rtctx = open_crash_handler + rtctx = open_crash_handler(**kwargs) - with rtctx(): - yield + with rtctx as boxed_maybe_exc: + yield boxed_maybe_exc -- 2.34.1 From 2078bea7f77dba55162623dfb190cc8a4d7a4acd Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 3 Mar 2025 18:55:02 -0500 Subject: [PATCH 270/305] Another loosie in the trioisms suite --- tests/test_trioisms.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_trioisms.py b/tests/test_trioisms.py index 449ddcc2..9f1ccec9 100644 --- a/tests/test_trioisms.py +++ b/tests/test_trioisms.py @@ -64,7 +64,9 @@ def test_stashed_child_nursery(use_start_soon): async def main(): async with ( - trio.open_nursery() as pn, + trio.open_nursery( + strict_exception_groups=False, + ) as pn, ): cn = await pn.start(mk_child_nursery) assert cn -- 2.34.1 From ef96833d6c486e853b4d9489578b53f0661333fd Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 3 Mar 2025 19:45:29 -0500 Subject: [PATCH 271/305] Fix an `aio_err` ref bug --- tractor/to_asyncio.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tractor/to_asyncio.py b/tractor/to_asyncio.py index f65cc7ef..baef9816 100644 --- a/tractor/to_asyncio.py +++ b/tractor/to_asyncio.py @@ -348,7 +348,6 @@ def _run_asyncio_task( trio_task: trio.Task = trio.lowlevel.current_task() trio_cs = trio.CancelScope() aio_task_complete = trio.Event() - aio_err: BaseException|None = None chan = LinkedTaskChannel( _to_aio=aio_q, # asyncio.Queue @@ -392,7 +391,7 @@ def _run_asyncio_task( if ( result != orig and - aio_err is None + chan._aio_err is None and # in the `open_channel_from()` case we don't -- 2.34.1 From 60eca816e7f1ca9c888cf458bdb284f8c09090e2 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 3 Mar 2025 21:50:51 -0500 Subject: [PATCH 272/305] Be extra sure to re-raise EoCs from translator That is whenever `trio.EndOfChannel` is raised (presumably from the `._to_trio.receive()` call inside `LinkedTaskChannel.receive()`) we need to be extra certain that we let it bubble upward transparently DESPITE special exc-as-signal handling that is normally suppressed from the aio side; REPEAT we want to ALWAYS bubble any `trio_err == trio.EndOfChannel` in the `finally:` handler of `translate_aio_errors()` despite `chan._trio_to_raise == AsyncioTaskExited` such that the caller's iterable machinery will operate as normal when the inter-task stream is stopped (again, presumably by the aio side task terminating the inter-task stream). Main impl deats for this, - in the EoC handler block ensure we assign both `chan._trio_err` and the local `trio_err` as well as continue to re-raise. - add a case to the match block in the `finally:` handler which FOR SURE re-raises any `type(trio_err) is EndOfChannel`! 
Additionally fix a bad bug, - a ref bug where we were NOT using the `except BaseException as _trio_err` to assign to `chan._trio_err` (by accident was missing the leading `_`..) Unrelated impl tweak, - move all `maybe_raise_aio_side_err()` content back to inline with its parent func - makes it easier to use `tractor.pause()` mostly Bp - go back to trying to use `aio_task.set_exception(aio_taskc)` for now even though i'm pretty sure we're going to move to a try-fute-first style helper for this in the future. Adjust some tests to match/mk-them-green, - break from `aio_echo_server()` recv loop on `to_asyncio.TrioTaskExited` much like how you'd expect to (implicitly with a `for`) with a `trio.EndOfChannel`. - toss in a masked `value is None` pause point i needed for debugging inf looping caused by not re-raising EoCs per the main patch description. - add a debug-mode sized delay to root-infected test. --- tests/test_infected_asyncio.py | 16 ++- tests/test_root_infect_asyncio.py | 8 +- tractor/to_asyncio.py | 198 ++++++++++++++---------------- 3 files changed, 109 insertions(+), 113 deletions(-) diff --git a/tests/test_infected_asyncio.py b/tests/test_infected_asyncio.py index 8726ad80..465decca 100644 --- a/tests/test_infected_asyncio.py +++ b/tests/test_infected_asyncio.py @@ -491,7 +491,13 @@ async def stream_from_aio( ], ): async for value in chan: - print(f'trio received {value}') + print(f'trio received: {value!r}') + + # XXX, debugging EoC not being handled correctly + # in `transate_aio_errors()`.. + # if value is None: + # await tractor.pause(shield=True) + pulled.append(value) if value == 50: @@ -733,7 +739,13 @@ async def aio_echo_server( to_trio.send_nowait('start') while True: - msg = await from_trio.get() + try: + msg = await from_trio.get() + except to_asyncio.TrioTaskExited: + print( + 'breaking aio echo loop due to `trio` exit!' + ) + break # echo the msg back to_trio.send_nowait(msg) diff --git a/tests/test_root_infect_asyncio.py b/tests/test_root_infect_asyncio.py index 331b6311..93deba13 100644 --- a/tests/test_root_infect_asyncio.py +++ b/tests/test_root_infect_asyncio.py @@ -39,7 +39,7 @@ def test_infected_root_actor( ''' async def _trio_main(): - with trio.fail_after(2): + with trio.fail_after(2 if not debug_mode else 999): first: str chan: to_asyncio.LinkedTaskChannel async with ( @@ -59,7 +59,11 @@ def test_infected_root_actor( assert out == i print(f'asyncio echoing {i}') - if raise_error_mid_stream and i == 500: + if ( + raise_error_mid_stream + and + i == 500 + ): raise raise_error_mid_stream if out is None: diff --git a/tractor/to_asyncio.py b/tractor/to_asyncio.py index baef9816..7b87be0b 100644 --- a/tractor/to_asyncio.py +++ b/tractor/to_asyncio.py @@ -428,8 +428,7 @@ def _run_asyncio_task( not chan._aio_err ): chan._trio_to_raise = AsyncioTaskExited( - f'Task existed with final result\n' - f'{result!r}\n' + f'Task exited with final result: {result!r}\n' ) # only close the sender side which will relay @@ -741,7 +740,6 @@ async def translate_aio_errors( aio_done_before_trio: bool = aio_task.done() assert aio_task trio_err: BaseException|None = None - to_raise_trio: BaseException|None = None try: yield # back to one of the cross-loop apis except trio.Cancelled as taskc: @@ -777,8 +775,9 @@ async def translate_aio_errors( # called from `LinkedTaskChannel.receive()` which we want # passthrough and further we have no special meaning for it in # terms of relaying errors or signals from the aio side! 
- except trio.EndOfChannel: - raise + except trio.EndOfChannel as eoc: + trio_err = chan._trio_err = eoc + raise eoc # NOTE ALSO SEE the matching note in the `cancel_trio()` asyncio # task-done-callback. @@ -824,7 +823,7 @@ async def translate_aio_errors( raise cre except BaseException as _trio_err: - trio_err = chan._trio_err = trio_err + trio_err = chan._trio_err = _trio_err # await tractor.pause(shield=True) # workx! entered: bool = await _debug._maybe_enter_pm( trio_err, @@ -865,25 +864,27 @@ async def translate_aio_errors( f'The `trio`-side task crashed!\n' f'{trio_err}' ) - aio_task.set_exception(aio_taskc) - wait_on_aio_task = False - # try: - # aio_task.set_exception(aio_taskc) - # except ( - # asyncio.InvalidStateError, - # RuntimeError, - # # ^XXX, uhh bc apparently we can't use `.set_exception()` - # # any more XD .. ?? - # ): - # wait_on_aio_task = False + # ??TODO? move this into the func that tries to use + # `Task._fut_waiter: Future` instead?? + # + # aio_task.set_exception(aio_taskc) + # wait_on_aio_task = False + try: + aio_task.set_exception(aio_taskc) + except ( + asyncio.InvalidStateError, + RuntimeError, + # ^XXX, uhh bc apparently we can't use `.set_exception()` + # any more XD .. ?? + ): + wait_on_aio_task = False finally: # record wtv `trio`-side error transpired if trio_err: - if chan._trio_err is not trio_err: - await tractor.pause(shield=True) - - # assert chan._trio_err is trio_err + assert chan._trio_err is trio_err + # if chan._trio_err is not trio_err: + # await tractor.pause(shield=True) ya_trio_exited: bool = chan._trio_exited graceful_trio_exit: bool = ( @@ -1031,104 +1032,83 @@ async def translate_aio_errors( 'asyncio-task is done and unblocked trio-side!\n' ) - # TODO? - # -[ ] make this a channel method, OR - # -[ ] just put back inline below? - # - # await tractor.pause(shield=True) - # TODO, go back to inlining this.. - def maybe_raise_aio_side_err( - trio_err: Exception, - ) -> None: - ''' - Raise any `trio`-side-caused cancellation or legit task - error normally propagated from the caller of either, - - `open_channel_from()` - - `run_task()` + # NOTE, was a `maybe_raise_aio_side_err()` closure that + # i moved inline BP + ''' + Raise any `trio`-side-caused cancellation or legit task + error normally propagated from the caller of either, + - `open_channel_from()` + - `run_task()` - ''' - aio_err: BaseException|None = chan._aio_err - trio_to_raise: ( - AsyncioCancelled| - AsyncioTaskExited| - None - ) = chan._trio_to_raise + ''' + aio_err: BaseException|None = chan._aio_err + trio_to_raise: ( + AsyncioCancelled| + AsyncioTaskExited| + None + ) = chan._trio_to_raise - if not suppress_graceful_exits: - raise trio_to_raise from (aio_err or trio_err) + if not suppress_graceful_exits: + raise trio_to_raise from (aio_err or trio_err) - if trio_to_raise: - # import pdbp; pdbp.set_trace() - match ( - trio_to_raise, - trio_err, + if trio_to_raise: + match ( + trio_to_raise, + trio_err, + ): + case ( + AsyncioTaskExited(), + trio.Cancelled()| + None, ): - case ( - AsyncioTaskExited(), - trio.Cancelled()|None, - ): + log.info( + 'Ignoring aio exit signal since trio also exited!' + ) + return + + case ( + AsyncioTaskExited(), + trio.EndOfChannel(), + ): + raise trio_err + + case ( + AsyncioCancelled(), + trio.Cancelled(), + ): + if not aio_done_before_trio: log.info( - 'Ignoring aio exit signal since trio also exited!' + 'Ignoring aio cancelled signal since trio was also cancelled!' 
) return + case _: + raise trio_to_raise from (aio_err or trio_err) - case ( - AsyncioCancelled(), - trio.Cancelled(), - ): - if not aio_done_before_trio: - log.info( - 'Ignoring aio cancelled signal since trio was also cancelled!' - ) - return - case _: - raise trio_to_raise from (aio_err or trio_err) - - # Check if the asyncio-side is the cause of the trio-side - # error. - elif ( - aio_err is not None - and - type(aio_err) is not AsyncioCancelled - # and ( - # type(aio_err) is not AsyncioTaskExited - # and - # not ya_trio_exited - # and - # not trio_err - # ) - - # TODO, case where trio_err is not None and - # aio_err is AsyncioTaskExited => raise eg! - # -[ ] maybe use a match bc this get's real - # complex fast XD - # - # or - # type(aio_err) is not AsyncioTaskExited - # and - # trio_err - # ) - ): - # always raise from any captured asyncio error - if trio_err: - raise trio_err from aio_err - - # XXX NOTE! above in the `trio.ClosedResourceError` - # handler we specifically set the - # `aio_err = AsyncioCancelled` such that it is raised - # as that special exc here! - raise aio_err - + # Check if the asyncio-side is the cause of the trio-side + # error. + elif ( + aio_err is not None + and + type(aio_err) is not AsyncioCancelled + ): + # always raise from any captured asyncio error if trio_err: - raise trio_err + raise trio_err from aio_err - # await tractor.pause() - # NOTE: if any ``asyncio`` error was caught, raise it here inline - # here in the ``trio`` task - # if trio_err: - maybe_raise_aio_side_err( - trio_err=to_raise_trio or trio_err - ) + # XXX NOTE! above in the `trio.ClosedResourceError` + # handler we specifically set the + # `aio_err = AsyncioCancelled` such that it is raised + # as that special exc here! + raise aio_err + + if trio_err: + raise trio_err + + # ^^TODO?? case where trio_err is not None and + # aio_err is AsyncioTaskExited => raise eg! + # -[x] maybe use a match bc this get's real + # complex fast XD + # => i did this above for silent exit cases ya? async def run_task( -- 2.34.1 From 915b5a5a86e55f450e7a4036668067c01c13b9e1 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 4 Mar 2025 13:54:46 -0500 Subject: [PATCH 273/305] Show frames when decode is handed bad input --- tractor/msg/_ops.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index 615ad0c8..dc632217 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -258,6 +258,9 @@ class PldRx(Struct): f'|_pld={pld!r}\n' ) return pld + except TypeError as typerr: + __tracebackhide__: bool = False + raise typerr # XXX pld-value type failure except ValidationError as valerr: @@ -799,6 +802,11 @@ def validate_payload_msg( roundtripped: Started|None = None try: roundtripped: Started = codec.decode(msg_bytes) + except TypeError as typerr: + __tracebackhide__: bool = False + raise typerr + + try: ctx: Context = getattr(ipc, 'ctx', ipc) pld: PayloadT = ctx.pld_rx.decode_pld( msg=roundtripped, @@ -823,6 +831,11 @@ def validate_payload_msg( ) raise ValidationError(complaint) + # usually due to `.decode()` input type + except TypeError as typerr: + __tracebackhide__: bool = False + raise typerr + # raise any msg type error NO MATTER WHAT! except ValidationError as verr: try: -- 2.34.1 From 5ff2740b9d69ec8a635dd679a9b5ca2e5dcbc50b Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 4 Mar 2025 19:53:24 -0500 Subject: [PATCH 274/305] Add a mark to `pytest.xfail()` questionably conc py stuff (ur mam `.xfail()`s bish!) 
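
Side note for reviewers: the new mark (`ctlcs_bish`, see the conftest
hunk below) should prolly also be registered to avoid
a `PytestUnknownMarkWarning`; roughly something like this sketch (not
part of this patch, description text made up):

    # in the top-level conftest.py
    def pytest_configure(config):
        config.addinivalue_line(
            'markers',
            'ctlcs_bish: tests whose ctl-c handling is busted by `asyncio`',
        )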
--- tests/devx/conftest.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/devx/conftest.py b/tests/devx/conftest.py index ae594c7c..7992e8c9 100644 --- a/tests/devx/conftest.py +++ b/tests/devx/conftest.py @@ -99,6 +99,14 @@ def ctlc( 'https://github.com/goodboy/tractor/issues/320' ) + if mark.name == 'ctlcs_bish': + pytest.skip( + f'Test {node} prolly uses something from the stdlib (namely `asyncio`..)\n' + f'The test and/or underlying example script can *sometimes* run fine ' + f'locally but more then likely until the cpython peeps get their sh#$ together, ' + f'this test will definitely not behave like `trio` under SIGINT..\n' + ) + if use_ctlc: # XXX: disable pygments highlighting for auto-tests # since some envs (like actions CI) will struggle -- 2.34.1 From 92d07233b0e463684371cdea8f6a341844f993b5 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 5 Mar 2025 09:49:13 -0500 Subject: [PATCH 275/305] Unpack errors from `pdb.bdb` Like any `bdb.BdbQuit` that might be relayed from a remote context after a REPl exit with the `quit` cmd. This fixes various issues while debugging where it may not be clear to the parent task that the child was terminated with a purposefully unrecoverable error. --- tractor/_exceptions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index f90df5fe..249ea164 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -22,6 +22,7 @@ from __future__ import annotations import builtins import importlib from pprint import pformat +from pdb import bdb import sys from types import ( TracebackType, @@ -181,6 +182,7 @@ def get_err_type(type_name: str) -> BaseException|None: builtins, _this_mod, trio, + bdb, ]: if type_ref := getattr( ns, -- 2.34.1 From b9febe68260d089a19bb355d3ce3b729353b7893 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 5 Mar 2025 09:54:56 -0500 Subject: [PATCH 276/305] Comment-tag pause points in `asycnio_bp.py` Thought i already did this but, obvi needed these to make the expect matches pass in our test. --- examples/debugging/asyncio_bp.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/examples/debugging/asyncio_bp.py b/examples/debugging/asyncio_bp.py index b55b28fd..296dbccb 100644 --- a/examples/debugging/asyncio_bp.py +++ b/examples/debugging/asyncio_bp.py @@ -25,7 +25,7 @@ async def bp_then_error( ) -> None: - # sync with ``trio``-side (caller) task + # sync with `trio`-side (caller) task to_trio.send_nowait('start') # NOTE: what happens here inside the hook needs some refinement.. @@ -33,8 +33,7 @@ async def bp_then_error( # we set `Lock.local_task_in_debug = 'sync'`, we probably want # some further, at least, meta-data about the task/actor in debug # in terms of making it clear it's `asyncio` mucking about. - breakpoint() - + breakpoint() # asyncio-side # short checkpoint / delay await asyncio.sleep(0.5) # asyncio-side @@ -58,7 +57,6 @@ async def trio_ctx( # this will block until the ``asyncio`` task sends a "first" # message, see first line in above func. async with ( - to_asyncio.open_channel_from( bp_then_error, # raise_after_bp=not bp_before_started, @@ -69,7 +67,7 @@ async def trio_ctx( assert first == 'start' if bp_before_started: - await tractor.pause() + await tractor.pause() # trio-side await ctx.started(first) # trio-side @@ -111,7 +109,7 @@ async def main( # pause in parent to ensure no cross-actor # locking problems exist! 
- await tractor.pause() + await tractor.pause() # trio-root if cancel_from_root: await ctx.cancel() -- 2.34.1 From e313cb5e304e7c6c00ad9f1019f32447ec01a789 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 5 Mar 2025 11:34:36 -0500 Subject: [PATCH 277/305] Repair/update `stackscope` test Seems that on 3.13 it's not showing our script code in the output now? Gotta get an example for @oremanj to see what's up but really it'd be nice to just custom format stuff above `trio`'s runtime by def.. Anyway, update the `.devx._stackscope`, - log formatting to be a little more "sclangy" lookin. - change the per-actor "delimiter" lines style. - report the `signal.getsignal(SIGINT)` which i needed in the `sync_bp.py` with ctl-c causing a hang.. - mask the `_tree_dumped` duplicator log report as well as the "dumped fine" one. - add an example `pkill --signal SIGUSR1` cmdline. Tweak the test to cope with, - not showing our script lines now.. which i've commented in the `assert_before()` patts.. - to expect the newly formatted delimiter (ascii) lines to separate the root vs. hanger sub-actor sections. --- examples/debugging/shield_hang_in_sub.py | 6 +- tests/devx/test_tooling.py | 33 ++++++----- tractor/devx/_stackscope.py | 73 ++++++++++++++++-------- 3 files changed, 69 insertions(+), 43 deletions(-) diff --git a/examples/debugging/shield_hang_in_sub.py b/examples/debugging/shield_hang_in_sub.py index 3cc084d5..5387353f 100644 --- a/examples/debugging/shield_hang_in_sub.py +++ b/examples/debugging/shield_hang_in_sub.py @@ -39,7 +39,6 @@ async def main( loglevel='devx', ) as an, ): - ptl: tractor.Portal = await an.start_actor( 'hanger', enable_modules=[__name__], @@ -54,13 +53,16 @@ async def main( print( 'Yo my child hanging..?\n' - 'Sending SIGUSR1 to see a tree-trace!\n' + # "i'm a user who wants to see a `stackscope` tree!\n" ) # XXX simulate the wrapping test's "user actions" # (i.e. if a human didn't run this manually but wants to # know what they should do to reproduce test behaviour) if from_test: + print( + f'Sending SIGUSR1 to {cpid!r}!\n' + ) os.kill( cpid, signal.SIGUSR1, diff --git a/tests/devx/test_tooling.py b/tests/devx/test_tooling.py index 3e48844e..4e8a4600 100644 --- a/tests/devx/test_tooling.py +++ b/tests/devx/test_tooling.py @@ -15,6 +15,7 @@ TODO: ''' import os import signal +import time from .conftest import ( expect, @@ -47,41 +48,39 @@ def test_shield_pause( ] ) + script_pid: int = child.pid print( - 'Sending SIGUSR1 to see a tree-trace!', + f'Sending SIGUSR1 to {script_pid}\n' + f'(kill -s SIGUSR1 {script_pid})\n' ) os.kill( - child.pid, + script_pid, signal.SIGUSR1, ) + time.sleep(0.2) expect( child, # end-of-tree delimiter - "------ \('root', ", + "end-of-\('root'", ) - assert_before( child, [ - 'Trying to dump `stackscope` tree..', - 'Dumping `stackscope` tree for actor', + # 'Srying to dump `stackscope` tree..', + # 'Dumping `stackscope` tree for actor', "('root'", # uid line + # TODO!? this used to show? + # -[ ] mk reproducable for @oremanj? + # # parent block point (non-shielded) - 'await trio.sleep_forever() # in root', + # 'await trio.sleep_forever() # in root', ] ) - - # expect( - # child, - # # relay to the sub should be reported - # 'Relaying `SIGUSR1`[10] to sub-actor', - # ) - expect( child, # end-of-tree delimiter - "------ \('hanger', ", + "end-of-\('hanger'", ) assert_before( child, @@ -91,11 +90,11 @@ def test_shield_pause( "('hanger'", # uid line + # TODO!? 
SEE ABOVE # hanger LOC where it's shield-halted - 'await trio.sleep_forever() # in subactor', + # 'await trio.sleep_forever() # in subactor', ] ) - # breakpoint() # simulate the user sending a ctl-c to the hanging program. # this should result in the terminator kicking in since diff --git a/tractor/devx/_stackscope.py b/tractor/devx/_stackscope.py index 944ae49a..ccc46534 100644 --- a/tractor/devx/_stackscope.py +++ b/tractor/devx/_stackscope.py @@ -35,6 +35,7 @@ from signal import ( signal, getsignal, SIGUSR1, + SIGINT, ) # import traceback from types import ModuleType @@ -48,6 +49,7 @@ from tractor import ( _state, log as logmod, ) +from tractor.devx import _debug log = logmod.get_logger(__name__) @@ -76,22 +78,45 @@ def dump_task_tree() -> None: ) actor: Actor = _state.current_actor() thr: Thread = current_thread() + current_sigint_handler: Callable = getsignal(SIGINT) + if ( + current_sigint_handler + is not + _debug.DebugStatus._trio_handler + ): + sigint_handler_report: str = ( + 'The default `trio` SIGINT handler was replaced?!' + ) + else: + sigint_handler_report: str = ( + 'The default `trio` SIGINT handler is in use?!' + ) + + # sclang symbology + # |_ + # |_(Task/Thread/Process/Actor + # |_{Supervisor/Scope + # |_[Storage/Memory/IPC-Stream/Data-Struct + log.devx( f'Dumping `stackscope` tree for actor\n' - f'{actor.uid}:\n' - f'|_{mp.current_process()}\n' - f' |_{thr}\n' - f' |_{actor}\n\n' - - # start-of-trace-tree delimiter (mostly for testing) - '------ - ------\n' - '\n' - + - f'{tree_str}\n' - + - # end-of-trace-tree delimiter (mostly for testing) + f'(>: {actor.uid!r}\n' + f' |_{mp.current_process()}\n' + f' |_{thr}\n' + f' |_{actor}\n' f'\n' - f'------ {actor.uid!r} ------\n' + f'{sigint_handler_report}\n' + f'signal.getsignal(SIGINT) -> {current_sigint_handler!r}\n' + # f'\n' + # start-of-trace-tree delimiter (mostly for testing) + # f'------ {actor.uid!r} ------\n' + f'\n' + f'------ start-of-{actor.uid!r} ------\n' + f'|\n' + f'{tree_str}' + # end-of-trace-tree delimiter (mostly for testing) + f'|\n' + f'|_____ end-of-{actor.uid!r} ______\n' ) # TODO: can remove this right? # -[ ] was original code from author @@ -123,11 +148,11 @@ def dump_tree_on_sig( ) -> None: global _tree_dumped, _handler_lock with _handler_lock: - if _tree_dumped: - log.warning( - 'Already dumped for this actor...??' - ) - return + # if _tree_dumped: + # log.warning( + # 'Already dumped for this actor...??' + # ) + # return _tree_dumped = True @@ -161,9 +186,9 @@ def dump_tree_on_sig( ) raise - log.devx( - 'Supposedly we dumped just fine..?' - ) + # log.devx( + # 'Supposedly we dumped just fine..?' + # ) if not relay_to_subs: return @@ -202,11 +227,11 @@ def enable_stack_on_sig( (https://www.gnu.org/software/bash/manual/bash.html#Command-Substitution) you could use: - >> kill -SIGUSR1 $(pgrep -f '') + >> kill -SIGUSR1 $(pgrep -f ) - Or with with `xonsh` (which has diff capture-from-subproc syntax) + OR without a sub-shell, - >> kill -SIGUSR1 @$(pgrep -f '') + >> pkill --signal SIGUSR1 -f ''' try: -- 2.34.1 From 7e78223fb5791f4ff1121b42fee109e6ae743f18 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 5 Mar 2025 11:58:03 -0500 Subject: [PATCH 278/305] Mask ctlc borked REPL tests Namely the `tractor.pause_from_sync()` examples using both bg threads and `asyncio` which seem to go into bad states where SIGINT is ignored.. Deats, - add `maybe_expect_timeout()` cm to ensure the EOF hangs get `.xfail()`ed instead. 
- @pytest.mark.ctlcs_bish` `test_pause_from_sync` and don't expect the greenback prompt msg. - also mark `test_sync_pause_from_aio_task`. --- tests/devx/test_pause_from_non_trio.py | 45 ++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/tests/devx/test_pause_from_non_trio.py b/tests/devx/test_pause_from_non_trio.py index 3a7140e6..4a03a123 100644 --- a/tests/devx/test_pause_from_non_trio.py +++ b/tests/devx/test_pause_from_non_trio.py @@ -6,6 +6,9 @@ All these tests can be understood (somewhat) by running the equivalent `examples/debugging/` scripts manually. ''' +from contextlib import ( + contextmanager as cm, +) # from functools import partial # import itertools import time @@ -15,7 +18,7 @@ import time import pytest from pexpect.exceptions import ( - # TIMEOUT, + TIMEOUT, EOF, ) @@ -32,7 +35,23 @@ from .conftest import ( # _repl_fail_msg, ) +@cm +def maybe_expect_timeout( + ctlc: bool = False, +) -> None: + try: + yield + except TIMEOUT: + # breakpoint() + if ctlc: + pytest.xfail( + 'Some kinda redic threading SIGINT bug i think?\n' + 'See the notes in `examples/debugging/sync_bp.py`..\n' + ) + raise + +@pytest.mark.ctlcs_bish def test_pause_from_sync( spawn, ctlc: bool, @@ -67,10 +86,10 @@ def test_pause_from_sync( child.expect(PROMPT) # XXX shouldn't see gb loaded message with PDB loglevel! - assert not in_prompt_msg( - child, - ['`greenback` portal opened!'], - ) + # assert not in_prompt_msg( + # child, + # ['`greenback` portal opened!'], + # ) # should be same root task assert_before( child, @@ -162,7 +181,14 @@ def test_pause_from_sync( ) child.sendline('c') - child.expect(EOF) + + # XXX TODO, weird threading bug it seems despite the + # `abandon_on_cancel: bool` setting to + # `trio.to_thread.run_sync()`.. + with maybe_expect_timeout( + ctlc=ctlc, + ): + child.expect(EOF) def expect_any_of( @@ -220,8 +246,10 @@ def expect_any_of( return expected_patts +@pytest.mark.ctlcs_bish def test_sync_pause_from_aio_task( spawn, + ctlc: bool # ^TODO, fix for `asyncio`!! ): @@ -270,10 +298,12 @@ def test_sync_pause_from_aio_task( # error raised in `asyncio.Task` "raise ValueError('asyncio side error!')": [ _crash_msg, - 'return await chan.receive()', # `.to_asyncio` impl internals in tb " Date: Wed, 5 Mar 2025 12:39:16 -0500 Subject: [PATCH 279/305] Bind another `_bexc` for debuggin --- tractor/_context.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index eb66aade..4628b11f 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -1561,12 +1561,12 @@ class Context: strict_pld_parity=strict_pld_parity, hide_tb=hide_tb, ) - except BaseException as err: + except BaseException as _bexc: + err = _bexc if not isinstance(err, MsgTypeError): __tracebackhide__: bool = False - raise - + raise err # TODO: maybe a flag to by-pass encode op if already done # here in caller? -- 2.34.1 From 1c2e174406b52883c7b3030fd86eb6be2f644cf3 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 10 Mar 2025 11:51:24 -0400 Subject: [PATCH 280/305] Bump to `msgspec>=0.19.0` for py 3.13 support! --- pyproject.toml | 4 +--- uv.lock | 28 ++++++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b1792340..61272a60 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,7 @@ dependencies = [ "tabcompleter>=1.4.0", # typed IPC msging # TODO, get back on release once 3.13 support is out! 
- "msgspec", + "msgspec>=0.19.0", ] # ------ project ------ @@ -70,8 +70,6 @@ dev = [ # ------ dependency-groups ------ [tool.uv.sources] -msgspec = { git = "https://github.com/jcrist/msgspec.git" } - # XXX NOTE, only for @goodboy's hacking on `pprint(sort_dicts=False)` # for the `pp` alias.. # pdbp = { path = "../pdbp", editable = true } diff --git a/uv.lock b/uv.lock index 277152c5..6b12137e 100644 --- a/uv.lock +++ b/uv.lock @@ -147,7 +147,31 @@ wheels = [ [[package]] name = "msgspec" version = "0.19.0" -source = { git = "https://github.com/jcrist/msgspec.git#dd965dce22e5278d4935bea923441ecde31b5325" } +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cf/9b/95d8ce458462b8b71b8a70fa94563b2498b89933689f3a7b8911edfae3d7/msgspec-0.19.0.tar.gz", hash = "sha256:604037e7cd475345848116e89c553aa9a233259733ab51986ac924ab1b976f8e", size = 216934 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/24/d4/2ec2567ac30dab072cce3e91fb17803c52f0a37aab6b0c24375d2b20a581/msgspec-0.19.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aa77046904db764b0462036bc63ef71f02b75b8f72e9c9dd4c447d6da1ed8f8e", size = 187939 }, + { url = "https://files.pythonhosted.org/packages/2b/c0/18226e4328897f4f19875cb62bb9259fe47e901eade9d9376ab5f251a929/msgspec-0.19.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:047cfa8675eb3bad68722cfe95c60e7afabf84d1bd8938979dd2b92e9e4a9551", size = 182202 }, + { url = "https://files.pythonhosted.org/packages/81/25/3a4b24d468203d8af90d1d351b77ea3cffb96b29492855cf83078f16bfe4/msgspec-0.19.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e78f46ff39a427e10b4a61614a2777ad69559cc8d603a7c05681f5a595ea98f7", size = 209029 }, + { url = "https://files.pythonhosted.org/packages/85/2e/db7e189b57901955239f7689b5dcd6ae9458637a9c66747326726c650523/msgspec-0.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c7adf191e4bd3be0e9231c3b6dc20cf1199ada2af523885efc2ed218eafd011", size = 210682 }, + { url = "https://files.pythonhosted.org/packages/03/97/7c8895c9074a97052d7e4a1cc1230b7b6e2ca2486714eb12c3f08bb9d284/msgspec-0.19.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:f04cad4385e20be7c7176bb8ae3dca54a08e9756cfc97bcdb4f18560c3042063", size = 214003 }, + { url = "https://files.pythonhosted.org/packages/61/61/e892997bcaa289559b4d5869f066a8021b79f4bf8e955f831b095f47a4cd/msgspec-0.19.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:45c8fb410670b3b7eb884d44a75589377c341ec1392b778311acdbfa55187716", size = 216833 }, + { url = "https://files.pythonhosted.org/packages/ce/3d/71b2dffd3a1c743ffe13296ff701ee503feaebc3f04d0e75613b6563c374/msgspec-0.19.0-cp311-cp311-win_amd64.whl", hash = "sha256:70eaef4934b87193a27d802534dc466778ad8d536e296ae2f9334e182ac27b6c", size = 186184 }, + { url = "https://files.pythonhosted.org/packages/b2/5f/a70c24f075e3e7af2fae5414c7048b0e11389685b7f717bb55ba282a34a7/msgspec-0.19.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f98bd8962ad549c27d63845b50af3f53ec468b6318400c9f1adfe8b092d7b62f", size = 190485 }, + { url = "https://files.pythonhosted.org/packages/89/b0/1b9763938cfae12acf14b682fcf05c92855974d921a5a985ecc197d1c672/msgspec-0.19.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:43bbb237feab761b815ed9df43b266114203f53596f9b6e6f00ebd79d178cdf2", size = 183910 }, + { url = 
"https://files.pythonhosted.org/packages/87/81/0c8c93f0b92c97e326b279795f9c5b956c5a97af28ca0fbb9fd86c83737a/msgspec-0.19.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4cfc033c02c3e0aec52b71710d7f84cb3ca5eb407ab2ad23d75631153fdb1f12", size = 210633 }, + { url = "https://files.pythonhosted.org/packages/d0/ef/c5422ce8af73928d194a6606f8ae36e93a52fd5e8df5abd366903a5ca8da/msgspec-0.19.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d911c442571605e17658ca2b416fd8579c5050ac9adc5e00c2cb3126c97f73bc", size = 213594 }, + { url = "https://files.pythonhosted.org/packages/19/2b/4137bc2ed45660444842d042be2cf5b18aa06efd2cda107cff18253b9653/msgspec-0.19.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:757b501fa57e24896cf40a831442b19a864f56d253679f34f260dcb002524a6c", size = 214053 }, + { url = "https://files.pythonhosted.org/packages/9d/e6/8ad51bdc806aac1dc501e8fe43f759f9ed7284043d722b53323ea421c360/msgspec-0.19.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5f0f65f29b45e2816d8bded36e6b837a4bf5fb60ec4bc3c625fa2c6da4124537", size = 219081 }, + { url = "https://files.pythonhosted.org/packages/b1/ef/27dd35a7049c9a4f4211c6cd6a8c9db0a50647546f003a5867827ec45391/msgspec-0.19.0-cp312-cp312-win_amd64.whl", hash = "sha256:067f0de1c33cfa0b6a8206562efdf6be5985b988b53dd244a8e06f993f27c8c0", size = 187467 }, + { url = "https://files.pythonhosted.org/packages/3c/cb/2842c312bbe618d8fefc8b9cedce37f773cdc8fa453306546dba2c21fd98/msgspec-0.19.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f12d30dd6266557aaaf0aa0f9580a9a8fbeadfa83699c487713e355ec5f0bd86", size = 190498 }, + { url = "https://files.pythonhosted.org/packages/58/95/c40b01b93465e1a5f3b6c7d91b10fb574818163740cc3acbe722d1e0e7e4/msgspec-0.19.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:82b2c42c1b9ebc89e822e7e13bbe9d17ede0c23c187469fdd9505afd5a481314", size = 183950 }, + { url = "https://files.pythonhosted.org/packages/e8/f0/5b764e066ce9aba4b70d1db8b087ea66098c7c27d59b9dd8a3532774d48f/msgspec-0.19.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19746b50be214a54239aab822964f2ac81e38b0055cca94808359d779338c10e", size = 210647 }, + { url = "https://files.pythonhosted.org/packages/9d/87/bc14f49bc95c4cb0dd0a8c56028a67c014ee7e6818ccdce74a4862af259b/msgspec-0.19.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60ef4bdb0ec8e4ad62e5a1f95230c08efb1f64f32e6e8dd2ced685bcc73858b5", size = 213563 }, + { url = "https://files.pythonhosted.org/packages/53/2f/2b1c2b056894fbaa975f68f81e3014bb447516a8b010f1bed3fb0e016ed7/msgspec-0.19.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac7f7c377c122b649f7545810c6cd1b47586e3aa3059126ce3516ac7ccc6a6a9", size = 213996 }, + { url = "https://files.pythonhosted.org/packages/aa/5a/4cd408d90d1417e8d2ce6a22b98a6853c1b4d7cb7669153e4424d60087f6/msgspec-0.19.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a5bc1472223a643f5ffb5bf46ccdede7f9795078194f14edd69e3aab7020d327", size = 219087 }, + { url = "https://files.pythonhosted.org/packages/23/d8/f15b40611c2d5753d1abb0ca0da0c75348daf1252220e5dda2867bd81062/msgspec-0.19.0-cp313-cp313-win_amd64.whl", hash = "sha256:317050bc0f7739cb30d257ff09152ca309bf5a369854bbf1e57dffc310c1f20f", size = 187432 }, +] [[package]] name = "mypy-extensions" @@ -351,7 +375,7 @@ dev = [ [package.metadata] requires-dist = [ { name = "colorlog", specifier = ">=6.8.2,<7" }, - { name = "msgspec", git = "https://github.com/jcrist/msgspec.git" }, + { name = 
"msgspec", specifier = ">=0.19.0" }, { name = "pdbp", specifier = ">=1.6,<2" }, { name = "tabcompleter", specifier = ">=1.4.0" }, { name = "tricycle", specifier = ">=0.4.1,<0.5" }, -- 2.34.1 From 4a566546a3eb2cd203ea65647306d21155b10106 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 7 Mar 2025 14:13:36 -0500 Subject: [PATCH 281/305] Finally get type-extended `msgspec` fields workinn By using our new `PldRx` design we can, - pass through the pld-spec & a `dec_hook()` to our `MsgDec` which is used to configure the underlying `.dec: msgspec.msgpack.Decoder` - pass through a `enc_hook()` to `mk_codec()` and use it to conf the equiv `MsgCodec.enc` such that sent msg-plds are converted prior to transport. The trick ended up being just to always union the `mk_dec()` extension-types spec with the normaly with the `msgspec.Raw` pld-spec such that the `dec_hook()` is only invoked for payload types tagged by the encoder/sender side B) A variety of impl tweaks to make it all happen as well as various cleanups in the `.msg._codec` mod include, - `mk_dec()` no defaul `spec` arg, better doc string, accept the new `ext_types` arg, doing the union of that with `msgspec.Raw`. - proto-ed a now unused `mk_boxed_ext_struct()` which will likely get removed since it ended up that our `PayloadMsg` structs already cover the ext-type-hook requirement that the decoder is passed a `.type=msgspec.Struct` of some sort in order for `.dec_hook` to be used. - add a `unpack_spec_types()` util fn for getting the `set[Type]` from from a `Union[Type]` annotation instance. - mk the default `mk_codec(pc_pld_spec = Raw,)` since the `PldRx` design was already passing/overriding it and it doesn't make much sense to use `Any` anymore for the same reason; it will cause various `Context` apis to now break. |_ also accept a `enc_hook()` and `ext_types` which are used to maybe config the `.msgpack.Encoder` - generally tweak a bunch of comments-as-docs and todos namely the ones that are completed after the pld-rx design was implemented. Also, - mask the non-functioning `'defstruct'` approach `inside `.msg.types.mk_msg_spec()` to prep for its removal. Adjust the test suite (rn called `test_caps_based_msging`), - add a new suite `test_custom_extension_types` and move and use the `enc/dec_nsp()` hooks to the mod level for its use. - prolly planning to drop the `test_limit_msgspec` suite since it's mostly replaced by the `test_pldrx_limiting` mod's version? - originally was tweaking a bunch in `test_codec_hooks_mod` but likely it will get mostly rewritten to be simpler and simply verify that ext-typed fields can be used over IPC `Context`s between actors (as originally intended for this sub-suite). 
--- tests/test_caps_based_msging.py | 359 +++++++++++++++++++++++--------- tractor/msg/__init__.py | 1 + tractor/msg/_codec.py | 273 +++++++++++++++++++----- tractor/msg/_ops.py | 10 +- tractor/msg/types.py | 45 ++-- 5 files changed, 513 insertions(+), 175 deletions(-) diff --git a/tests/test_caps_based_msging.py b/tests/test_caps_based_msging.py index ba2bb101..3c2a73cb 100644 --- a/tests/test_caps_based_msging.py +++ b/tests/test_caps_based_msging.py @@ -15,13 +15,16 @@ from typing import ( from msgspec import ( structs, msgpack, + Raw, Struct, ValidationError, ) import pytest +import trio import tractor from tractor import ( + Actor, _state, MsgTypeError, Context, @@ -32,7 +35,9 @@ from tractor.msg import ( NamespacePath, MsgCodec, + MsgDec, mk_codec, + mk_dec, apply_codec, current_codec, ) @@ -43,101 +48,34 @@ from tractor.msg.types import ( Started, mk_msg_spec, ) -import trio +from tractor.msg._ops import ( + limit_plds, +) def mk_custom_codec( - pld_spec: Union[Type]|Any, add_hooks: bool, -) -> MsgCodec: +) -> tuple[ + MsgCodec, # encode to send + MsgDec, # pld receive-n-decode +]: ''' Create custom `msgpack` enc/dec-hooks and set a `Decoder` which only loads `pld_spec` (like `NamespacePath`) types. ''' - uid: tuple[str, str] = tractor.current_actor().uid # XXX NOTE XXX: despite defining `NamespacePath` as a type # field on our `PayloadMsg.pld`, we still need a enc/dec_hook() pair # to cast to/from that type on the wire. See the docs: # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types - def enc_nsp(obj: Any) -> Any: - print(f'{uid} ENC HOOK') - match obj: - case NamespacePath(): - print( - f'{uid}: `NamespacePath`-Only ENCODE?\n' - f'obj-> `{obj}`: {type(obj)}\n' - ) - # if type(obj) != NamespacePath: - # breakpoint() - return str(obj) - - print( - f'{uid}\n' - 'CUSTOM ENCODE\n' - f'obj-arg-> `{obj}`: {type(obj)}\n' - ) - logmsg: str = ( - f'{uid}\n' - 'FAILED ENCODE\n' - f'obj-> `{obj}: {type(obj)}`\n' - ) - raise NotImplementedError(logmsg) - - def dec_nsp( - obj_type: Type, - obj: Any, - - ) -> Any: - print( - f'{uid}\n' - 'CUSTOM DECODE\n' - f'type-arg-> {obj_type}\n' - f'obj-arg-> `{obj}`: {type(obj)}\n' - ) - nsp = None - - if ( - obj_type is NamespacePath - and isinstance(obj, str) - and ':' in obj - ): - nsp = NamespacePath(obj) - # TODO: we could built a generic handler using - # JUST matching the obj_type part? - # nsp = obj_type(obj) - - if nsp: - print(f'Returning NSP instance: {nsp}') - return nsp - - logmsg: str = ( - f'{uid}\n' - 'FAILED DECODE\n' - f'type-> {obj_type}\n' - f'obj-arg-> `{obj}`: {type(obj)}\n\n' - f'current codec:\n' - f'{current_codec()}\n' - ) - # TODO: figure out the ignore subsys for this! - # -[ ] option whether to defense-relay backc the msg - # inside an `Invalid`/`Ignore` - # -[ ] how to make this handling pluggable such that a - # `Channel`/`MsgTransport` can intercept and process - # back msgs either via exception handling or some other - # signal? - log.warning(logmsg) - # NOTE: this delivers the invalid - # value up to `msgspec`'s decoding - # machinery for error raising. - return obj - # raise NotImplementedError(logmsg) + # if pld_spec is Any: + # pld_spec = Raw nsp_codec: MsgCodec = mk_codec( - ipc_pld_spec=pld_spec, + # ipc_pld_spec=Raw, # default! # NOTE XXX: the encode hook MUST be used no matter what since # our `NamespacePath` is not any of a `Any` native type nor @@ -153,8 +91,9 @@ def mk_custom_codec( # XXX NOTE: pretty sure this is mutex with the `type=` to # `Decoder`? 
so it won't work in tandem with the # `ipc_pld_spec` passed above? - dec_hook=dec_nsp if add_hooks else None, + ext_types=[NamespacePath], ) + # dec_hook=dec_nsp if add_hooks else None, return nsp_codec @@ -365,7 +304,7 @@ async def send_back_values( expect_debug: bool, pld_spec_type_strs: list[str], add_hooks: bool, - started_msg_bytes: bytes, + # started_msg_bytes: bytes, expect_ipc_send: dict[str, tuple[Any, bool]], ) -> None: @@ -392,24 +331,36 @@ async def send_back_values( # same as on parent side config. nsp_codec: MsgCodec = mk_custom_codec( - pld_spec=ipc_pld_spec, add_hooks=add_hooks, ) with ( apply_codec(nsp_codec) as codec, + limit_plds(ipc_pld_spec) as codec, ): + # we SHOULD NOT be swapping the global codec since it breaks + # `Context.starte()` roundtripping checks! chk_codec_applied( expect_codec=nsp_codec, - enter_value=codec, ) + # XXX SO NOT THIS! + # chk_codec_applied( + # expect_codec=nsp_codec, + # enter_value=codec, + # ) print( f'{uid}: attempting `Started`-bytes DECODE..\n' ) try: - msg: Started = nsp_codec.decode(started_msg_bytes) - expected_pld_spec_str: str = msg.pld - assert pld_spec_str == expected_pld_spec_str + # msg: Started = nsp_codec.decode(started_msg_bytes) + + ipc_spec: Type = ctx._pld_rx._pld_dec.spec + expected_pld_spec_str: str = str(ipc_spec) + assert ( + pld_spec_str == expected_pld_spec_str + and + ipc_pld_spec == ipc_spec + ) # TODO: maybe we should add our own wrapper error so as to # be interchange-lib agnostic? @@ -427,12 +378,15 @@ async def send_back_values( else: print( f'{uid}: (correctly) unable to DECODE `Started`-bytes\n' - f'{started_msg_bytes}\n' + # f'{started_msg_bytes}\n' ) iter_send_val_items = iter(expect_ipc_send.values()) sent: list[Any] = [] - for send_value, expect_send in iter_send_val_items: + for ( + send_value, + expect_send, + ) in iter_send_val_items: try: print( f'{uid}: attempting to `.started({send_value})`\n' @@ -457,12 +411,13 @@ async def send_back_values( break # move on to streaming block.. 
- except tractor.MsgTypeError: - await tractor.pause() + except tractor.MsgTypeError as _mte: + mte = _mte + # await tractor.pause() if expect_send: raise RuntimeError( - f'EXPECTED to `.started()` value given spec:\n' + f'EXPECTED to `.started()` value given spec ??\n\n' f'ipc_pld_spec -> {ipc_pld_spec}\n' f'value -> {send_value}: {type(send_value)}\n' ) @@ -530,10 +485,6 @@ async def send_back_values( # ) -def ex_func(*args): - print(f'ex_func({args})') - - @pytest.mark.parametrize( 'ipc_pld_spec', [ @@ -593,7 +544,6 @@ def test_codec_hooks_mod( # - codec modified with hooks -> decode nsp as # `NamespacePath` nsp_codec: MsgCodec = mk_custom_codec( - pld_spec=ipc_pld_spec, add_hooks=add_codec_hooks, ) with apply_codec(nsp_codec) as codec: @@ -609,7 +559,11 @@ def test_codec_hooks_mod( f'ipc_pld_spec: {ipc_pld_spec}\n' ' ------ - ------\n' ) - for val_type_str, val, expect_send in iter_maybe_sends( + for ( + val_type_str, + val, + expect_send, + )in iter_maybe_sends( send_items, ipc_pld_spec, add_codec_hooks=add_codec_hooks, @@ -618,7 +572,10 @@ def test_codec_hooks_mod( f'send_value: {val}: {type(val)} ' f'=> expect_send: {expect_send}\n' ) - expect_ipc_send[val_type_str] = (val, expect_send) + expect_ipc_send[val_type_str] = ( + val, + expect_send, + ) print( report + @@ -627,9 +584,24 @@ def test_codec_hooks_mod( assert len(expect_ipc_send) == len(send_items) # now try over real IPC with a the subactor # expect_ipc_rountrip: bool = True + + if ( + subtypes := getattr( + ipc_pld_spec, '__args__', False + ) + ): + pld_types_str: str = '|'.join(subtypes) + breakpoint() + else: + pld_types_str: str = ipc_pld_spec.__name__ + expected_started = Started( cid='cid', - pld=str(ipc_pld_spec), + # pld=str(pld_types_str), + pld=ipc_pld_spec, + ) + started_msg_bytes: bytes = nsp_codec.encode( + expected_started, ) # build list of values we expect to receive from # the subactor. @@ -655,7 +627,7 @@ def test_codec_hooks_mod( expect_debug=debug_mode, pld_spec_type_strs=pld_spec_type_strs, add_hooks=add_codec_hooks, - started_msg_bytes=nsp_codec.encode(expected_started), + started_msg_bytes=started_msg_bytes, # XXX NOTE bc we send a `NamespacePath` in this kwarg expect_ipc_send=expect_ipc_send, @@ -673,6 +645,8 @@ def test_codec_hooks_mod( # test with `limit_msg_spec()` above? 
# await tractor.pause() print('PARENT opening IPC ctx!\n') + ctx: tractor.Context + ipc: tractor.MsgStream async with ( # XXX should raise an mte (`MsgTypeError`) @@ -877,6 +851,10 @@ def chk_pld_type( def test_limit_msgspec( debug_mode: bool, ): + ''' + Verify that type-limiting the + + ''' async def main(): async with tractor.open_root_actor( debug_mode=debug_mode, @@ -915,3 +893,188 @@ def test_limit_msgspec( # breakpoint() trio.run(main) + + +def enc_nsp(obj: Any) -> Any: + actor: Actor = tractor.current_actor( + err_on_no_runtime=False, + ) + uid: tuple[str, str]|None = None if not actor else actor.uid + print(f'{uid} ENC HOOK') + + match obj: + # case NamespacePath()|str(): + case NamespacePath(): + encoded: str = str(obj) + print( + f'----- ENCODING `NamespacePath` as `str` ------\n' + f'|_obj:{type(obj)!r} = {obj!r}\n' + f'|_encoded: str = {encoded!r}\n' + ) + # if type(obj) != NamespacePath: + # breakpoint() + return encoded + case _: + logmsg: str = ( + f'{uid}\n' + 'FAILED ENCODE\n' + f'obj-> `{obj}: {type(obj)}`\n' + ) + raise NotImplementedError(logmsg) + + +def dec_nsp( + obj_type: Type, + obj: Any, + +) -> Any: + # breakpoint() + actor: Actor = tractor.current_actor( + err_on_no_runtime=False, + ) + uid: tuple[str, str]|None = None if not actor else actor.uid + print( + f'{uid}\n' + 'CUSTOM DECODE\n' + f'type-arg-> {obj_type}\n' + f'obj-arg-> `{obj}`: {type(obj)}\n' + ) + nsp = None + # XXX, never happens right? + if obj_type is Raw: + breakpoint() + + if ( + obj_type is NamespacePath + and isinstance(obj, str) + and ':' in obj + ): + nsp = NamespacePath(obj) + # TODO: we could built a generic handler using + # JUST matching the obj_type part? + # nsp = obj_type(obj) + + if nsp: + print(f'Returning NSP instance: {nsp}') + return nsp + + logmsg: str = ( + f'{uid}\n' + 'FAILED DECODE\n' + f'type-> {obj_type}\n' + f'obj-arg-> `{obj}`: {type(obj)}\n\n' + f'current codec:\n' + f'{current_codec()}\n' + ) + # TODO: figure out the ignore subsys for this! + # -[ ] option whether to defense-relay backc the msg + # inside an `Invalid`/`Ignore` + # -[ ] how to make this handling pluggable such that a + # `Channel`/`MsgTransport` can intercept and process + # back msgs either via exception handling or some other + # signal? + log.warning(logmsg) + # NOTE: this delivers the invalid + # value up to `msgspec`'s decoding + # machinery for error raising. + return obj + # raise NotImplementedError(logmsg) + + +def ex_func(*args): + ''' + A mod level func we can ref and load via our `NamespacePath` + python-object pointer `str` subtype. + + ''' + print(f'ex_func({args})') + + +@pytest.mark.parametrize( + 'add_codec_hooks', + [ + True, + False, + ], + ids=['use_codec_hooks', 'no_codec_hooks'], +) +def test_custom_extension_types( + debug_mode: bool, + add_codec_hooks: bool +): + ''' + Verify that a `MsgCodec` (used for encoding all outbound IPC msgs + and decoding all inbound `PayloadMsg`s) and a paired `MsgDec` + (used for decoding the `PayloadMsg.pld: Raw` received within a given + task's ipc `Context` scope) can both send and receive "extension types" + as supported via custom converter hooks passed to `msgspec`. + + ''' + nsp_pld_dec: MsgDec = mk_dec( + spec=None, # ONLY support the ext type + dec_hook=dec_nsp if add_codec_hooks else None, + ext_types=[NamespacePath], + ) + nsp_codec: MsgCodec = mk_codec( + # ipc_pld_spec=Raw, # default! 
+ + # NOTE XXX: the encode hook MUST be used no matter what since + # our `NamespacePath` is not any of a `Any` native type nor + # a `msgspec.Struct` subtype - so `msgspec` has no way to know + # how to encode it unless we provide the custom hook. + # + # AGAIN that is, regardless of whether we spec an + # `Any`-decoded-pld the enc has no knowledge (by default) + # how to enc `NamespacePath` (nsp), so we add a custom + # hook to do that ALWAYS. + enc_hook=enc_nsp if add_codec_hooks else None, + + # XXX NOTE: pretty sure this is mutex with the `type=` to + # `Decoder`? so it won't work in tandem with the + # `ipc_pld_spec` passed above? + ext_types=[NamespacePath], + + # TODO? is it useful to have the `.pld` decoded *prior* to + # the `PldRx`?? like perf or mem related? + # ext_dec=nsp_pld_dec, + ) + if add_codec_hooks: + assert nsp_codec.dec.dec_hook is None + + # TODO? if we pass `ext_dec` above? + # assert nsp_codec.dec.dec_hook is dec_nsp + + assert nsp_codec.enc.enc_hook is enc_nsp + + nsp = NamespacePath.from_ref(ex_func) + + try: + nsp_bytes: bytes = nsp_codec.encode(nsp) + nsp_rt_sin_msg = nsp_pld_dec.decode(nsp_bytes) + nsp_rt_sin_msg.load_ref() is ex_func + except TypeError: + if not add_codec_hooks: + pass + + try: + msg_bytes: bytes = nsp_codec.encode( + Started( + cid='cid', + pld=nsp, + ) + ) + # since the ext-type obj should also be set as the msg.pld + assert nsp_bytes in msg_bytes + started_rt: Started = nsp_codec.decode(msg_bytes) + pld: Raw = started_rt.pld + assert isinstance(pld, Raw) + nsp_rt: NamespacePath = nsp_pld_dec.decode(pld) + assert isinstance(nsp_rt, NamespacePath) + # in obj comparison terms they should be the same + assert nsp_rt == nsp + # ensure we've decoded to ext type! + assert nsp_rt.load_ref() is ex_func + + except TypeError: + if not add_codec_hooks: + pass diff --git a/tractor/msg/__init__.py b/tractor/msg/__init__.py index 44586f2d..88220054 100644 --- a/tractor/msg/__init__.py +++ b/tractor/msg/__init__.py @@ -33,6 +33,7 @@ from ._codec import ( apply_codec as apply_codec, mk_codec as mk_codec, + mk_dec as mk_dec, MsgCodec as MsgCodec, MsgDec as MsgDec, current_codec as current_codec, diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index 32f690f1..46716d4c 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -61,6 +61,7 @@ from tractor.msg.pretty_struct import Struct from tractor.msg.types import ( mk_msg_spec, MsgType, + PayloadMsg, ) from tractor.log import get_logger @@ -80,6 +81,7 @@ class MsgDec(Struct): ''' _dec: msgpack.Decoder + # _ext_types_box: Struct|None = None @property def dec(self) -> msgpack.Decoder: @@ -179,23 +181,122 @@ class MsgDec(Struct): def mk_dec( - spec: Union[Type[Struct]]|Any = Any, + spec: Union[Type[Struct]]|Type|None, + + # NOTE, required for ad-hoc type extensions to the underlying + # serialization proto (which is default `msgpack`), + # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types dec_hook: Callable|None = None, + ext_types: list[Type]|None = None, ) -> MsgDec: ''' - Create an IPC msg decoder, normally used as the - `PayloadMsg.pld: PayloadT` field decoder inside a `PldRx`. 
+ Create an IPC msg decoder, a slightly higher level wrapper around + a `msgspec.msgpack.Decoder` which provides, + + - easier introspection of the underlying type spec via + the `.spec` and `.spec_str` attrs, + - `.hook` access to the `Decoder.dec_hook()`, + - automatic custom extension-types decode support when + `dec_hook()` is provided such that any `PayloadMsg.pld` tagged + as a type from from `ext_types` (presuming the `MsgCodec.encode()` also used + a `.enc_hook()`) is processed and constructed by a `PldRx` implicitily. + + NOTE, as mentioned a `MsgDec` is normally used for `PayloadMsg.pld: PayloadT` field + decoding inside an IPC-ctx-oriented `PldRx`. ''' + if ( + spec is None + and + ext_types is None + ): + raise ValueError( + f'You must provide a type-spec for a msg decoder!\n' + f'The only time `spec=None` is permitted is if custom extension types ' + f'are expected to be supported, in which case `ext_types` must be non-`None`' + f'and it is presumed that only the `ext_types` (supported by the paired `dec_hook()`) ' + f'will be permitted within the type-`spec`!\n' + + f'tpec = {spec!r}\n' + f'dec_hook = {dec_hook!r}\n' + f'ext_types = {ext_types!r}\n' + ) + + if dec_hook: + if ext_types is None: + raise ValueError( + f'If extending the serializable types with a custom decoder hook, ' + f'you must also provide the expected type set `dec_hook()` will handle ' + f'via the `ext_types: Union[Type]|None = None` argument!\n' + f'dec_hook = {dec_hook!r}\n' + f'ext_types = {ext_types!r}\n' + ) + + # XXX, i *thought* we would require a boxing struct as per docs, + # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types + # |_ see comment, + # > Note that typed deserialization is required for + # > successful roundtripping here, so we pass `MyMessage` to + # > `Decoder`. + # + # BUT, turns out as long as you spec a union with `Raw` it + # will work? kk B) + # + # maybe_box_struct = mk_boxed_ext_struct(ext_types) + spec = Raw | Union[*ext_types] + return MsgDec( _dec=msgpack.Decoder( type=spec, # like `MsgType[Any]` dec_hook=dec_hook, - ) + ), ) +# TODO? remove since didn't end up needing this? +def mk_boxed_ext_struct( + ext_types: list[Type], +) -> Struct: + # NOTE, originally was to wrap non-msgpack-supported "extension + # types" in a field-typed boxing struct, see notes around the + # `dec_hook()` branch in `mk_dec()`. + ext_types_union = Union[*ext_types] + repr_ext_types_union: str = ( + str(ext_types_union) + or + "|".join(ext_types) + ) + BoxedExtType = msgspec.defstruct( + f'BoxedExts[{repr_ext_types_union}]', + fields=[ + ('boxed', ext_types_union), + ], + ) + return BoxedExtType + + +def unpack_spec_types( + spec: Union[Type]|Type, +) -> set[Type]: + ''' + Given an input type-`spec`, either a lone type + or a `Union` of types (like `str|int|MyThing`), + return a set of individual types. + + When `spec` is not a type-union returns `{spec,}`. + + ''' + spec_subtypes: set[Union[Type]] = ( + getattr( + spec, + '__args__', + {spec,}, + ) + ) + return spec_subtypes + + def mk_msgspec_table( dec: msgpack.Decoder, msg: MsgType|None = None, @@ -273,6 +374,8 @@ class MsgCodec(Struct): _dec: msgpack.Decoder _pld_spec: Type[Struct]|Raw|Any + # _ext_types_box: Struct|None = None + def __repr__(self) -> str: speclines: str = textwrap.indent( pformat_msgspec(codec=self), @@ -339,12 +442,14 @@ class MsgCodec(Struct): def encode( self, - py_obj: Any, + py_obj: Any|PayloadMsg, use_buf: bool = False, # ^-XXX-^ uhh why am i getting this? 
# |_BufferError: Existing exports of data: object cannot be re-sized + as_ext_type: bool = False, + ) -> bytes: ''' Encode input python objects to `msgpack` bytes for @@ -357,8 +462,33 @@ class MsgCodec(Struct): if use_buf: self._enc.encode_into(py_obj, self._buf) return self._buf - else: - return self._enc.encode(py_obj) + + return self._enc.encode(py_obj) + # TODO! REMOVE once i'm confident we won't ever need it! + # + # box: Struct = self._ext_types_box + # if ( + # as_ext_type + # or + # ( + # # XXX NOTE, auto-detect if the input type + # box + # and + # (ext_types := unpack_spec_types( + # spec=box.__annotations__['boxed']) + # ) + # ) + # ): + # match py_obj: + # # case PayloadMsg(pld=pld) if ( + # # type(pld) in ext_types + # # ): + # # py_obj.pld = box(boxed=py_obj) + # # breakpoint() + # case _ if ( + # type(py_obj) in ext_types + # ): + # py_obj = box(boxed=py_obj) @property def dec(self) -> msgpack.Decoder: @@ -378,21 +508,30 @@ class MsgCodec(Struct): return self._dec.decode(msg) -# [x] TODO: a sub-decoder system as well? => No! +# ?TODO? time to remove this finally? +# +# -[x] TODO: a sub-decoder system as well? +# => No! already re-architected to include a "payload-receiver" +# now found in `._ops`. # # -[x] do we still want to try and support the sub-decoder with # `.Raw` technique in the case that the `Generic` approach gives # future grief? -# => NO, since we went with the `PldRx` approach instead B) +# => well YES but NO, since we went with the `PldRx` approach +# instead! # # IF however you want to see the code that was staged for this # from wayyy back, see the pure removal commit. def mk_codec( - # struct type unions set for `Decoder` - # https://jcristharif.com/msgspec/structs.html#tagged-unions - ipc_pld_spec: Union[Type[Struct]]|Any = Any, + ipc_pld_spec: Union[Type[Struct]]|Any|Raw = Raw, + # tagged-struct-types-union set for `Decoder`ing of payloads, as + # per https://jcristharif.com/msgspec/structs.html#tagged-unions. + # NOTE that the default `Raw` here **is very intentional** since + # the `PldRx._pld_dec: MsgDec` is responsible for per ipc-ctx-task + # decoding of msg-specs defined by the user as part of **their** + # `tractor` "app's" type-limited IPC msg-spec. # TODO: offering a per-msg(-field) type-spec such that # the fields can be dynamically NOT decoded and left as `Raw` @@ -405,13 +544,18 @@ def mk_codec( libname: str = 'msgspec', - # proxy as `Struct(**kwargs)` for ad-hoc type extensions + # settings for encoding-to-send extension-types, # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types - # ------ - ------ - dec_hook: Callable|None = None, + # dec_hook: Callable|None = None, enc_hook: Callable|None = None, - # ------ - ------ + ext_types: list[Type]|None = None, + + # optionally provided msg-decoder from which we pull its, + # |_.dec_hook() + # |_.type + ext_dec: MsgDec|None = None # + # ?TODO? other params we might want to support # Encoder: # write_buffer_size=write_buffer_size, # @@ -425,26 +569,43 @@ def mk_codec( `msgspec` ;). ''' - # (manually) generate a msg-payload-spec for all relevant - # god-boxing-msg subtypes, parameterizing the `PayloadMsg.pld: PayloadT` - # for the decoder such that all sub-type msgs in our SCIPP - # will automatically decode to a type-"limited" payload (`Struct`) - # object (set). 
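
Circling back to the `BufferError` question flagged in `.encode()`
above: a `bytearray` cannot be re-sized while a `memoryview` export
of it is alive, so calling `encode_into()` on `._buf` while the
transport (or any caller) still holds a view from the previous
encode will raise exactly that message; most likely the buffer is
still "exported" across a checkpoint in the send path. The safe
single-consumer reuse pattern from the linked msgspec perf-tips,
as a sketch:

    import msgspec

    enc = msgspec.msgpack.Encoder()
    buf = bytearray(256)  # scratch buffer, reused per msg

    for py_obj in ({'i': i} for i in range(3)):
        # encodes into (and resizes) `buf` in place
        enc.encode_into(py_obj, buf)

        # flush or copy-out *before* the next iteration; an
        # async consumer should get its own `bytes` copy,
        wire: bytes = bytes(buf)
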
+ pld_spec = ipc_pld_spec + if enc_hook: + if not ext_types: + raise ValueError( + f'If extending the serializable types with a custom decoder hook, ' + f'you must also provide the expected type set `enc_hook()` will handle ' + f'via the `ext_types: Union[Type]|None = None` argument!\n' + f'enc_hook = {enc_hook!r}\n' + f'ext_types = {ext_types!r}\n' + ) + + dec_hook: Callable|None = None + if ext_dec: + dec: msgspec.Decoder = ext_dec.dec + dec_hook = dec.dec_hook + pld_spec |= dec.type + if ext_types: + pld_spec |= Union[*ext_types] + + # (manually) generate a msg-spec (how appropes) for all relevant + # payload-boxing-struct-msg-types, parameterizing the + # `PayloadMsg.pld: PayloadT` for the decoder such that all msgs + # in our SC-RPC-protocol will automatically decode to + # a type-"limited" payload (`Struct`) object (set). ( ipc_msg_spec, msg_types, ) = mk_msg_spec( - payload_type_union=ipc_pld_spec, + payload_type_union=pld_spec, ) - assert len(ipc_msg_spec.__args__) == len(msg_types) - assert ipc_msg_spec - # TODO: use this shim instead? - # bc.. unification, err somethin? - # dec: MsgDec = mk_dec( - # spec=ipc_msg_spec, - # dec_hook=dec_hook, - # ) + msg_spec_types: set[Type] = unpack_spec_types(ipc_msg_spec) + assert ( + len(ipc_msg_spec.__args__) == len(msg_types) + and + len(msg_spec_types) == len(msg_types) + ) dec = msgpack.Decoder( type=ipc_msg_spec, @@ -453,22 +614,29 @@ def mk_codec( enc = msgpack.Encoder( enc_hook=enc_hook, ) - codec = MsgCodec( _enc=enc, _dec=dec, - _pld_spec=ipc_pld_spec, + _pld_spec=pld_spec, ) - # sanity on expected backend support assert codec.lib.__name__ == libname - return codec # instance of the default `msgspec.msgpack` codec settings, i.e. # no custom structs, hooks or other special types. -_def_msgspec_codec: MsgCodec = mk_codec(ipc_pld_spec=Any) +# +# XXX NOTE XXX, this will break our `Context.start()` call! +# +# * by default we roundtrip the started pld-`value` and if you apply +# this codec (globally anyway with `apply_codec()`) then the +# `roundtripped` value will include a non-`.pld: Raw` which will +# then type-error on the consequent `._ops.validte_payload_msg()`.. +# +_def_msgspec_codec: MsgCodec = mk_codec( + ipc_pld_spec=Any, +) # The built-in IPC `Msg` spec. # Our composing "shuttle" protocol which allows `tractor`-app code @@ -476,13 +644,13 @@ _def_msgspec_codec: MsgCodec = mk_codec(ipc_pld_spec=Any) # https://jcristharif.com/msgspec/supported-types.html # _def_tractor_codec: MsgCodec = mk_codec( - # TODO: use this for debug mode locking prot? - # ipc_pld_spec=Any, - ipc_pld_spec=Raw, + ipc_pld_spec=Raw, # XXX should be default righ!? ) -# TODO: IDEALLY provides for per-`trio.Task` specificity of the + +# -[x] TODO, IDEALLY provides for per-`trio.Task` specificity of the # IPC msging codec used by the transport layer when doing # `Channel.send()/.recv()` of wire data. +# => impled as our `PldRx` which is `Context` scoped B) # ContextVar-TODO: DIDN'T WORK, kept resetting in every new task to default!? # _ctxvar_MsgCodec: ContextVar[MsgCodec] = ContextVar( @@ -559,17 +727,6 @@ def apply_codec( ) token: Token = var.set(codec) - # ?TODO? 
for TreeVar approach which copies from the - # cancel-scope of the prior value, NOT the prior task - # See the docs: - # - https://tricycle.readthedocs.io/en/latest/reference.html#tree-variables - # - https://github.com/oremanj/tricycle/blob/master/tricycle/_tests/test_tree_var.py - # ^- see docs for @cm `.being()` API - # with _ctxvar_MsgCodec.being(codec): - # new = _ctxvar_MsgCodec.get() - # assert new is codec - # yield codec - try: yield var.get() finally: @@ -580,6 +737,19 @@ def apply_codec( ) assert var.get() is orig + # ?TODO? for TreeVar approach which copies from the + # cancel-scope of the prior value, NOT the prior task + # + # See the docs: + # - https://tricycle.readthedocs.io/en/latest/reference.html#tree-variables + # - https://github.com/oremanj/tricycle/blob/master/tricycle/_tests/test_tree_var.py + # ^- see docs for @cm `.being()` API + # + # with _ctxvar_MsgCodec.being(codec): + # new = _ctxvar_MsgCodec.get() + # assert new is codec + # yield codec + def current_codec() -> MsgCodec: ''' @@ -599,6 +769,7 @@ def limit_msg_spec( # -> related to the `MsgCodec._payload_decs` stuff above.. # tagged_structs: list[Struct]|None = None, + hide_tb: bool = True, **codec_kwargs, ) -> MsgCodec: @@ -609,7 +780,7 @@ def limit_msg_spec( for all IPC contexts in use by the current `trio.Task`. ''' - __tracebackhide__: bool = True + __tracebackhide__: bool = hide_tb curr_codec: MsgCodec = current_codec() msgspec_codec: MsgCodec = mk_codec( ipc_pld_spec=payload_spec, diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index dc632217..6f178ba5 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -50,7 +50,9 @@ from tractor._exceptions import ( _mk_recv_mte, pack_error, ) -from tractor._state import current_ipc_ctx +from tractor._state import ( + current_ipc_ctx, +) from ._codec import ( mk_dec, MsgDec, @@ -78,7 +80,7 @@ if TYPE_CHECKING: log = get_logger(__name__) -_def_any_pldec: MsgDec[Any] = mk_dec() +_def_any_pldec: MsgDec[Any] = mk_dec(spec=Any) class PldRx(Struct): @@ -148,6 +150,10 @@ class PldRx(Struct): exit. ''' + # TODO, ensure we pull the current `MsgCodec`'s custom + # dec/enc_hook settings as well ? + # -[ ] see `._codec.mk_codec()` inputs + # orig_dec: MsgDec = self._pld_dec limit_dec: MsgDec = mk_dec( spec=spec, diff --git a/tractor/msg/types.py b/tractor/msg/types.py index 0904411f..1cc8b78e 100644 --- a/tractor/msg/types.py +++ b/tractor/msg/types.py @@ -599,15 +599,15 @@ def mk_msg_spec( Msg[payload_type_union], Generic[PayloadT], ) - defstruct_bases: tuple = ( - Msg, # [payload_type_union], - # Generic[PayloadT], - # ^-XXX-^: not allowed? lul.. - ) + # defstruct_bases: tuple = ( + # Msg, # [payload_type_union], + # # Generic[PayloadT], + # # ^-XXX-^: not allowed? lul.. + # ) ipc_msg_types: list[Msg] = [] idx_msg_types: list[Msg] = [] - defs_msg_types: list[Msg] = [] + # defs_msg_types: list[Msg] = [] nc_msg_types: list[Msg] = [] for msgtype in __msg_types__: @@ -625,7 +625,7 @@ def mk_msg_spec( # TODO: wait why do we need the dynamic version here? # XXX ANSWER XXX -> BC INHERITANCE.. don't work w generics.. # - # NOTE previously bc msgtypes WERE NOT inheritting + # NOTE previously bc msgtypes WERE NOT inheriting # directly the `Generic[PayloadT]` type, the manual method # of generic-paraming with `.__class_getitem__()` wasn't # working.. 
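
For the unfamiliar, the "inheritance doesn't work with generics"
trail above boils down to how `msgspec.Struct` subtypes get
parameterized; a sketch of the indexed-generics style that did work
(vs. `defstruct()`, which can't take `Generic[PayloadT]` in its
`bases`):

    from typing import Generic, TypeVar
    from msgspec import Struct

    PayloadT = TypeVar('PayloadT')

    class Msg(Struct, Generic[PayloadT]):
        cid: str
        pld: PayloadT

    # indexed-generic parameterization (via `.__class_getitem__()`)
    # yields a decode-time `type=`-spec with a bound `.pld` type,
    IntMsg = Msg[int]
    MaybeStrMsg = Msg[str | None]
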
@@ -662,38 +662,35 @@ def mk_msg_spec( # with `msgspec.structs.defstruct` # XXX ALSO DOESN'T WORK - defstruct_msgtype = defstruct( - name=msgtype.__name__, - fields=[ - ('cid', str), + # defstruct_msgtype = defstruct( + # name=msgtype.__name__, + # fields=[ + # ('cid', str), - # XXX doesn't seem to work.. - # ('pld', PayloadT), - - ('pld', payload_type_union), - ], - bases=defstruct_bases, - ) - defs_msg_types.append(defstruct_msgtype) + # # XXX doesn't seem to work.. + # # ('pld', PayloadT), + # ('pld', payload_type_union), + # ], + # bases=defstruct_bases, + # ) + # defs_msg_types.append(defstruct_msgtype) # assert index_paramed_msg_type == manual_paramed_msg_subtype - # paramed_msg_type = manual_paramed_msg_subtype - # ipc_payload_msgs_type_union |= index_paramed_msg_type idx_spec: Union[Type[Msg]] = Union[*idx_msg_types] - def_spec: Union[Type[Msg]] = Union[*defs_msg_types] + # def_spec: Union[Type[Msg]] = Union[*defs_msg_types] nc_spec: Union[Type[Msg]] = Union[*nc_msg_types] specs: dict[str, Union[Type[Msg]]] = { 'indexed_generics': idx_spec, - 'defstruct': def_spec, + # 'defstruct': def_spec, 'types_new_class': nc_spec, } msgtypes_table: dict[str, list[Msg]] = { 'indexed_generics': idx_msg_types, - 'defstruct': defs_msg_types, + # 'defstruct': defs_msg_types, 'types_new_class': nc_msg_types, } -- 2.34.1 From 84be5cc549f40728ddd62b47b553b73573b78428 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Fri, 7 Mar 2025 14:38:22 -0500 Subject: [PATCH 282/305] Move `Union` serializers to new `msg.` mod Namely moving `enc/dec_type_union()` from the test mod to a new `tractor.msg._exts` for general use outside the test suite. --- tests/test_caps_based_msging.py | 70 +++++-------------------- tractor/msg/_exts.py | 90 +++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 57 deletions(-) create mode 100644 tractor/msg/_exts.py diff --git a/tests/test_caps_based_msging.py b/tests/test_caps_based_msging.py index 3c2a73cb..cdc6d59d 100644 --- a/tests/test_caps_based_msging.py +++ b/tests/test_caps_based_msging.py @@ -5,7 +5,6 @@ Low-level functional audits for our B~) ''' -import typing from typing import ( Any, Type, @@ -32,6 +31,7 @@ from tractor import ( from tractor.msg import ( _codec, _ctxvar_MsgCodec, + _exts, NamespacePath, MsgCodec, @@ -247,57 +247,6 @@ def iter_maybe_sends( ) -def dec_type_union( - type_names: list[str], -) -> Type: - ''' - Look up types by name, compile into a list and then create and - return a `typing.Union` from the full set. - - ''' - import importlib - types: list[Type] = [] - for type_name in type_names: - for mod in [ - typing, - importlib.import_module(__name__), - ]: - if type_ref := getattr( - mod, - type_name, - False, - ): - types.append(type_ref) - - # special case handling only.. - # ipc_pld_spec: Union[Type] = eval( - # pld_spec_str, - # {}, # globals - # {'typing': typing}, # locals - # ) - - return Union[*types] - - -def enc_type_union( - union_or_type: Union[Type]|Type, -) -> list[str]: - ''' - Encode a type-union or single type to a list of type-name-strings - ready for IPC interchange. 
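
Post-move the helpers are just imported from the new mod; e.g.
a name-string roundtrip of a pld-spec (a sketch which presumes the
`mods`-aware `dec_type_union()` signature added in a follow-up
patch below, since the target type must be resolvable from the
passed module set):

    import importlib
    from tractor.msg import _exts, NamespacePath

    # encode a (possibly union-of-types) pld-spec to name-strs
    # ready for IPC interchange,
    type_strs: list[str] = _exts.enc_type_union(NamespacePath)
    assert type_strs == ['NamespacePath']

    # ..then resolve it back on the other side of the wire:
    rt_spec = _exts.dec_type_union(
        type_strs,
        mods=[importlib.import_module('tractor.msg')],
    )
    assert rt_spec is NamespacePath
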
- - ''' - type_strs: list[str] = [] - for typ in getattr( - union_or_type, - '__args__', - {union_or_type,}, - ): - type_strs.append(typ.__qualname__) - - return type_strs - - @tractor.context async def send_back_values( ctx: Context, @@ -324,7 +273,7 @@ async def send_back_values( ) # load pld spec from input str - ipc_pld_spec = dec_type_union( + ipc_pld_spec = _exts.dec_type_union( pld_spec_type_strs, ) pld_spec_str = str(ipc_pld_spec) @@ -413,7 +362,6 @@ async def send_back_values( except tractor.MsgTypeError as _mte: mte = _mte - # await tractor.pause() if expect_send: raise RuntimeError( @@ -422,6 +370,10 @@ async def send_back_values( f'value -> {send_value}: {type(send_value)}\n' ) + # await tractor.pause() + raise mte + + async with ctx.open_stream() as ipc: print( f'{uid}: Entering streaming block to send remaining values..' @@ -591,8 +543,9 @@ def test_codec_hooks_mod( ) ): pld_types_str: str = '|'.join(subtypes) - breakpoint() + # breakpoint() else: + # TODO, use `.msg._exts` utils instead of this! pld_types_str: str = ipc_pld_spec.__name__ expected_started = Started( @@ -611,7 +564,7 @@ def test_codec_hooks_mod( if expect_send ] - pld_spec_type_strs: list[str] = enc_type_union(ipc_pld_spec) + pld_spec_type_strs: list[str] = _exts.enc_type_union(ipc_pld_spec) # XXX should raise an mte (`MsgTypeError`) # when `add_codec_hooks == False` bc the input @@ -848,11 +801,14 @@ def chk_pld_type( return roundtrip +# ?TODO? remove since covered in the newer `test_pldrx_limiting`? def test_limit_msgspec( debug_mode: bool, ): ''' - Verify that type-limiting the + Internals unit testing to verify that type-limiting an IPC ctx's + msg spec with `Pldrx.limit_plds()` results in various + encapsulated `msgspec` object settings and state. ''' async def main(): diff --git a/tractor/msg/_exts.py b/tractor/msg/_exts.py new file mode 100644 index 00000000..abf7bcde --- /dev/null +++ b/tractor/msg/_exts.py @@ -0,0 +1,90 @@ +# tractor: structured concurrent "actors". +# Copyright 2018-eternity Tyler Goodlet. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +''' +Type-extension-utils for codec-ing (python) objects not +covered by the `msgspec.msgpack` protocol. + +See the various API docs from `msgspec`. + +extending from native types, +- https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types + +converters, +- https://jcristharif.com/msgspec/converters.html +- https://jcristharif.com/msgspec/api.html#msgspec.convert + +`Raw` fields, +- https://jcristharif.com/msgspec/api.html#raw +- support for `.convert()` and `Raw`, + |_ https://jcristharif.com/msgspec/changelog.html + +''' +import typing +from typing import ( + Type, + Union, +) + +def dec_type_union( + type_names: list[str], +) -> Type: + ''' + Look up types by name, compile into a list and then create and + return a `typing.Union` from the full set. 
+ + ''' + import importlib + types: list[Type] = [] + for type_name in type_names: + for mod in [ + typing, + importlib.import_module(__name__), + ]: + if type_ref := getattr( + mod, + type_name, + False, + ): + types.append(type_ref) + + # special case handling only.. + # ipc_pld_spec: Union[Type] = eval( + # pld_spec_str, + # {}, # globals + # {'typing': typing}, # locals + # ) + + return Union[*types] + + +def enc_type_union( + union_or_type: Union[Type]|Type, +) -> list[str]: + ''' + Encode a type-union or single type to a list of type-name-strings + ready for IPC interchange. + + ''' + type_strs: list[str] = [] + for typ in getattr( + union_or_type, + '__args__', + {union_or_type,}, + ): + type_strs.append(typ.__qualname__) + + return type_strs -- 2.34.1 From 9199913f70cd80b1cb500eedf6e2e285aa45ac45 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 8 Mar 2025 15:48:18 -0500 Subject: [PATCH 283/305] Tweak type-error messages for when `ext_types` is missing --- tractor/msg/_codec.py | 50 ++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/tractor/msg/_codec.py b/tractor/msg/_codec.py index 46716d4c..1e9623af 100644 --- a/tractor/msg/_codec.py +++ b/tractor/msg/_codec.py @@ -211,24 +211,28 @@ def mk_dec( and ext_types is None ): - raise ValueError( - f'You must provide a type-spec for a msg decoder!\n' - f'The only time `spec=None` is permitted is if custom extension types ' - f'are expected to be supported, in which case `ext_types` must be non-`None`' - f'and it is presumed that only the `ext_types` (supported by the paired `dec_hook()`) ' - f'will be permitted within the type-`spec`!\n' - - f'tpec = {spec!r}\n' + raise TypeError( + f'MIssing type-`spec` for msg decoder!\n' + f'\n' + f'`spec=None` is **only** permitted is if custom extension types ' + f'are provided via `ext_types`, meaning it must be non-`None`.\n' + f'\n' + f'In this case it is presumed that only the `ext_types`, ' + f'which much be handled by a paired `dec_hook()`, ' + f'will be permitted within the payload type-`spec`!\n' + f'\n' + f'spec = {spec!r}\n' f'dec_hook = {dec_hook!r}\n' f'ext_types = {ext_types!r}\n' ) if dec_hook: if ext_types is None: - raise ValueError( - f'If extending the serializable types with a custom decoder hook, ' - f'you must also provide the expected type set `dec_hook()` will handle ' - f'via the `ext_types: Union[Type]|None = None` argument!\n' + raise TypeError( + f'If extending the serializable types with a custom decode hook (`dec_hook()`), ' + f'you must also provide the expected type set that the hook will handle ' + f'via a `ext_types: Union[Type]|None = None` argument!\n' + f'\n' f'dec_hook = {dec_hook!r}\n' f'ext_types = {ext_types!r}\n' ) @@ -287,7 +291,7 @@ def unpack_spec_types( When `spec` is not a type-union returns `{spec,}`. 
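
Worth noting that `Union.__args__` is a *tuple*, so the `set()`
wrap (applied in the hunk just below) is what actually makes the
annotated return type honest:

    from typing import Union

    spec = Union[int, str]  # or `int | str`
    assert spec.__args__ == (int, str)  # a tuple, not a set!

    assert unpack_spec_types(spec) == {int, str}
    assert unpack_spec_types(int) == {int}  # non-union fallthrough
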
''' - spec_subtypes: set[Union[Type]] = ( + spec_subtypes: set[Union[Type]] = set( getattr( spec, '__args__', @@ -449,6 +453,7 @@ class MsgCodec(Struct): # |_BufferError: Existing exports of data: object cannot be re-sized as_ext_type: bool = False, + hide_tb: bool = True, ) -> bytes: ''' @@ -459,11 +464,21 @@ class MsgCodec(Struct): https://jcristharif.com/msgspec/perf-tips.html#reusing-an-output-buffer ''' + __tracebackhide__: bool = hide_tb if use_buf: self._enc.encode_into(py_obj, self._buf) return self._buf return self._enc.encode(py_obj) + # try: + # return self._enc.encode(py_obj) + # except TypeError as typerr: + # typerr.add_note( + # '|_src error from `msgspec`' + # # f'|_{self._enc.encode!r}' + # ) + # raise typerr + # TODO! REMOVE once i'm confident we won't ever need it! # # box: Struct = self._ext_types_box @@ -572,10 +587,11 @@ def mk_codec( pld_spec = ipc_pld_spec if enc_hook: if not ext_types: - raise ValueError( - f'If extending the serializable types with a custom decoder hook, ' - f'you must also provide the expected type set `enc_hook()` will handle ' - f'via the `ext_types: Union[Type]|None = None` argument!\n' + raise TypeError( + f'If extending the serializable types with a custom encode hook (`enc_hook()`), ' + f'you must also provide the expected type set that the hook will handle ' + f'via a `ext_types: Union[Type]|None = None` argument!\n' + f'\n' f'enc_hook = {enc_hook!r}\n' f'ext_types = {ext_types!r}\n' ) -- 2.34.1 From fbbecff3945adcb22a0262cb7f523f830b1f795e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 8 Mar 2025 15:49:21 -0500 Subject: [PATCH 284/305] Offer a `mods: list` to `dec_type_union()`; drop importing this-mod --- tractor/msg/_exts.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tractor/msg/_exts.py b/tractor/msg/_exts.py index abf7bcde..31eafb5d 100644 --- a/tractor/msg/_exts.py +++ b/tractor/msg/_exts.py @@ -33,6 +33,9 @@ converters, |_ https://jcristharif.com/msgspec/changelog.html ''' +from types import ( + ModuleType, +) import typing from typing import ( Type, @@ -41,19 +44,20 @@ from typing import ( def dec_type_union( type_names: list[str], -) -> Type: + mods: list[ModuleType] = [] +) -> Type|Union[Type]: ''' Look up types by name, compile into a list and then create and return a `typing.Union` from the full set. ''' - import importlib + # import importlib types: list[Type] = [] for type_name in type_names: for mod in [ typing, - importlib.import_module(__name__), - ]: + # importlib.import_module(__name__), + ] + mods: if type_ref := getattr( mod, type_name, -- 2.34.1 From 123683d4425314cf7ab4a5bdfb800b4869f84887 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 8 Mar 2025 15:50:14 -0500 Subject: [PATCH 285/305] Raise RTE from `limit_plds()` on no `curr_ctx` Since it should only be used from within a `Portal.open_context()` scope, make sure the caller knows that! Also don't hide the frame in tb if the immediate function errors.. --- tractor/msg/_ops.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index 6f178ba5..839be532 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -461,11 +461,16 @@ def limit_plds( ''' __tracebackhide__: bool = True + curr_ctx: Context|None = current_ipc_ctx() + if curr_ctx is None: + raise RuntimeError( + 'No IPC `Context` is active !?\n' + 'Did you open `limit_plds()` from outside ' + 'a `Portal.open_context()` scope-block?' 
+ ) try: - curr_ctx: Context = current_ipc_ctx() rx: PldRx = curr_ctx._pld_rx orig_pldec: MsgDec = rx.pld_dec - with rx.limit_plds( spec=spec, **dec_kwargs, @@ -475,6 +480,11 @@ def limit_plds( f'{pldec}\n' ) yield pldec + + except BaseException: + __tracebackhide__: bool = False + raise + finally: log.runtime( 'Reverted to previous payload-decoder\n\n' -- 2.34.1 From c48d153375542f897f87c3aec36f06676020deb4 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 8 Mar 2025 15:52:13 -0500 Subject: [PATCH 286/305] Rework IPC-using `test_caps_basesd_msging` tests Namely renaming and massively simplifying it to a new `test_ext_types_over_ipc` which avoids all the wacky "parent dictates what sender should be able to send beforehand".. Instead keep it simple and just always try to send the same small set of types over the wire with expect-logic to handle each case, - use the new `dec_hook`/`ext_types` args to `mk_[co]dec()` routines for pld-spec ipc transport. - always try to stream a small set of types from the child with logic to handle the cases expected to error. Other, - draft a `test_pld_limiting_usage` to check runtime raising of bad API usage; haven't run it yet tho. - move `test_custom_extension_types` to top of mod so that the `enc/dec_nsp()` hooks can be reffed from test parametrizations. - comment out (and maybe remove) the old routines for `iter_maybe_sends`, `test_limit_msgspec`, `chk_pld_type`. XXX TODO, turns out the 2 failing cases from this suite have exposed an an actual bug with `MsgTypeError` unpacking where the `ipc_msg=` input is being set to `None` ?? -> see the comment at the bottom of `._exceptions._mk_recv_mte()` which seems to describe the likely culprit? --- tests/test_caps_based_msging.py | 1631 ++++++++++++++++--------------- 1 file changed, 825 insertions(+), 806 deletions(-) diff --git a/tests/test_caps_based_msging.py b/tests/test_caps_based_msging.py index cdc6d59d..4d78a117 100644 --- a/tests/test_caps_based_msging.py +++ b/tests/test_caps_based_msging.py @@ -5,6 +5,11 @@ Low-level functional audits for our B~) ''' +from contextlib import ( + contextmanager as cm, + # nullcontext, +) +import importlib from typing import ( Any, Type, @@ -12,10 +17,10 @@ from typing import ( ) from msgspec import ( - structs, - msgpack, + # structs, + # msgpack, Raw, - Struct, + # Struct, ValidationError, ) import pytest @@ -24,7 +29,7 @@ import trio import tractor from tractor import ( Actor, - _state, + # _state, MsgTypeError, Context, ) @@ -42,815 +47,16 @@ from tractor.msg import ( current_codec, ) from tractor.msg.types import ( - _payload_msgs, log, - PayloadMsg, Started, - mk_msg_spec, + # _payload_msgs, + # PayloadMsg, + # mk_msg_spec, ) from tractor.msg._ops import ( limit_plds, ) - -def mk_custom_codec( - add_hooks: bool, - -) -> tuple[ - MsgCodec, # encode to send - MsgDec, # pld receive-n-decode -]: - ''' - Create custom `msgpack` enc/dec-hooks and set a `Decoder` - which only loads `pld_spec` (like `NamespacePath`) types. - - ''' - - # XXX NOTE XXX: despite defining `NamespacePath` as a type - # field on our `PayloadMsg.pld`, we still need a enc/dec_hook() pair - # to cast to/from that type on the wire. See the docs: - # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types - - # if pld_spec is Any: - # pld_spec = Raw - - nsp_codec: MsgCodec = mk_codec( - # ipc_pld_spec=Raw, # default! 
- - # NOTE XXX: the encode hook MUST be used no matter what since - # our `NamespacePath` is not any of a `Any` native type nor - # a `msgspec.Struct` subtype - so `msgspec` has no way to know - # how to encode it unless we provide the custom hook. - # - # AGAIN that is, regardless of whether we spec an - # `Any`-decoded-pld the enc has no knowledge (by default) - # how to enc `NamespacePath` (nsp), so we add a custom - # hook to do that ALWAYS. - enc_hook=enc_nsp if add_hooks else None, - - # XXX NOTE: pretty sure this is mutex with the `type=` to - # `Decoder`? so it won't work in tandem with the - # `ipc_pld_spec` passed above? - ext_types=[NamespacePath], - ) - # dec_hook=dec_nsp if add_hooks else None, - return nsp_codec - - -def chk_codec_applied( - expect_codec: MsgCodec, - enter_value: MsgCodec|None = None, - -) -> MsgCodec: - ''' - buncha sanity checks ensuring that the IPC channel's - context-vars are set to the expected codec and that are - ctx-var wrapper APIs match the same. - - ''' - # TODO: play with tricyle again, bc this is supposed to work - # the way we want? - # - # TreeVar - # task: trio.Task = trio.lowlevel.current_task() - # curr_codec = _ctxvar_MsgCodec.get_in(task) - - # ContextVar - # task_ctx: Context = task.context - # assert _ctxvar_MsgCodec in task_ctx - # curr_codec: MsgCodec = task.context[_ctxvar_MsgCodec] - - # NOTE: currently we use this! - # RunVar - curr_codec: MsgCodec = current_codec() - last_read_codec = _ctxvar_MsgCodec.get() - # assert curr_codec is last_read_codec - - assert ( - (same_codec := expect_codec) is - # returned from `mk_codec()` - - # yielded value from `apply_codec()` - - # read from current task's `contextvars.Context` - curr_codec is - last_read_codec - - # the default `msgspec` settings - is not _codec._def_msgspec_codec - is not _codec._def_tractor_codec - ) - - if enter_value: - enter_value is same_codec - - -def iter_maybe_sends( - send_items: dict[Union[Type], Any] | list[tuple], - ipc_pld_spec: Union[Type] | Any, - add_codec_hooks: bool, - - codec: MsgCodec|None = None, - -) -> tuple[Any, bool]: - - if isinstance(send_items, dict): - send_items = send_items.items() - - for ( - send_type_spec, - send_value, - ) in send_items: - - expect_roundtrip: bool = False - - # values-to-typespec santiy - send_type = type(send_value) - assert send_type == send_type_spec or ( - (subtypes := getattr(send_type_spec, '__args__', None)) - and send_type in subtypes - ) - - spec_subtypes: set[Union[Type]] = ( - getattr( - ipc_pld_spec, - '__args__', - {ipc_pld_spec,}, - ) - ) - send_in_spec: bool = ( - send_type == ipc_pld_spec - or ( - ipc_pld_spec != Any - and # presume `Union` of types - send_type in spec_subtypes - ) - or ( - ipc_pld_spec == Any - and - send_type != NamespacePath - ) - ) - expect_roundtrip = ( - send_in_spec - # any spec should support all other - # builtin py values that we send - # except our custom nsp type which - # we should be able to send as long - # as we provide the custom codec hooks. - or ( - ipc_pld_spec == Any - and - send_type == NamespacePath - and - add_codec_hooks - ) - ) - - if codec is not None: - # XXX FIRST XXX ensure roundtripping works - # before touching any IPC primitives/APIs. 
- wire_bytes: bytes = codec.encode( - Started( - cid='blahblah', - pld=send_value, - ) - ) - # NOTE: demonstrates the decoder loading - # to via our native SCIPP msg-spec - # (structurred-conc-inter-proc-protocol) - # implemented as per, - try: - msg: Started = codec.decode(wire_bytes) - if not expect_roundtrip: - pytest.fail( - f'NOT-EXPECTED able to roundtrip value given spec:\n' - f'ipc_pld_spec -> {ipc_pld_spec}\n' - f'value -> {send_value}: {send_type}\n' - ) - - pld = msg.pld - assert pld == send_value - - except ValidationError: - if expect_roundtrip: - pytest.fail( - f'EXPECTED to roundtrip value given spec:\n' - f'ipc_pld_spec -> {ipc_pld_spec}\n' - f'value -> {send_value}: {send_type}\n' - ) - - yield ( - str(send_type), - send_value, - expect_roundtrip, - ) - - -@tractor.context -async def send_back_values( - ctx: Context, - expect_debug: bool, - pld_spec_type_strs: list[str], - add_hooks: bool, - # started_msg_bytes: bytes, - expect_ipc_send: dict[str, tuple[Any, bool]], - -) -> None: - ''' - Setup up a custom codec to load instances of `NamespacePath` - and ensure we can round trip a func ref with our parent. - - ''' - uid: tuple = tractor.current_actor().uid - - # debug mode sanity check (prolly superfluous but, meh) - assert expect_debug == _state.debug_mode() - - # init state in sub-actor should be default - chk_codec_applied( - expect_codec=_codec._def_tractor_codec, - ) - - # load pld spec from input str - ipc_pld_spec = _exts.dec_type_union( - pld_spec_type_strs, - ) - pld_spec_str = str(ipc_pld_spec) - - # same as on parent side config. - nsp_codec: MsgCodec = mk_custom_codec( - add_hooks=add_hooks, - ) - with ( - apply_codec(nsp_codec) as codec, - limit_plds(ipc_pld_spec) as codec, - ): - # we SHOULD NOT be swapping the global codec since it breaks - # `Context.starte()` roundtripping checks! - chk_codec_applied( - expect_codec=nsp_codec, - ) - # XXX SO NOT THIS! - # chk_codec_applied( - # expect_codec=nsp_codec, - # enter_value=codec, - # ) - - print( - f'{uid}: attempting `Started`-bytes DECODE..\n' - ) - try: - # msg: Started = nsp_codec.decode(started_msg_bytes) - - ipc_spec: Type = ctx._pld_rx._pld_dec.spec - expected_pld_spec_str: str = str(ipc_spec) - assert ( - pld_spec_str == expected_pld_spec_str - and - ipc_pld_spec == ipc_spec - ) - - # TODO: maybe we should add our own wrapper error so as to - # be interchange-lib agnostic? - # -[ ] the error type is wtv is raised from the hook so we - # could also require a type-class of errors for - # indicating whether the hook-failure can be handled by - # a nasty-dialog-unprot sub-sys? - except ValidationError: - - # NOTE: only in the `Any` spec case do we expect this to - # work since otherwise no spec covers a plain-ol' - # `.pld: str` - if pld_spec_str == 'Any': - raise - else: - print( - f'{uid}: (correctly) unable to DECODE `Started`-bytes\n' - # f'{started_msg_bytes}\n' - ) - - iter_send_val_items = iter(expect_ipc_send.values()) - sent: list[Any] = [] - for ( - send_value, - expect_send, - ) in iter_send_val_items: - try: - print( - f'{uid}: attempting to `.started({send_value})`\n' - f'=> expect_send: {expect_send}\n' - f'SINCE, ipc_pld_spec: {ipc_pld_spec}\n' - f'AND, codec: {codec}\n' - ) - await ctx.started(send_value) - sent.append(send_value) - if not expect_send: - - # XXX NOTE XXX THIS WON'T WORK WITHOUT SPECIAL - # `str` handling! or special debug mode IPC - # msgs! 
- await tractor.pause() - - raise RuntimeError( - f'NOT-EXPECTED able to roundtrip value given spec:\n' - f'ipc_pld_spec -> {ipc_pld_spec}\n' - f'value -> {send_value}: {type(send_value)}\n' - ) - - break # move on to streaming block.. - - except tractor.MsgTypeError as _mte: - mte = _mte - - if expect_send: - raise RuntimeError( - f'EXPECTED to `.started()` value given spec ??\n\n' - f'ipc_pld_spec -> {ipc_pld_spec}\n' - f'value -> {send_value}: {type(send_value)}\n' - ) - - # await tractor.pause() - raise mte - - - async with ctx.open_stream() as ipc: - print( - f'{uid}: Entering streaming block to send remaining values..' - ) - - for send_value, expect_send in iter_send_val_items: - send_type: Type = type(send_value) - print( - '------ - ------\n' - f'{uid}: SENDING NEXT VALUE\n' - f'ipc_pld_spec: {ipc_pld_spec}\n' - f'expect_send: {expect_send}\n' - f'val: {send_value}\n' - '------ - ------\n' - ) - try: - await ipc.send(send_value) - print(f'***\n{uid}-CHILD sent {send_value!r}\n***\n') - sent.append(send_value) - - # NOTE: should only raise above on - # `.started()` or a `Return` - # if not expect_send: - # raise RuntimeError( - # f'NOT-EXPECTED able to roundtrip value given spec:\n' - # f'ipc_pld_spec -> {ipc_pld_spec}\n' - # f'value -> {send_value}: {send_type}\n' - # ) - - except ValidationError: - print(f'{uid} FAILED TO SEND {send_value}!') - - # await tractor.pause() - if expect_send: - raise RuntimeError( - f'EXPECTED to roundtrip value given spec:\n' - f'ipc_pld_spec -> {ipc_pld_spec}\n' - f'value -> {send_value}: {send_type}\n' - ) - # continue - - else: - print( - f'{uid}: finished sending all values\n' - 'Should be exiting stream block!\n' - ) - - print(f'{uid}: exited streaming block!') - - # TODO: this won't be true bc in streaming phase we DO NOT - # msgspec check outbound msgs! - # -[ ] once we implement the receiver side `InvalidMsg` - # then we can expect it here? - # assert ( - # len(sent) - # == - # len([val - # for val, expect in - # expect_ipc_send.values() - # if expect is True]) - # ) - - -@pytest.mark.parametrize( - 'ipc_pld_spec', - [ - Any, - NamespacePath, - NamespacePath|None, # the "maybe" spec Bo - ], - ids=[ - 'any_type', - 'nsp_type', - 'maybe_nsp_type', - ] -) -@pytest.mark.parametrize( - 'add_codec_hooks', - [ - True, - False, - ], - ids=['use_codec_hooks', 'no_codec_hooks'], -) -def test_codec_hooks_mod( - debug_mode: bool, - ipc_pld_spec: Union[Type]|Any, - # send_value: None|str|NamespacePath, - add_codec_hooks: bool, -): - ''' - Audit the `.msg.MsgCodec` override apis details given our impl - uses `contextvars` to accomplish per `trio` task codec - application around an inter-proc-task-comms context. 
- - ''' - async def main(): - nsp = NamespacePath.from_ref(ex_func) - send_items: dict[Union, Any] = { - Union[None]: None, - Union[NamespacePath]: nsp, - Union[str]: str(nsp), - } - - # init default state for actor - chk_codec_applied( - expect_codec=_codec._def_tractor_codec, - ) - - async with tractor.open_nursery( - debug_mode=debug_mode, - ) as an: - p: tractor.Portal = await an.start_actor( - 'sub', - enable_modules=[__name__], - ) - - # TODO: 2 cases: - # - codec not modified -> decode nsp as `str` - # - codec modified with hooks -> decode nsp as - # `NamespacePath` - nsp_codec: MsgCodec = mk_custom_codec( - add_hooks=add_codec_hooks, - ) - with apply_codec(nsp_codec) as codec: - chk_codec_applied( - expect_codec=nsp_codec, - enter_value=codec, - ) - - expect_ipc_send: dict[str, tuple[Any, bool]] = {} - - report: str = ( - 'Parent report on send values with\n' - f'ipc_pld_spec: {ipc_pld_spec}\n' - ' ------ - ------\n' - ) - for ( - val_type_str, - val, - expect_send, - )in iter_maybe_sends( - send_items, - ipc_pld_spec, - add_codec_hooks=add_codec_hooks, - ): - report += ( - f'send_value: {val}: {type(val)} ' - f'=> expect_send: {expect_send}\n' - ) - expect_ipc_send[val_type_str] = ( - val, - expect_send, - ) - - print( - report + - ' ------ - ------\n' - ) - assert len(expect_ipc_send) == len(send_items) - # now try over real IPC with a the subactor - # expect_ipc_rountrip: bool = True - - if ( - subtypes := getattr( - ipc_pld_spec, '__args__', False - ) - ): - pld_types_str: str = '|'.join(subtypes) - # breakpoint() - else: - # TODO, use `.msg._exts` utils instead of this! - pld_types_str: str = ipc_pld_spec.__name__ - - expected_started = Started( - cid='cid', - # pld=str(pld_types_str), - pld=ipc_pld_spec, - ) - started_msg_bytes: bytes = nsp_codec.encode( - expected_started, - ) - # build list of values we expect to receive from - # the subactor. - expect_to_send: list[Any] = [ - val - for val, expect_send in expect_ipc_send.values() - if expect_send - ] - - pld_spec_type_strs: list[str] = _exts.enc_type_union(ipc_pld_spec) - - # XXX should raise an mte (`MsgTypeError`) - # when `add_codec_hooks == False` bc the input - # `expect_ipc_send` kwarg has a nsp which can't be - # serialized! - # - # TODO:can we ensure this happens from the - # `Return`-side (aka the sub) as well? - if not add_codec_hooks: - try: - async with p.open_context( - send_back_values, - expect_debug=debug_mode, - pld_spec_type_strs=pld_spec_type_strs, - add_hooks=add_codec_hooks, - started_msg_bytes=started_msg_bytes, - - # XXX NOTE bc we send a `NamespacePath` in this kwarg - expect_ipc_send=expect_ipc_send, - - ) as (ctx, first): - pytest.fail('ctx should fail to open without custom enc_hook!?') - - # this test passes bc we can go no further! - except MsgTypeError: - # teardown nursery - await p.cancel_actor() - return - - # TODO: send the original nsp here and - # test with `limit_msg_spec()` above? - # await tractor.pause() - print('PARENT opening IPC ctx!\n') - ctx: tractor.Context - ipc: tractor.MsgStream - async with ( - - # XXX should raise an mte (`MsgTypeError`) - # when `add_codec_hooks == False`.. - p.open_context( - send_back_values, - expect_debug=debug_mode, - pld_spec_type_strs=pld_spec_type_strs, - add_hooks=add_codec_hooks, - started_msg_bytes=nsp_codec.encode(expected_started), - expect_ipc_send=expect_ipc_send, - ) as (ctx, first), - - ctx.open_stream() as ipc, - ): - # ensure codec is still applied across - # `tractor.Context` + its embedded nursery. 
- chk_codec_applied( - expect_codec=nsp_codec, - enter_value=codec, - ) - print( - 'root: ENTERING CONTEXT BLOCK\n' - f'type(first): {type(first)}\n' - f'first: {first}\n' - ) - expect_to_send.remove(first) - - # TODO: explicit values we expect depending on - # codec config! - # assert first == first_val - # assert first == f'{__name__}:ex_func' - - async for next_sent in ipc: - print( - 'Parent: child sent next value\n' - f'{next_sent}: {type(next_sent)}\n' - ) - if expect_to_send: - expect_to_send.remove(next_sent) - else: - print('PARENT should terminate stream loop + block!') - - # all sent values should have arrived! - assert not expect_to_send - - await p.cancel_actor() - - trio.run(main) - - -def chk_pld_type( - payload_spec: Type[Struct]|Any, - pld: Any, - - expect_roundtrip: bool|None = None, - -) -> bool: - - pld_val_type: Type = type(pld) - - # TODO: verify that the overridden subtypes - # DO NOT have modified type-annots from original! - # 'Start', .pld: FuncSpec - # 'StartAck', .pld: IpcCtxSpec - # 'Stop', .pld: UNSEt - # 'Error', .pld: ErrorData - - codec: MsgCodec = mk_codec( - # NOTE: this ONLY accepts `PayloadMsg.pld` fields of a specified - # type union. - ipc_pld_spec=payload_spec, - ) - - # make a one-off dec to compare with our `MsgCodec` instance - # which does the below `mk_msg_spec()` call internally - ipc_msg_spec: Union[Type[Struct]] - msg_types: list[PayloadMsg[payload_spec]] - ( - ipc_msg_spec, - msg_types, - ) = mk_msg_spec( - payload_type_union=payload_spec, - ) - _enc = msgpack.Encoder() - _dec = msgpack.Decoder( - type=ipc_msg_spec or Any, # like `PayloadMsg[Any]` - ) - - assert ( - payload_spec - == - codec.pld_spec - ) - - # assert codec.dec == dec - # - # ^-XXX-^ not sure why these aren't "equal" but when cast - # to `str` they seem to match ?? .. kk - - assert ( - str(ipc_msg_spec) - == - str(codec.msg_spec) - == - str(_dec.type) - == - str(codec.dec.type) - ) - - # verify the boxed-type for all variable payload-type msgs. - if not msg_types: - breakpoint() - - roundtrip: bool|None = None - pld_spec_msg_names: list[str] = [ - td.__name__ for td in _payload_msgs - ] - for typedef in msg_types: - - skip_runtime_msg: bool = typedef.__name__ not in pld_spec_msg_names - if skip_runtime_msg: - continue - - pld_field = structs.fields(typedef)[1] - assert pld_field.type is payload_spec # TODO-^ does this need to work to get all subtypes to adhere? - - kwargs: dict[str, Any] = { - 'cid': '666', - 'pld': pld, - } - enc_msg: PayloadMsg = typedef(**kwargs) - - _wire_bytes: bytes = _enc.encode(enc_msg) - wire_bytes: bytes = codec.enc.encode(enc_msg) - assert _wire_bytes == wire_bytes - - ve: ValidationError|None = None - try: - dec_msg = codec.dec.decode(wire_bytes) - _dec_msg = _dec.decode(wire_bytes) - - # decoded msg and thus payload should be exactly same! - assert (roundtrip := ( - _dec_msg - == - dec_msg - == - enc_msg - )) - - if ( - expect_roundtrip is not None - and expect_roundtrip != roundtrip - ): - breakpoint() - - assert ( - pld - == - dec_msg.pld - == - enc_msg.pld - ) - # assert (roundtrip := (_dec_msg == enc_msg)) - - except ValidationError as _ve: - ve = _ve - roundtrip: bool = False - if pld_val_type is payload_spec: - raise ValueError( - 'Got `ValidationError` despite type-var match!?\n' - f'pld_val_type: {pld_val_type}\n' - f'payload_type: {payload_spec}\n' - ) from ve - - else: - # ow we good cuz the pld spec mismatched. 
- print( - 'Got expected `ValidationError` since,\n' - f'{pld_val_type} is not {payload_spec}\n' - ) - else: - if ( - payload_spec is not Any - and - pld_val_type is not payload_spec - ): - raise ValueError( - 'DID NOT `ValidationError` despite expected type match!?\n' - f'pld_val_type: {pld_val_type}\n' - f'payload_type: {payload_spec}\n' - ) - - # full code decode should always be attempted! - if roundtrip is None: - breakpoint() - - return roundtrip - - -# ?TODO? remove since covered in the newer `test_pldrx_limiting`? -def test_limit_msgspec( - debug_mode: bool, -): - ''' - Internals unit testing to verify that type-limiting an IPC ctx's - msg spec with `Pldrx.limit_plds()` results in various - encapsulated `msgspec` object settings and state. - - ''' - async def main(): - async with tractor.open_root_actor( - debug_mode=debug_mode, - ): - # ensure we can round-trip a boxing `PayloadMsg` - assert chk_pld_type( - payload_spec=Any, - pld=None, - expect_roundtrip=True, - ) - - # verify that a mis-typed payload value won't decode - assert not chk_pld_type( - payload_spec=int, - pld='doggy', - ) - - # parametrize the boxed `.pld` type as a custom-struct - # and ensure that parametrization propagates - # to all payload-msg-spec-able subtypes! - class CustomPayload(Struct): - name: str - value: Any - - assert not chk_pld_type( - payload_spec=CustomPayload, - pld='doggy', - ) - - assert chk_pld_type( - payload_spec=CustomPayload, - pld=CustomPayload(name='doggy', value='urmom') - ) - - # yah, we can `.pause_from_sync()` now! - # breakpoint() - - trio.run(main) - - def enc_nsp(obj: Any) -> Any: actor: Actor = tractor.current_actor( err_on_no_runtime=False, @@ -1034,3 +240,816 @@ def test_custom_extension_types( except TypeError: if not add_codec_hooks: pass + +@tractor.context +async def sleep_forever_in_sub( + ctx: Context, +) -> None: + await trio.sleep_forever() + + +def mk_custom_codec( + add_hooks: bool, + +) -> tuple[ + MsgCodec, # encode to send + MsgDec, # pld receive-n-decode +]: + ''' + Create custom `msgpack` enc/dec-hooks and set a `Decoder` + which only loads `pld_spec` (like `NamespacePath`) types. + + ''' + + # XXX NOTE XXX: despite defining `NamespacePath` as a type + # field on our `PayloadMsg.pld`, we still need a enc/dec_hook() pair + # to cast to/from that type on the wire. See the docs: + # https://jcristharif.com/msgspec/extending.html#mapping-to-from-native-types + + # if pld_spec is Any: + # pld_spec = Raw + + nsp_codec: MsgCodec = mk_codec( + # ipc_pld_spec=Raw, # default! + + # NOTE XXX: the encode hook MUST be used no matter what since + # our `NamespacePath` is not any of a `Any` native type nor + # a `msgspec.Struct` subtype - so `msgspec` has no way to know + # how to encode it unless we provide the custom hook. + # + # AGAIN that is, regardless of whether we spec an + # `Any`-decoded-pld the enc has no knowledge (by default) + # how to enc `NamespacePath` (nsp), so we add a custom + # hook to do that ALWAYS. + enc_hook=enc_nsp if add_hooks else None, + + # XXX NOTE: pretty sure this is mutex with the `type=` to + # `Decoder`? so it won't work in tandem with the + # `ipc_pld_spec` passed above? 
+ ext_types=[NamespacePath], + ) + # dec_hook=dec_nsp if add_hooks else None, + return nsp_codec + + +@pytest.mark.parametrize( + 'limit_plds_args', + [ + ( + {'dec_hook': None, 'ext_types': None}, + None, + ), + ( + {'dec_hook': dec_nsp, 'ext_types': None}, + TypeError, + ), + ( + {'dec_hook': dec_nsp, 'ext_types': [NamespacePath]}, + None, + ), + ( + {'dec_hook': dec_nsp, 'ext_types': [NamespacePath|None]}, + None, + ), + ], + ids=[ + 'no_hook_no_ext_types', + 'only_hook', + 'hook_and_ext_types', + 'hook_and_ext_types_w_null', + ] +) +def test_pld_limiting_usage( + limit_plds_args: tuple[dict, Exception|None], +): + ''' + Verify `dec_hook()` and `ext_types` need to either both be provided + or we raise a explanator type-error. + + ''' + kwargs, maybe_err = limit_plds_args + async def main(): + async with tractor.open_nursery() as an: # just to open runtime + + # XXX SHOULD NEVER WORK outside an ipc ctx scope! + try: + with limit_plds(**kwargs): + pass + except RuntimeError: + pass + + p: tractor.Portal = await an.start_actor( + 'sub', + enable_modules=[__name__], + ) + async with ( + p.open_context( + sleep_forever_in_sub + ) as (ctx, first), + ): + try: + with limit_plds(**kwargs): + pass + except maybe_err as exc: + assert type(exc) is maybe_err + pass + + +def chk_codec_applied( + expect_codec: MsgCodec|None, + enter_value: MsgCodec|None = None, + +) -> MsgCodec: + ''' + buncha sanity checks ensuring that the IPC channel's + context-vars are set to the expected codec and that are + ctx-var wrapper APIs match the same. + + ''' + # TODO: play with tricyle again, bc this is supposed to work + # the way we want? + # + # TreeVar + # task: trio.Task = trio.lowlevel.current_task() + # curr_codec = _ctxvar_MsgCodec.get_in(task) + + # ContextVar + # task_ctx: Context = task.context + # assert _ctxvar_MsgCodec in task_ctx + # curr_codec: MsgCodec = task.context[_ctxvar_MsgCodec] + if expect_codec is None: + assert enter_value is None + return + + # NOTE: currently we use this! 
+ # RunVar + curr_codec: MsgCodec = current_codec() + last_read_codec = _ctxvar_MsgCodec.get() + # assert curr_codec is last_read_codec + + assert ( + (same_codec := expect_codec) is + # returned from `mk_codec()` + + # yielded value from `apply_codec()` + + # read from current task's `contextvars.Context` + curr_codec is + last_read_codec + + # the default `msgspec` settings + is not _codec._def_msgspec_codec + is not _codec._def_tractor_codec + ) + + if enter_value: + assert enter_value is same_codec + + +# def iter_maybe_sends( +# send_items: dict[Union[Type], Any] | list[tuple], +# ipc_pld_spec: Union[Type] | Any, +# add_codec_hooks: bool, + +# codec: MsgCodec|None = None, + +# ) -> tuple[Any, bool]: + +# if isinstance(send_items, dict): +# send_items = send_items.items() + +# for ( +# send_type_spec, +# send_value, +# ) in send_items: + +# expect_roundtrip: bool = False + +# # values-to-typespec santiy +# send_type = type(send_value) +# assert send_type == send_type_spec or ( +# (subtypes := getattr(send_type_spec, '__args__', None)) +# and send_type in subtypes +# ) + +# spec_subtypes: set[Union[Type]] = ( +# getattr( +# ipc_pld_spec, +# '__args__', +# {ipc_pld_spec,}, +# ) +# ) +# send_in_spec: bool = ( +# send_type == ipc_pld_spec +# or ( +# ipc_pld_spec != Any +# and # presume `Union` of types +# send_type in spec_subtypes +# ) +# or ( +# ipc_pld_spec == Any +# and +# send_type != NamespacePath +# ) +# ) +# expect_roundtrip = ( +# send_in_spec +# # any spec should support all other +# # builtin py values that we send +# # except our custom nsp type which +# # we should be able to send as long +# # as we provide the custom codec hooks. +# or ( +# ipc_pld_spec == Any +# and +# send_type == NamespacePath +# and +# add_codec_hooks +# ) +# ) + +# if codec is not None: +# # XXX FIRST XXX ensure roundtripping works +# # before touching any IPC primitives/APIs. +# wire_bytes: bytes = codec.encode( +# Started( +# cid='blahblah', +# pld=send_value, +# ) +# ) +# # NOTE: demonstrates the decoder loading +# # to via our native SCIPP msg-spec +# # (structurred-conc-inter-proc-protocol) +# # implemented as per, +# try: +# msg: Started = codec.decode(wire_bytes) +# if not expect_roundtrip: +# pytest.fail( +# f'NOT-EXPECTED able to roundtrip value given spec:\n' +# f'ipc_pld_spec -> {ipc_pld_spec}\n' +# f'value -> {send_value}: {send_type}\n' +# ) + +# pld = msg.pld +# assert pld == send_value + +# except ValidationError: +# if expect_roundtrip: +# pytest.fail( +# f'EXPECTED to roundtrip value given spec:\n' +# f'ipc_pld_spec -> {ipc_pld_spec}\n' +# f'value -> {send_value}: {send_type}\n' +# ) + +# yield ( +# str(send_type), +# send_value, +# expect_roundtrip, +# ) + + +@tractor.context +async def send_back_values( + ctx: Context, + rent_pld_spec_type_strs: list[str], + add_hooks: bool, + # expect_ipc_send: dict[str, tuple[Any, bool]], + + # expect_debug: bool, + # started_msg_bytes: bytes, + +) -> None: + ''' + Setup up a custom codec to load instances of `NamespacePath` + and ensure we can round trip a func ref with our parent. + + ''' + uid: tuple = tractor.current_actor().uid + + # init state in sub-actor should be default + chk_codec_applied( + expect_codec=_codec._def_tractor_codec, + ) + + # load pld spec from input str + rent_pld_spec = _exts.dec_type_union( + rent_pld_spec_type_strs, + mods=[ + importlib.import_module(__name__), + ], + ) + rent_pld_spec_types: set[Type] = _codec.unpack_spec_types( + rent_pld_spec, + ) + + # ONLY add ext-hooks if the rent specified a non-std type! 
+ add_hooks: bool = ( + NamespacePath in rent_pld_spec_types + ) + + # same as on parent side config. + nsp_codec: MsgCodec|None = None + if add_hooks: + nsp_codec = mk_codec( + enc_hook=enc_nsp, + ext_types=[NamespacePath], + ) + + with ( + maybe_apply_codec(nsp_codec) as codec, + limit_plds( + rent_pld_spec, + dec_hook=dec_nsp if add_hooks else None, + ext_types=[NamespacePath] if add_hooks else None, + ) as pld_dec, + ): + # ?XXX? SHOULD WE NOT be swapping the global codec since it + # breaks `Context.started()` roundtripping checks?? + chk_codec_applied( + expect_codec=nsp_codec, + enter_value=codec, + ) + + # ?TODO, mismatch case(s)? + # + # ensure pld spec matches on both sides + ctx_pld_dec: MsgDec = ctx._pld_rx._pld_dec + assert pld_dec is ctx_pld_dec + child_pld_spec: Type = pld_dec.spec + child_pld_spec_types: set[Type] = _codec.unpack_spec_types( + child_pld_spec, + ) + assert ( + # child_pld_spec == rent_pld_spec + child_pld_spec_types.issuperset( + rent_pld_spec_types + ) + ) + + # expected_pld_spec_str: str = str(ipc_spec) + # assert ( + # pld_spec_str == expected_pld_spec_str + # and + # ipc_pld_spec == ipc_spec + # ) + + # ?TODO, try loop for each of the types in pld-superset? + # + # for send_value in [ + # nsp, + # str(nsp), + # None, + # ]: + nsp = NamespacePath.from_ref(ex_func) + try: + print( + f'{uid}: attempting to `.started({nsp})`\n' + f'\n' + f'rent_pld_spec: {rent_pld_spec}\n' + f'child_pld_spec: {child_pld_spec}\n' + f'codec: {codec}\n' + ) + await ctx.started(nsp) + + except tractor.MsgTypeError as _mte: + mte = _mte + + # false -ve case + if add_hooks: + raise RuntimeError( + f'EXPECTED to `.started()` value given spec ??\n\n' + f'child_pld_spec -> {child_pld_spec}\n' + f'value = {nsp}: {type(nsp)}\n' + ) + + # true -ve case + raise mte + + # TODO: maybe we should add our own wrapper error so as to + # be interchange-lib agnostic? + # -[ ] the error type is wtv is raised from the hook so we + # could also require a type-class of errors for + # indicating whether the hook-failure can be handled by + # a nasty-dialog-unprot sub-sys? + except TypeError as typerr: + # false -ve + if add_hooks: + raise RuntimeError('Should have been able to send `nsp`??') + + # true -ve + print('Failed to send `nsp` due to no ext hooks set!') + raise typerr + + # now try sending a set of valid and invalid plds to ensure + # the pld spec is respected. + sent: list[Any] = [] + async with ctx.open_stream() as ipc: + print( + f'{uid}: streaming all pld types to rent..' 
+ ) + + # for send_value, expect_send in iter_send_val_items: + for send_value in [ + nsp, + str(nsp), + None, + ]: + send_type: Type = type(send_value) + print( + f'{uid}: SENDING NEXT pld\n' + f'send_type: {send_type}\n' + f'send_value: {send_value}\n' + ) + try: + await ipc.send(send_value) + sent.append(send_value) + + except ValidationError as valerr: + print(f'{uid} FAILED TO SEND {send_value}!') + + # false -ve + if add_hooks: + raise RuntimeError( + f'EXPECTED to roundtrip value given spec:\n' + f'rent_pld_spec -> {rent_pld_spec}\n' + f'child_pld_spec -> {child_pld_spec}\n' + f'value = {send_value}: {send_type}\n' + ) + + # true -ve + raise valerr + # continue + + else: + print( + f'{uid}: finished sending all values\n' + 'Should be exiting stream block!\n' + ) + + print(f'{uid}: exited streaming block!') + + + +@cm +def maybe_apply_codec(codec: MsgCodec|None) -> MsgCodec|None: + if codec is None: + yield None + return + + with apply_codec(codec) as codec: + yield codec + + +@pytest.mark.parametrize( + 'pld_spec', + [ + Any, + NamespacePath, + NamespacePath|None, # the "maybe" spec Bo + ], + ids=[ + 'any_type', + 'only_nsp_ext', + 'maybe_nsp_ext', + ] +) +@pytest.mark.parametrize( + 'add_hooks', + [ + True, + False, + ], + ids=[ + 'use_codec_hooks', + 'no_codec_hooks', + ], +) +def test_ext_types_over_ipc( + debug_mode: bool, + pld_spec: Union[Type], + add_hooks: bool, +): + ''' + Ensure we can support extension types coverted using + `enc/dec_hook()`s passed to the `.msg.limit_plds()` API + and that sane errors happen when we try do the same without + the codec hooks. + + ''' + pld_types: set[Type] = _codec.unpack_spec_types(pld_spec) + + async def main(): + + # sanity check the default pld-spec beforehand + chk_codec_applied( + expect_codec=_codec._def_tractor_codec, + ) + + # extension type we want to send as msg payload + nsp = NamespacePath.from_ref(ex_func) + + # ^NOTE, 2 cases: + # - codec hooks noto added -> decode nsp as `str` + # - codec with hooks -> decode nsp as `NamespacePath` + nsp_codec: MsgCodec|None = None + if ( + NamespacePath in pld_types + and + add_hooks + ): + nsp_codec = mk_codec( + enc_hook=enc_nsp, + ext_types=[NamespacePath], + ) + + async with tractor.open_nursery( + debug_mode=debug_mode, + ) as an: + p: tractor.Portal = await an.start_actor( + 'sub', + enable_modules=[__name__], + ) + with ( + maybe_apply_codec(nsp_codec) as codec, + ): + chk_codec_applied( + expect_codec=nsp_codec, + enter_value=codec, + ) + rent_pld_spec_type_strs: list[str] = _exts.enc_type_union(pld_spec) + + # XXX should raise an mte (`MsgTypeError`) + # when `add_hooks == False` bc the input + # `expect_ipc_send` kwarg has a nsp which can't be + # serialized! + # + # TODO:can we ensure this happens from the + # `Return`-side (aka the sub) as well? + try: + ctx: tractor.Context + ipc: tractor.MsgStream + async with ( + + # XXX should raise an mte (`MsgTypeError`) + # when `add_hooks == False`.. 
+ p.open_context( + send_back_values, + # expect_debug=debug_mode, + rent_pld_spec_type_strs=rent_pld_spec_type_strs, + add_hooks=add_hooks, + # expect_ipc_send=expect_ipc_send, + ) as (ctx, first), + + ctx.open_stream() as ipc, + ): + with ( + limit_plds( + pld_spec, + dec_hook=dec_nsp if add_hooks else None, + ext_types=[NamespacePath] if add_hooks else None, + ) as pld_dec, + ): + ctx_pld_dec: MsgDec = ctx._pld_rx._pld_dec + assert pld_dec is ctx_pld_dec + + # if ( + # not add_hooks + # and + # NamespacePath in + # ): + # pytest.fail('ctx should fail to open without custom enc_hook!?') + + await ipc.send(nsp) + nsp_rt = await ipc.receive() + + assert nsp_rt == nsp + assert nsp_rt.load_ref() is ex_func + + # this test passes bc we can go no further! + except MsgTypeError: + if not add_hooks: + # teardown nursery + await p.cancel_actor() + return + + await p.cancel_actor() + + if ( + NamespacePath in pld_types + and + add_hooks + ): + trio.run(main) + + else: + with pytest.raises( + expected_exception=tractor.RemoteActorError, + ) as excinfo: + trio.run(main) + + exc = excinfo.value + # bc `.started(nsp: NamespacePath)` will raise + assert exc.boxed_type is TypeError + + +# def chk_pld_type( +# payload_spec: Type[Struct]|Any, +# pld: Any, + +# expect_roundtrip: bool|None = None, + +# ) -> bool: + +# pld_val_type: Type = type(pld) + +# # TODO: verify that the overridden subtypes +# # DO NOT have modified type-annots from original! +# # 'Start', .pld: FuncSpec +# # 'StartAck', .pld: IpcCtxSpec +# # 'Stop', .pld: UNSEt +# # 'Error', .pld: ErrorData + +# codec: MsgCodec = mk_codec( +# # NOTE: this ONLY accepts `PayloadMsg.pld` fields of a specified +# # type union. +# ipc_pld_spec=payload_spec, +# ) + +# # make a one-off dec to compare with our `MsgCodec` instance +# # which does the below `mk_msg_spec()` call internally +# ipc_msg_spec: Union[Type[Struct]] +# msg_types: list[PayloadMsg[payload_spec]] +# ( +# ipc_msg_spec, +# msg_types, +# ) = mk_msg_spec( +# payload_type_union=payload_spec, +# ) +# _enc = msgpack.Encoder() +# _dec = msgpack.Decoder( +# type=ipc_msg_spec or Any, # like `PayloadMsg[Any]` +# ) + +# assert ( +# payload_spec +# == +# codec.pld_spec +# ) + +# # assert codec.dec == dec +# # +# # ^-XXX-^ not sure why these aren't "equal" but when cast +# # to `str` they seem to match ?? .. kk + +# assert ( +# str(ipc_msg_spec) +# == +# str(codec.msg_spec) +# == +# str(_dec.type) +# == +# str(codec.dec.type) +# ) + +# # verify the boxed-type for all variable payload-type msgs. +# if not msg_types: +# breakpoint() + +# roundtrip: bool|None = None +# pld_spec_msg_names: list[str] = [ +# td.__name__ for td in _payload_msgs +# ] +# for typedef in msg_types: + +# skip_runtime_msg: bool = typedef.__name__ not in pld_spec_msg_names +# if skip_runtime_msg: +# continue + +# pld_field = structs.fields(typedef)[1] +# assert pld_field.type is payload_spec # TODO-^ does this need to work to get all subtypes to adhere? + +# kwargs: dict[str, Any] = { +# 'cid': '666', +# 'pld': pld, +# } +# enc_msg: PayloadMsg = typedef(**kwargs) + +# _wire_bytes: bytes = _enc.encode(enc_msg) +# wire_bytes: bytes = codec.enc.encode(enc_msg) +# assert _wire_bytes == wire_bytes + +# ve: ValidationError|None = None +# try: +# dec_msg = codec.dec.decode(wire_bytes) +# _dec_msg = _dec.decode(wire_bytes) + +# # decoded msg and thus payload should be exactly same! 
+# assert (roundtrip := ( +# _dec_msg +# == +# dec_msg +# == +# enc_msg +# )) + +# if ( +# expect_roundtrip is not None +# and expect_roundtrip != roundtrip +# ): +# breakpoint() + +# assert ( +# pld +# == +# dec_msg.pld +# == +# enc_msg.pld +# ) +# # assert (roundtrip := (_dec_msg == enc_msg)) + +# except ValidationError as _ve: +# ve = _ve +# roundtrip: bool = False +# if pld_val_type is payload_spec: +# raise ValueError( +# 'Got `ValidationError` despite type-var match!?\n' +# f'pld_val_type: {pld_val_type}\n' +# f'payload_type: {payload_spec}\n' +# ) from ve + +# else: +# # ow we good cuz the pld spec mismatched. +# print( +# 'Got expected `ValidationError` since,\n' +# f'{pld_val_type} is not {payload_spec}\n' +# ) +# else: +# if ( +# payload_spec is not Any +# and +# pld_val_type is not payload_spec +# ): +# raise ValueError( +# 'DID NOT `ValidationError` despite expected type match!?\n' +# f'pld_val_type: {pld_val_type}\n' +# f'payload_type: {payload_spec}\n' +# ) + +# # full code decode should always be attempted! +# if roundtrip is None: +# breakpoint() + +# return roundtrip + + +# ?TODO? maybe remove since covered in the newer `test_pldrx_limiting` +# via end-2-end testing of all this? +# -[ ] IOW do we really NEED this lowlevel unit testing? +# +# def test_limit_msgspec( +# debug_mode: bool, +# ): +# ''' +# Internals unit testing to verify that type-limiting an IPC ctx's +# msg spec with `Pldrx.limit_plds()` results in various +# encapsulated `msgspec` object settings and state. + +# ''' +# async def main(): +# async with tractor.open_root_actor( +# debug_mode=debug_mode, +# ): +# # ensure we can round-trip a boxing `PayloadMsg` +# assert chk_pld_type( +# payload_spec=Any, +# pld=None, +# expect_roundtrip=True, +# ) + +# # verify that a mis-typed payload value won't decode +# assert not chk_pld_type( +# payload_spec=int, +# pld='doggy', +# ) + +# # parametrize the boxed `.pld` type as a custom-struct +# # and ensure that parametrization propagates +# # to all payload-msg-spec-able subtypes! +# class CustomPayload(Struct): +# name: str +# value: Any + +# assert not chk_pld_type( +# payload_spec=CustomPayload, +# pld='doggy', +# ) + +# assert chk_pld_type( +# payload_spec=CustomPayload, +# pld=CustomPayload(name='doggy', value='urmom') +# ) + +# # yah, we can `.pause_from_sync()` now! +# # breakpoint() + +# trio.run(main) -- 2.34.1 From defae151ec7da5589312c6a32fe194403df09f52 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sat, 8 Mar 2025 23:58:31 -0500 Subject: [PATCH 287/305] Facepalm, fix logic misstep on child side Namely that `add_hooks: bool` should be the same as on the rent side.. Also, just drop the now unused `iter_maybe_sends`. This makes the suite entire greeeeen btw, including the new sub-suite which i hadn't runt before Bo --- tests/test_caps_based_msging.py | 133 +++----------------------------- 1 file changed, 12 insertions(+), 121 deletions(-) diff --git a/tests/test_caps_based_msging.py b/tests/test_caps_based_msging.py index 4d78a117..b334b64f 100644 --- a/tests/test_caps_based_msging.py +++ b/tests/test_caps_based_msging.py @@ -323,8 +323,8 @@ def test_pld_limiting_usage( limit_plds_args: tuple[dict, Exception|None], ): ''' - Verify `dec_hook()` and `ext_types` need to either both be provided - or we raise a explanator type-error. + Verify `dec_hook()` and `ext_types` need to either both be + provided or we raise a explanator type-error. 
''' kwargs, maybe_err = limit_plds_args @@ -406,117 +406,11 @@ def chk_codec_applied( assert enter_value is same_codec -# def iter_maybe_sends( -# send_items: dict[Union[Type], Any] | list[tuple], -# ipc_pld_spec: Union[Type] | Any, -# add_codec_hooks: bool, - -# codec: MsgCodec|None = None, - -# ) -> tuple[Any, bool]: - -# if isinstance(send_items, dict): -# send_items = send_items.items() - -# for ( -# send_type_spec, -# send_value, -# ) in send_items: - -# expect_roundtrip: bool = False - -# # values-to-typespec santiy -# send_type = type(send_value) -# assert send_type == send_type_spec or ( -# (subtypes := getattr(send_type_spec, '__args__', None)) -# and send_type in subtypes -# ) - -# spec_subtypes: set[Union[Type]] = ( -# getattr( -# ipc_pld_spec, -# '__args__', -# {ipc_pld_spec,}, -# ) -# ) -# send_in_spec: bool = ( -# send_type == ipc_pld_spec -# or ( -# ipc_pld_spec != Any -# and # presume `Union` of types -# send_type in spec_subtypes -# ) -# or ( -# ipc_pld_spec == Any -# and -# send_type != NamespacePath -# ) -# ) -# expect_roundtrip = ( -# send_in_spec -# # any spec should support all other -# # builtin py values that we send -# # except our custom nsp type which -# # we should be able to send as long -# # as we provide the custom codec hooks. -# or ( -# ipc_pld_spec == Any -# and -# send_type == NamespacePath -# and -# add_codec_hooks -# ) -# ) - -# if codec is not None: -# # XXX FIRST XXX ensure roundtripping works -# # before touching any IPC primitives/APIs. -# wire_bytes: bytes = codec.encode( -# Started( -# cid='blahblah', -# pld=send_value, -# ) -# ) -# # NOTE: demonstrates the decoder loading -# # to via our native SCIPP msg-spec -# # (structurred-conc-inter-proc-protocol) -# # implemented as per, -# try: -# msg: Started = codec.decode(wire_bytes) -# if not expect_roundtrip: -# pytest.fail( -# f'NOT-EXPECTED able to roundtrip value given spec:\n' -# f'ipc_pld_spec -> {ipc_pld_spec}\n' -# f'value -> {send_value}: {send_type}\n' -# ) - -# pld = msg.pld -# assert pld == send_value - -# except ValidationError: -# if expect_roundtrip: -# pytest.fail( -# f'EXPECTED to roundtrip value given spec:\n' -# f'ipc_pld_spec -> {ipc_pld_spec}\n' -# f'value -> {send_value}: {send_type}\n' -# ) - -# yield ( -# str(send_type), -# send_value, -# expect_roundtrip, -# ) - - @tractor.context async def send_back_values( ctx: Context, rent_pld_spec_type_strs: list[str], add_hooks: bool, - # expect_ipc_send: dict[str, tuple[Any, bool]], - - # expect_debug: bool, - # started_msg_bytes: bytes, ) -> None: ''' @@ -545,6 +439,8 @@ async def send_back_values( # ONLY add ext-hooks if the rent specified a non-std type! add_hooks: bool = ( NamespacePath in rent_pld_spec_types + and + add_hooks ) # same as on parent side config. @@ -580,19 +476,11 @@ async def send_back_values( child_pld_spec, ) assert ( - # child_pld_spec == rent_pld_spec child_pld_spec_types.issuperset( rent_pld_spec_types ) ) - # expected_pld_spec_str: str = str(ipc_spec) - # assert ( - # pld_spec_str == expected_pld_spec_str - # and - # ipc_pld_spec == ipc_spec - # ) - # ?TODO, try loop for each of the types in pld-superset? # # for send_value in [ @@ -609,6 +497,7 @@ async def send_back_values( f'child_pld_spec: {child_pld_spec}\n' f'codec: {codec}\n' ) + # await tractor.pause() await ctx.started(nsp) except tractor.MsgTypeError as _mte: @@ -826,11 +715,13 @@ def test_ext_types_over_ipc( assert nsp_rt.load_ref() is ex_func # this test passes bc we can go no further! 
- except MsgTypeError: - if not add_hooks: - # teardown nursery - await p.cancel_actor() - return + except MsgTypeError as mte: + # if not add_hooks: + # # teardown nursery + # await p.cancel_actor() + # return + + raise mte await p.cancel_actor() -- 2.34.1 From 49ecdc4d73ee1b924f4601dfedbe0030d23844d0 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 10 Mar 2025 11:17:46 -0400 Subject: [PATCH 288/305] Avoid attr-err when `._ipc_msg==None` Seems this can happen in particular when we raise a `MessageTypeError` on the sender side of a `Context`, since there isn't any msg relayed from the other side (though i'm wondering if MTE should derive from RAE then considering this case?). Means `RemoteActorError.boxed_type = None` in such cases instead of raising an attr-error for the `None.boxed_type_str`. --- tractor/_exceptions.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tractor/_exceptions.py b/tractor/_exceptions.py index 249ea164..f9e18e18 100644 --- a/tractor/_exceptions.py +++ b/tractor/_exceptions.py @@ -432,9 +432,13 @@ class RemoteActorError(Exception): Error type boxed by last actor IPC hop. ''' - if self._boxed_type is None: + if ( + self._boxed_type is None + and + (ipc_msg := self._ipc_msg) + ): self._boxed_type = get_err_type( - self._ipc_msg.boxed_type_str + ipc_msg.boxed_type_str ) return self._boxed_type @@ -1143,6 +1147,8 @@ def unpack_error( which is the responsibilitiy of the caller. ''' + # XXX, apparently we pass all sorts of msgs here? + # kinda odd but seems like maybe they shouldn't be? if not isinstance(msg, Error): return None -- 2.34.1 From 723a25b74d35420f7a1bcf3c7063863a6dc7e96e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 11 Dec 2024 22:22:26 -0500 Subject: [PATCH 289/305] Support `ctx: UnionType` annots for `@tractor.context` eps --- tractor/_context.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tractor/_context.py b/tractor/_context.py index 4628b11f..5d6ccf69 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -47,6 +47,9 @@ from functools import partial import inspect from pprint import pformat import textwrap +from types import ( + UnionType, +) from typing import ( Any, AsyncGenerator, @@ -2548,7 +2551,14 @@ def context( name: str param: Type for name, param in annots.items(): - if param is Context: + if ( + param is Context + or ( + isinstance(param, UnionType) + and + Context in param.__args__ + ) + ): ctx_var_name: str = name break else: -- 2.34.1 From e7cc91763cb5644ec815fb9f65732f4966138bca Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 10 Mar 2025 18:17:31 -0400 Subject: [PATCH 290/305] Deliver a `MaybeBoxedError` from `.expect_ctxc()` Just like we do from the `.devx._debug.open_crash_handler()`, this allows checking various attrs on the raised `ContextCancelled` much like `with pytest.raises() as excinfo:`. 
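
A rough usage sketch (assumes a `ctx` which the parent has already
cancelled via `ctx.cancel()`; names mirror the ctx-semantics suite):

    async with expect_ctxc(
        yay=True,
        reraise=False,
    ) as maybe_ctxc:
        await ctx.result()

    # the boxed `ContextCancelled` is now introspectable,
    # much like `pytest.ExceptionInfo.value`
    assert maybe_ctxc.value.canceller == current_actor().uid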
---
 tractor/_testing/__init__.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tractor/_testing/__init__.py b/tractor/_testing/__init__.py
index 43507c33..88860d13 100644
--- a/tractor/_testing/__init__.py
+++ b/tractor/_testing/__init__.py
@@ -26,6 +26,9 @@ import os
 import pathlib
 
 import tractor
+from tractor.devx._debug import (
+    BoxedMaybeException,
+)
 from .pytest import (
     tractor_test as tractor_test
 )
@@ -98,12 +101,13 @@ async def expect_ctxc(
     '''
     if yay:
         try:
-            yield
+            yield (maybe_exc := BoxedMaybeException())
             raise RuntimeError('Never raised ctxc?')
-        except tractor.ContextCancelled:
+        except tractor.ContextCancelled as ctxc:
+            maybe_exc.value = ctxc
             if reraise:
                 raise
             else:
                 return
     else:
-        yield
+        yield (maybe_exc := BoxedMaybeException())
-- 
2.34.1


From 87e04c9311dba1fea850b2b502e1915e60c4085d Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Tue, 11 Mar 2025 14:04:55 -0400
Subject: [PATCH 291/305] Extend ctx semantics suite for streaming edge cases!

Muchas grax to @guilledk for finding the first issue which kicked off
this further scrutiny of the `tractor.Context` and `MsgStream`
semantics test suite with a strange edge case where,
- if the parent opened and immediately closed a stream while the
  remote child task started and continued (without terminating) to send
  msgs the parent's `open_context().__aexit__()` would **not block** on
  the child to complete!
  => this was seemingly due to a bug discovered inside the
  `.msg._ops.drain_to_final_msg()` stream handling case logic where we
  are NOT checking if `Context._stream` is non-`None`!

As such this,
- extends the `test_caller_closes_ctx_after_callee_opens_stream` (now
  renamed, see below) to include cases for all combinations of the child
  and parent sending before receiving on the stream as well as all
  placements of `Context.cancel()` in the parent before, around and
  after the stream open.
- uses the new `expect_ctxc()` for expecting the taskc (`trio.Task`-
  cancelled) cases.
- also extends the `test_callee_closes_ctx_after_stream_open` (also
  renamed) to include the case where the parent sends a msg before it
  receives.
  => this case has unveiled yet-another-bug where somehow the underlying
  `MsgStream._rx_chan: trio.ReceiveMemoryChannel` is allowing the
  child's `Return[None]` msg to be consumed and NOT in a place where it
  is correctly set as `Context._result`, resulting in the parent hanging
  forever inside `._ops.drain_to_final_msg()`..

Alongside,
- start renaming using the new "remote-task-peer-side" semantics
  throughout the test module: "caller" -> "parent", "callee" -> "child".
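
The first edge case above as a rough repro sketch (`child_ep` here
being any hypothetical child-side ep which keeps sending on its
stream):

    async with portal.open_context(child_ep) as (ctx, _first):
        async with ctx.open_stream():
            pass  # parent opens then immediately closes

    # ^ the ctx block exit previously could unblock WITHOUT
    # waiting on the child's final `Return`/`Error`/ctxc!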
---
 tests/test_context_stream_semantics.py | 158 ++++++++++++++++++-------
 1 file changed, 117 insertions(+), 41 deletions(-)

diff --git a/tests/test_context_stream_semantics.py b/tests/test_context_stream_semantics.py
index ade275aa..29e99b2e 100644
--- a/tests/test_context_stream_semantics.py
+++ b/tests/test_context_stream_semantics.py
@@ -443,7 +443,6 @@ def test_caller_cancels(
 
 @tractor.context
 async def close_ctx_immediately(
-
     ctx: Context,
 
 ) -> None:
@@ -454,10 +453,21 @@ async def close_ctx_immediately(
     async with ctx.open_stream():
         pass
 
+    print('child returning!')
+
 
+@pytest.mark.parametrize(
+    'parent_send_before_receive',
+    [
+        False,
+        True,
+    ],
+    ids=lambda item: f'parent_send_before_receive={item}'
+)
 @tractor_test
-async def test_callee_closes_ctx_after_stream_open(
+async def test_child_exits_ctx_after_stream_open(
     debug_mode: bool,
+    parent_send_before_receive: bool,
 ):
     '''
     callee context closes without using stream.
@@ -474,6 +484,15 @@ async def test_callee_closes_ctx_after_stream_open(
     => {'stop': True, 'cid': <str>}
 
     '''
+    timeout: float = (
+        0.5 if (
+            not debug_mode
+            # NOTE, for debugging final
+            # Return-consumed-n-discarded-ishue!
+            # and
+            # not parent_send_before_receive
+        ) else 999
+    )
     async with tractor.open_nursery(
         debug_mode=debug_mode,
     ) as an:
@@ -482,7 +501,7 @@ async def test_callee_closes_ctx_after_stream_open(
             enable_modules=[__name__],
         )
 
-        with trio.fail_after(0.5):
+        with trio.fail_after(timeout):
             async with portal.open_context(
                 close_ctx_immediately,
 
@@ -494,41 +513,56 @@
 
                 with trio.fail_after(0.4):
                     async with ctx.open_stream() as stream:
+                        if parent_send_before_receive:
+                            print('sending first msg from parent!')
+                            await stream.send('yo')
 
                         # should fall through since ``StopAsyncIteration``
                         # should be raised through translation of
                         # a ``trio.EndOfChannel`` by
                         # ``trio.abc.ReceiveChannel.__anext__()``
-                        async for _ in stream:
+                        msg = 10
+                        async for msg in stream:
 
                             # trigger failure if we DO NOT
                             # get an EOC!
                             assert 0
                         else:
+                            # never should get anything new from
+                            # the underlying stream
+                            assert msg == 10
 
                             # verify stream is now closed
                             try:
                                 with trio.fail_after(0.3):
+                                    print('parent trying to `.receive()` on EoC stream!')
                                     await stream.receive()
+                                    assert 0, 'should have raised eoc!?'
                             except trio.EndOfChannel:
+                                print('parent got EoC as expected!')
                                 pass
+                                # raise
 
                 # TODO: should be just raise the closed resource err
                 # directly here to enforce not allowing a re-open
                 # of a stream to the context (at least until a time of
                 # if/when we decide that's a good idea?)
                 try:
-                    with trio.fail_after(0.5):
+                    with trio.fail_after(timeout):
                         async with ctx.open_stream() as stream:
                             pass
                 except trio.ClosedResourceError:
                     pass
 
+                # if ctx._rx_chan._state.data:
+                #     await tractor.pause()
+
         await portal.cancel_actor()
 
 
 @tractor.context
 async def expect_cancelled(
     ctx: Context,
+    send_before_receive: bool = False,
 
 ) -> None:
     global _state
@@ -538,6 +572,10 @@ async def expect_cancelled(
 
     try:
         async with ctx.open_stream() as stream:
+
+            if send_before_receive:
+                await stream.send('yo')
+
             async for msg in stream:
                 await stream.send(msg)  # echo server
 
@@ -567,23 +605,46 @@ async def expect_cancelled(
         assert 0, "callee wasn't cancelled !?"
+@pytest.mark.parametrize( + 'child_send_before_receive', + [ + False, + True, + ], + ids=lambda item: f'child_send_before_receive={item}' +) +@pytest.mark.parametrize( + 'rent_wait_for_msg', + [ + False, + True, + ], + ids=lambda item: f'rent_wait_for_msg={item}' +) @pytest.mark.parametrize( 'use_ctx_cancel_method', - [False, True], + [ + False, + 'pre_stream', + 'post_stream_open', + 'post_stream_close', + ], + ids=lambda item: f'use_ctx_cancel_method={item}' ) @tractor_test -async def test_caller_closes_ctx_after_callee_opens_stream( - use_ctx_cancel_method: bool, +async def test_parent_exits_ctx_after_child_enters_stream( + use_ctx_cancel_method: bool|str, debug_mode: bool, + rent_wait_for_msg: bool, + child_send_before_receive: bool, ): ''' - caller context closes without using/opening stream + Parent-side of IPC context closes without sending on `MsgStream`. ''' async with tractor.open_nursery( debug_mode=debug_mode, ) as an: - root: Actor = current_actor() portal = await an.start_actor( 'ctx_cancelled', @@ -592,41 +653,52 @@ async def test_caller_closes_ctx_after_callee_opens_stream( async with portal.open_context( expect_cancelled, + send_before_receive=child_send_before_receive, ) as (ctx, sent): assert sent is None await portal.run(assert_state, value=True) # call `ctx.cancel()` explicitly - if use_ctx_cancel_method: + if use_ctx_cancel_method == 'pre_stream': await ctx.cancel() # NOTE: means the local side `ctx._scope` will # have been cancelled by an ctxc ack and thus # `._scope.cancelled_caught` should be set. - try: + async with ( + expect_ctxc( + # XXX: the cause is US since we call + # `Context.cancel()` just above! + yay=True, + + # XXX: must be propagated to __aexit__ + # and should be silently absorbed there + # since we called `.cancel()` just above ;) + reraise=True, + ) as maybe_ctxc, + ): async with ctx.open_stream() as stream: - async for msg in stream: - pass - except tractor.ContextCancelled as ctxc: - # XXX: the cause is US since we call - # `Context.cancel()` just above! - assert ( - ctxc.canceller - == - current_actor().uid - == - root.uid - ) + if rent_wait_for_msg: + async for msg in stream: + print(f'PARENT rx: {msg!r}\n') + break - # XXX: must be propagated to __aexit__ - # and should be silently absorbed there - # since we called `.cancel()` just above ;) - raise + if use_ctx_cancel_method == 'post_stream_open': + await ctx.cancel() - else: - assert 0, "Should have context cancelled?" + if use_ctx_cancel_method == 'post_stream_close': + await ctx.cancel() + + ctxc: tractor.ContextCancelled = maybe_ctxc.value + assert ( + ctxc.canceller + == + current_actor().uid + == + root.uid + ) # channel should still be up assert portal.channel.connected() @@ -637,13 +709,20 @@ async def test_caller_closes_ctx_after_callee_opens_stream( value=False, ) + # XXX CHILD-BLOCKS case, we SHOULD NOT exit from the + # `.open_context()` before the child has returned, + # errored or been cancelled! else: try: - with trio.fail_after(0.2): - await ctx.result() + with trio.fail_after( + 0.5 # if not debug_mode else 999 + ): + res = await ctx.wait_for_result() + assert res is not tractor._context.Unresolved assert 0, "Callee should have blocked!?" except trio.TooSlowError: - # NO-OP -> since already called above + # NO-OP -> since already triggered by + # `trio.fail_after()` above! 
await ctx.cancel() # NOTE: local scope should have absorbed the cancellation since @@ -683,7 +762,7 @@ async def test_caller_closes_ctx_after_callee_opens_stream( @tractor_test -async def test_multitask_caller_cancels_from_nonroot_task( +async def test_multitask_parent_cancels_from_nonroot_task( debug_mode: bool, ): async with tractor.open_nursery( @@ -735,7 +814,6 @@ async def test_multitask_caller_cancels_from_nonroot_task( @tractor.context async def cancel_self( - ctx: Context, ) -> None: @@ -775,7 +853,7 @@ async def cancel_self( @tractor_test -async def test_callee_cancels_before_started( +async def test_child_cancels_before_started( debug_mode: bool, ): ''' @@ -826,8 +904,7 @@ async def never_open_stream( @tractor.context -async def keep_sending_from_callee( - +async def keep_sending_from_child( ctx: Context, msg_buffer_size: int|None = None, @@ -850,7 +927,7 @@ async def keep_sending_from_callee( 'overrun_by', [ ('caller', 1, never_open_stream), - ('callee', 0, keep_sending_from_callee), + ('callee', 0, keep_sending_from_child), ], ids=[ ('caller_1buf_never_open_stream'), @@ -931,8 +1008,7 @@ def test_one_end_stream_not_opened( @tractor.context async def echo_back_sequence( - - ctx: Context, + ctx: Context, seq: list[int], wait_for_cancel: bool, allow_overruns_side: str, -- 2.34.1 From f999f8228a40af77004ad4cd206f678fee65bcd3 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Tue, 11 Mar 2025 14:31:53 -0400 Subject: [PATCH 292/305] Fix msg-draining on `parent_never_opened_stream`! Repairs a bug in `drain_to_final_msg()` where in the `Yield()` case block we weren't guarding against the `ctx._stream is None` edge case which should be treated a `continue`-draining (not a `break` or attr-error!!) situation since the peer task maybe be continuing to send `Yield` but has not yet sent an outcome msg (one of `Return/Error/ContextCancelled`) to terminate the loop. Ensure we explicitly warn about this case as well as `.cancel()` emit on a taskc. Thanks again to @guille for discovering this! Also add temporary `.info()`s around rxed `Return` msgs as part of trying to debug a different bug discovered while updating the context-semantics test suite (in a prior commit). --- tractor/msg/_ops.py | 84 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 20 deletions(-) diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py index 839be532..5f4b9fe8 100644 --- a/tractor/msg/_ops.py +++ b/tractor/msg/_ops.py @@ -186,10 +186,16 @@ class PldRx(Struct): msg: MsgType = ( ipc_msg or - # sync-rx msg from underlying IPC feeder (mem-)chan ipc._rx_chan.receive_nowait() ) + if ( + type(msg) is Return + ): + log.info( + f'Rxed final result msg\n' + f'{msg}\n' + ) return self.decode_pld( msg, ipc=ipc, @@ -219,6 +225,13 @@ class PldRx(Struct): # async-rx msg from underlying IPC feeder (mem-)chan await ipc._rx_chan.receive() ) + if ( + type(msg) is Return + ): + log.info( + f'Rxed final result msg\n' + f'{msg}\n' + ) return self.decode_pld( msg=msg, ipc=ipc, @@ -407,8 +420,6 @@ class PldRx(Struct): __tracebackhide__: bool = False raise - dec_msg = decode_pld - async def recv_msg_w_pld( self, ipc: Context|MsgStream, @@ -422,12 +433,19 @@ class PldRx(Struct): ) -> tuple[MsgType, PayloadT]: ''' - Retrieve the next avail IPC msg, decode it's payload, and return - the pair of refs. + Retrieve the next avail IPC msg, decode it's payload, and + return the pair of refs. 
''' __tracebackhide__: bool = hide_tb msg: MsgType = await ipc._rx_chan.receive() + if ( + type(msg) is Return + ): + log.info( + f'Rxed final result msg\n' + f'{msg}\n' + ) if passthrough_non_pld_msgs: match msg: @@ -444,6 +462,10 @@ class PldRx(Struct): hide_tb=hide_tb, **kwargs, ) + # log.runtime( + # f'Delivering payload msg\n' + # f'{msg}\n' + # ) return msg, pld @@ -538,8 +560,8 @@ async def maybe_limit_plds( async def drain_to_final_msg( ctx: Context, - hide_tb: bool = True, msg_limit: int = 6, + hide_tb: bool = True, ) -> tuple[ Return|None, @@ -568,8 +590,8 @@ async def drain_to_final_msg( even after ctx closure and the `.open_context()` block exit. ''' - __tracebackhide__: bool = hide_tb raise_overrun: bool = not ctx._allow_overruns + parent_never_opened_stream: bool = ctx._stream is None # wait for a final context result by collecting (but # basically ignoring) any bi-dir-stream msgs still in transit @@ -578,7 +600,8 @@ async def drain_to_final_msg( result_msg: Return|Error|None = None while not ( ctx.maybe_error - and not ctx._final_result_is_set() + and + not ctx._final_result_is_set() ): try: # receive all msgs, scanning for either a final result @@ -631,6 +654,11 @@ async def drain_to_final_msg( ) __tracebackhide__: bool = False + else: + log.cancel( + f'IPC ctx cancelled externally during result drain ?\n' + f'{ctx}' + ) # CASE 2: mask the local cancelled-error(s) # only when we are sure the remote error is # the source cause of this local task's @@ -662,17 +690,24 @@ async def drain_to_final_msg( case Yield(): pre_result_drained.append(msg) if ( - (ctx._stream.closed - and (reason := 'stream was already closed') - ) - or (ctx.cancel_acked - and (reason := 'ctx cancelled other side') - ) - or (ctx._cancel_called - and (reason := 'ctx called `.cancel()`') - ) - or (len(pre_result_drained) > msg_limit - and (reason := f'"yield" limit={msg_limit}') + not parent_never_opened_stream + and ( + (ctx._stream.closed + and + (reason := 'stream was already closed') + ) or + (ctx.cancel_acked + and + (reason := 'ctx cancelled other side') + ) + or (ctx._cancel_called + and + (reason := 'ctx called `.cancel()`') + ) + or (len(pre_result_drained) > msg_limit + and + (reason := f'"yield" limit={msg_limit}') + ) ) ): log.cancel( @@ -690,7 +725,7 @@ async def drain_to_final_msg( # drain up to the `msg_limit` hoping to get # a final result or error/ctxc. else: - log.warning( + report: str = ( 'Ignoring "yield" msg during `ctx.result()` drain..\n' f'<= {ctx.chan.uid}\n' f' |_{ctx._nsf}()\n\n' @@ -699,6 +734,14 @@ async def drain_to_final_msg( f'{pretty_struct.pformat(msg)}\n' ) + if parent_never_opened_stream: + report = ( + f'IPC ctx never opened stream on {ctx.side!r}-side!\n' + f'\n' + # f'{ctx}\n' + ) + report + + log.warning(report) continue # stream terminated, but no result yet.. @@ -790,6 +833,7 @@ async def drain_to_final_msg( f'{ctx.outcome}\n' ) + __tracebackhide__: bool = hide_tb return ( result_msg, pre_result_drained, -- 2.34.1 From 0a56f62748d5fa8f0834cdd9ff2c247f7573fa7e Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 12 Mar 2025 13:13:20 -0400 Subject: [PATCH 293/305] Mk `tests/__init__.py`, not sure where it went? I must have had a local touched file but never committed or something? Seems that new `pytest` requires a top level `tests` pkg in order for relative `.conftest` imports to work. 
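
IOW a layout along these lines (sketch):

    tests/
        __init__.py      # <- the top level pkg-mod
        conftest.py
        devx/
            conftest.py  # does `from ..conftest import ...`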
--- tests/devx/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/devx/conftest.py b/tests/devx/conftest.py index 7992e8c9..c45265dc 100644 --- a/tests/devx/conftest.py +++ b/tests/devx/conftest.py @@ -22,7 +22,7 @@ from tractor.devx._debug import ( _repl_fail_msg as _repl_fail_msg, _ctlc_ignore_header as _ctlc_ignore_header, ) -from conftest import ( +from ..conftest import ( _ci_env, ) -- 2.34.1 From 5018284db2012220ef75c369568678560606ca70 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 12 Mar 2025 13:15:48 -0400 Subject: [PATCH 294/305] Complete rename to parent->child IPC ctx peers Now changed in all comments docs **and** test-code content such that we aren't using the "caller"->"callee" semantics anymore. --- tests/test_context_stream_semantics.py | 105 ++++++++++++------------- 1 file changed, 49 insertions(+), 56 deletions(-) diff --git a/tests/test_context_stream_semantics.py b/tests/test_context_stream_semantics.py index 29e99b2e..14cb9cc6 100644 --- a/tests/test_context_stream_semantics.py +++ b/tests/test_context_stream_semantics.py @@ -38,9 +38,9 @@ from tractor._testing import ( # - standard setup/teardown: # ``Portal.open_context()`` starts a new # remote task context in another actor. The target actor's task must -# call ``Context.started()`` to unblock this entry on the caller side. -# the callee task executes until complete and returns a final value -# which is delivered to the caller side and retreived via +# call ``Context.started()`` to unblock this entry on the parent side. +# the child task executes until complete and returns a final value +# which is delivered to the parent side and retreived via # ``Context.result()``. # - cancel termination: @@ -170,9 +170,9 @@ async def assert_state(value: bool): [False, ValueError, KeyboardInterrupt], ) @pytest.mark.parametrize( - 'callee_blocks_forever', + 'child_blocks_forever', [False, True], - ids=lambda item: f'callee_blocks_forever={item}' + ids=lambda item: f'child_blocks_forever={item}' ) @pytest.mark.parametrize( 'pointlessly_open_stream', @@ -181,7 +181,7 @@ async def assert_state(value: bool): ) def test_simple_context( error_parent, - callee_blocks_forever, + child_blocks_forever, pointlessly_open_stream, debug_mode: bool, ): @@ -204,13 +204,13 @@ def test_simple_context( portal.open_context( simple_setup_teardown, data=10, - block_forever=callee_blocks_forever, + block_forever=child_blocks_forever, ) as (ctx, sent), ): assert current_ipc_ctx() is ctx assert sent == 11 - if callee_blocks_forever: + if child_blocks_forever: await portal.run(assert_state, value=True) else: assert await ctx.result() == 'yo' @@ -220,7 +220,7 @@ def test_simple_context( if error_parent: raise error_parent - if callee_blocks_forever: + if child_blocks_forever: await ctx.cancel() else: # in this case the stream will send a @@ -259,9 +259,9 @@ def test_simple_context( @pytest.mark.parametrize( - 'callee_returns_early', + 'child_returns_early', [True, False], - ids=lambda item: f'callee_returns_early={item}' + ids=lambda item: f'child_returns_early={item}' ) @pytest.mark.parametrize( 'cancel_method', @@ -273,14 +273,14 @@ def test_simple_context( [True, False], ids=lambda item: f'chk_ctx_result_before_exit={item}' ) -def test_caller_cancels( +def test_parent_cancels( cancel_method: str, chk_ctx_result_before_exit: bool, - callee_returns_early: bool, + child_returns_early: bool, debug_mode: bool, ): ''' - Verify that when the opening side of a context (aka the caller) + Verify that when the opening side 
of a context (aka the parent) cancels that context, the ctx does not raise a cancelled when either calling `.result()` or on context exit. @@ -294,7 +294,7 @@ def test_caller_cancels( if ( cancel_method == 'portal' - and not callee_returns_early + and not child_returns_early ): try: res = await ctx.result() @@ -318,7 +318,7 @@ def test_caller_cancels( pytest.fail(f'should not have raised ctxc\n{ctxc}') # we actually get a result - if callee_returns_early: + if child_returns_early: assert res == 'yo' assert ctx.outcome is res assert ctx.maybe_error is None @@ -362,14 +362,14 @@ def test_caller_cancels( ) timeout: float = ( 0.5 - if not callee_returns_early + if not child_returns_early else 2 ) with trio.fail_after(timeout): async with ( expect_ctxc( yay=( - not callee_returns_early + not child_returns_early and cancel_method == 'portal' ) ), @@ -377,13 +377,13 @@ def test_caller_cancels( portal.open_context( simple_setup_teardown, data=10, - block_forever=not callee_returns_early, + block_forever=not child_returns_early, ) as (ctx, sent), ): - if callee_returns_early: + if child_returns_early: # ensure we block long enough before sending - # a cancel such that the callee has already + # a cancel such that the child has already # returned it's result. await trio.sleep(0.5) @@ -421,7 +421,7 @@ def test_caller_cancels( # which should in turn cause `ctx._scope` to # catch any cancellation? if ( - not callee_returns_early + not child_returns_early and cancel_method != 'portal' ): assert not ctx._scope.cancelled_caught @@ -430,11 +430,11 @@ def test_caller_cancels( # basic stream terminations: -# - callee context closes without using stream -# - caller context closes without using stream -# - caller context calls `Context.cancel()` while streaming -# is ongoing resulting in callee being cancelled -# - callee calls `Context.cancel()` while streaming and caller +# - child context closes without using stream +# - parent context closes without using stream +# - parent context calls `Context.cancel()` while streaming +# is ongoing resulting in child being cancelled +# - child calls `Context.cancel()` while streaming and parent # sees stream terminated in `RemoteActorError` # TODO: future possible features @@ -470,7 +470,7 @@ async def test_child_exits_ctx_after_stream_open( parent_send_before_receive: bool, ): ''' - callee context closes without using stream. + child context closes without using stream. This should result in a msg sequence |__ @@ -485,13 +485,7 @@ async def test_child_exits_ctx_after_stream_open( ''' timeout: float = ( - 0.5 if ( - not debug_mode - # NOTE, for debugging final - # Return-consumed-n-discarded-ishue! - # and - # not parent_send_before_receive - ) else 999 + 0.5 if not debug_mode else 999 ) async with tractor.open_nursery( debug_mode=debug_mode, @@ -602,7 +596,7 @@ async def expect_cancelled( raise else: - assert 0, "callee wasn't cancelled !?" + assert 0, "child wasn't cancelled !?" @pytest.mark.parametrize( @@ -857,7 +851,7 @@ async def test_child_cancels_before_started( debug_mode: bool, ): ''' - Callee calls `Context.cancel()` while streaming and caller + Callee calls `Context.cancel()` while streaming and parent sees stream terminated in `ContextCancelled`. ''' @@ -910,7 +904,7 @@ async def keep_sending_from_child( ) -> None: ''' - Send endlessly on the calleee stream. + Send endlessly on the child stream. 
''' await ctx.started() @@ -918,7 +912,7 @@ async def keep_sending_from_child( msg_buffer_size=msg_buffer_size, ) as stream: for msg in count(): - print(f'callee sending {msg}') + print(f'child sending {msg}') await stream.send(msg) await trio.sleep(0.01) @@ -926,12 +920,12 @@ async def keep_sending_from_child( @pytest.mark.parametrize( 'overrun_by', [ - ('caller', 1, never_open_stream), - ('callee', 0, keep_sending_from_child), + ('parent', 1, never_open_stream), + ('child', 0, keep_sending_from_child), ], ids=[ - ('caller_1buf_never_open_stream'), - ('callee_0buf_keep_sending_from_callee'), + ('parent_1buf_never_open_stream'), + ('child_0buf_keep_sending_from_child'), ] ) def test_one_end_stream_not_opened( @@ -962,8 +956,7 @@ def test_one_end_stream_not_opened( ) as (ctx, sent): assert sent is None - if 'caller' in overrunner: - + if 'parent' in overrunner: async with ctx.open_stream() as stream: # itersend +1 msg more then the buffer size @@ -978,7 +971,7 @@ def test_one_end_stream_not_opened( await trio.sleep_forever() else: - # callee overruns caller case so we do nothing here + # child overruns parent case so we do nothing here await trio.sleep_forever() await portal.cancel_actor() @@ -986,19 +979,19 @@ def test_one_end_stream_not_opened( # 2 overrun cases and the no overrun case (which pushes right up to # the msg limit) if ( - overrunner == 'caller' + overrunner == 'parent' ): with pytest.raises(tractor.RemoteActorError) as excinfo: trio.run(main) assert excinfo.value.boxed_type == StreamOverrun - elif overrunner == 'callee': + elif overrunner == 'child': with pytest.raises(tractor.RemoteActorError) as excinfo: trio.run(main) # TODO: embedded remote errors so that we can verify the source - # error? the callee delivers an error which is an overrun + # error? the child delivers an error which is an overrun # wrapped in a remote actor error. assert excinfo.value.boxed_type == tractor.RemoteActorError @@ -1017,12 +1010,12 @@ async def echo_back_sequence( ) -> None: ''' - Send endlessly on the calleee stream using a small buffer size + Send endlessly on the child stream using a small buffer size setting on the contex to simulate backlogging that would normally cause overruns. ''' - # NOTE: ensure that if the caller is expecting to cancel this task + # NOTE: ensure that if the parent is expecting to cancel this task # that we stay echoing much longer then they are so we don't # return early instead of receive the cancel msg. 
    total_batches: int = (
@@ -1072,18 +1065,18 @@ async def echo_back_sequence(
         if be_slow:
             await trio.sleep(0.05)
 
-        print('callee waiting on next')
+        print('child waiting on next')
 
-        print(f'callee echoing back latest batch\n{batch}')
+        print(f'child echoing back latest batch\n{batch}')
         for msg in batch:
-            print(f'callee sending msg\n{msg}')
+            print(f'child sending msg\n{msg}')
             await stream.send(msg)
 
     try:
         return 'yo'
     finally:
         print(
-            'exiting callee with context:\n'
+            'exiting child with context:\n'
             f'{pformat(ctx)}\n'
         )
 
@@ -1137,7 +1130,7 @@ def test_maybe_allow_overruns_stream(
             debug_mode=debug_mode,
         ) as an:
             portal = await an.start_actor(
-                'callee_sends_forever',
+                'child_sends_forever',
                 enable_modules=[__name__],
                 loglevel=loglevel,
                 debug_mode=debug_mode,
-- 
2.34.1


From 830be005ea3f736ca97a06208ed589f42efdfc7f Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 12 Mar 2025 13:47:53 -0400
Subject: [PATCH 295/305] Rename ext-types with `msgspec` suite module

---
 tests/{test_caps_based_msging.py => test_ext_types_msgspec.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/{test_caps_based_msging.py => test_ext_types_msgspec.py} (100%)

diff --git a/tests/test_caps_based_msging.py b/tests/test_ext_types_msgspec.py
similarity index 100%
rename from tests/test_caps_based_msging.py
rename to tests/test_ext_types_msgspec.py
-- 
2.34.1


From 7d19c583739fb9d60ef36ae4d5dcd3025e11f75c Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 12 Mar 2025 13:49:58 -0400
Subject: [PATCH 296/305] Slight `PldRx` rework to simplify

Namely renaming and tweaking the `MsgType` receiving methods,
- `.recv_msg()` from what was `.recv_msg_w_pld()` which both receives
  the IPC msg from the underlying `._rx_chan` and then decodes its
  payload with `.decode_pld()`; it now also log reports on the
  different "stage of SC dialog protocol" msg types via a `match/case`.
- a new `.recv_msg_nowait()` sync equivalent of ^ (*was*
  `.recv_pld_nowait()`) whose use was the source of a recently
  discovered bug where any final `Return.pld` is being
  consumed-n-discarded by `MsgStream.aclose()` depending on ctx/stream
  teardown race conditions..

Also,
- remove all the "instance persistent" ipc-ctx attrs, specifically the
  optional `_ipc`, `_ctx` and the `.wraps_ipc()` cm, since none of them
  were ever really needed/used; all methods which require
  a `Context/MsgStream` are explicitly always passed.
- update a buncha typing namely to use the more generic-styled
  `PayloadT` over `Any` and obviously `MsgType[PayloadT]`.
---
 tractor/msg/_ops.py | 151 ++++++++++++++++++++------------------------
 1 file changed, 68 insertions(+), 83 deletions(-)

diff --git a/tractor/msg/_ops.py b/tractor/msg/_ops.py
index 5f4b9fe8..fbbbecff 100644
--- a/tractor/msg/_ops.py
+++ b/tractor/msg/_ops.py
@@ -110,33 +110,11 @@ class PldRx(Struct):
     # TODO: better to bind it here?
     # _rx_mc: trio.MemoryReceiveChannel
     _pld_dec: MsgDec
-    _ctx: Context|None = None
-    _ipc: Context|MsgStream|None = None
 
     @property
     def pld_dec(self) -> MsgDec:
         return self._pld_dec
 
-    # TODO: a better name?
-    # -[ ] when would this be used as it avoids needingn to pass the
-    #     ipc prim to every method
-    @cm
-    def wraps_ipc(
-        self,
-        ipc_prim: Context|MsgStream,
-
-    ) -> PldRx:
-        '''
-        Apply this payload receiver to an IPC primitive type, one
- - ''' - self._ipc = ipc_prim - try: - yield self - finally: - self._ipc = None - @cm def limit_plds( self, @@ -169,7 +147,7 @@ class PldRx(Struct): def dec(self) -> msgpack.Decoder: return self._pld_dec.dec - def recv_pld_nowait( + def recv_msg_nowait( self, # TODO: make this `MsgStream` compat as well, see above^ # ipc_prim: Context|MsgStream, @@ -180,7 +158,15 @@ class PldRx(Struct): hide_tb: bool = False, **dec_pld_kwargs, - ) -> Any|Raw: + ) -> tuple[ + MsgType[PayloadT], + PayloadT, + ]: + ''' + Attempt to non-blocking receive a message from the `._rx_chan` and + unwrap it's payload delivering the pair to the caller. + + ''' __tracebackhide__: bool = hide_tb msg: MsgType = ( @@ -189,31 +175,78 @@ class PldRx(Struct): # sync-rx msg from underlying IPC feeder (mem-)chan ipc._rx_chan.receive_nowait() ) - if ( - type(msg) is Return - ): - log.info( - f'Rxed final result msg\n' - f'{msg}\n' - ) - return self.decode_pld( + pld: PayloadT = self.decode_pld( msg, ipc=ipc, expect_msg=expect_msg, hide_tb=hide_tb, **dec_pld_kwargs, ) + return ( + msg, + pld, + ) + + async def recv_msg( + self, + ipc: Context|MsgStream, + expect_msg: MsgType, + + # NOTE: ONLY for handling `Stop`-msgs that arrive during + # a call to `drain_to_final_msg()` above! + passthrough_non_pld_msgs: bool = True, + hide_tb: bool = True, + + **decode_pld_kwargs, + + ) -> tuple[MsgType, PayloadT]: + ''' + Retrieve the next avail IPC msg, decode its payload, and + return the (msg, pld) pair. + + ''' + __tracebackhide__: bool = hide_tb + msg: MsgType = await ipc._rx_chan.receive() + match msg: + case Return()|Error(): + log.runtime( + f'Rxed final outcome msg\n' + f'{msg}\n' + ) + case Stop(): + log.runtime( + f'Rxed stream stopped msg\n' + f'{msg}\n' + ) + if passthrough_non_pld_msgs: + return msg, None + + # TODO: is there some way we can inject the decoded + # payload into an existing output buffer for the original + # msg instance? + pld: PayloadT = self.decode_pld( + msg, + ipc=ipc, + expect_msg=expect_msg, + hide_tb=hide_tb, + + **decode_pld_kwargs, + ) + return ( + msg, + pld, + ) async def recv_pld( self, ipc: Context|MsgStream, - ipc_msg: MsgType|None = None, + ipc_msg: MsgType[PayloadT]|None = None, expect_msg: Type[MsgType]|None = None, hide_tb: bool = True, **dec_pld_kwargs, - ) -> Any|Raw: + ) -> PayloadT: ''' Receive a `MsgType`, then decode and return its `.pld` field. @@ -420,54 +453,6 @@ class PldRx(Struct): __tracebackhide__: bool = False raise - async def recv_msg_w_pld( - self, - ipc: Context|MsgStream, - expect_msg: MsgType, - - # NOTE: generally speaking only for handling `Stop`-msgs that - # arrive during a call to `drain_to_final_msg()` above! - passthrough_non_pld_msgs: bool = True, - hide_tb: bool = True, - **kwargs, - - ) -> tuple[MsgType, PayloadT]: - ''' - Retrieve the next avail IPC msg, decode it's payload, and - return the pair of refs. - - ''' - __tracebackhide__: bool = hide_tb - msg: MsgType = await ipc._rx_chan.receive() - if ( - type(msg) is Return - ): - log.info( - f'Rxed final result msg\n' - f'{msg}\n' - ) - - if passthrough_non_pld_msgs: - match msg: - case Stop(): - return msg, None - - # TODO: is there some way we can inject the decoded - # payload into an existing output buffer for the original - # msg instance? 
- pld: PayloadT = self.decode_pld( - msg, - ipc=ipc, - expect_msg=expect_msg, - hide_tb=hide_tb, - **kwargs, - ) - # log.runtime( - # f'Delivering payload msg\n' - # f'{msg}\n' - # ) - return msg, pld - @cm def limit_plds( @@ -607,7 +592,7 @@ async def drain_to_final_msg( # receive all msgs, scanning for either a final result # or error; the underlying call should never raise any # remote error directly! - msg, pld = await ctx._pld_rx.recv_msg_w_pld( + msg, pld = await ctx._pld_rx.recv_msg( ipc=ctx, expect_msg=Return, raise_error=False, -- 2.34.1 From f0c5b6fb18a27a954155a36c32326a78f53d6426 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 12 Mar 2025 15:03:55 -0400 Subject: [PATCH 297/305] Add `Context._outcome_msg` use new `PldRx` API Such that any `Return` is always capture for each ctx instance and set in `._deliver_msg()` normally; ensures we can at least introspect for it when missing (like in a recently discovered stream teardown race bug). Yes this augments the already existing `._result` which is dedicated for the `._outcome_msg.pld` in the non-error case; we might want to see if there's a nicer way to directly proxy ref to that without getting the pre-pld-decoded `Raw` form with `msgspec`? Also use the new `ctx._pld_rx.recv_msg()` and drop assigning `pld_rx._ctx`. --- tractor/_context.py | 48 ++++++++++++++++++++++++++++++++++++--------- tractor/_portal.py | 2 +- 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/tractor/_context.py b/tractor/_context.py index 5d6ccf69..201e920a 100644 --- a/tractor/_context.py +++ b/tractor/_context.py @@ -82,6 +82,7 @@ from .msg import ( MsgType, NamespacePath, PayloadT, + Return, Started, Stop, Yield, @@ -245,11 +246,13 @@ class Context: # a drain loop? # _res_scope: trio.CancelScope|None = None + _outcome_msg: Return|Error|ContextCancelled = Unresolved + # on a clean exit there should be a final value # delivered from the far end "callee" task, so # this value is only set on one side. # _result: Any | int = None - _result: Any|Unresolved = Unresolved + _result: PayloadT|Unresolved = Unresolved # if the local "caller" task errors this value is always set # to the error that was captured in the @@ -1199,9 +1202,11 @@ class Context: ''' __tracebackhide__: bool = hide_tb - assert self._portal, ( - '`Context.wait_for_result()` can not be called from callee side!' - ) + if not self._portal: + raise RuntimeError( + 'Invalid usage of `Context.wait_for_result()`!\n' + 'Not valid on child-side IPC ctx!\n' + ) if self._final_result_is_set(): return self._result @@ -1222,6 +1227,8 @@ class Context: # since every message should be delivered via the normal # `._deliver_msg()` route which will appropriately set # any `.maybe_error`. + outcome_msg: Return|Error|ContextCancelled + drained_msgs: list[MsgType] ( outcome_msg, drained_msgs, @@ -1229,11 +1236,19 @@ class Context: ctx=self, hide_tb=hide_tb, ) - drained_status: str = ( 'Ctx drained to final outcome msg\n\n' f'{outcome_msg}\n' ) + + # ?XXX, should already be set in `._deliver_msg()` right? 
+ if self._outcome_msg is not Unresolved: + # from .devx import _debug + # await _debug.pause() + assert self._outcome_msg is outcome_msg + else: + self._outcome_msg = outcome_msg + if drained_msgs: drained_status += ( '\n' @@ -1741,7 +1756,6 @@ class Context: f'{structfmt(msg)}\n' ) - # NOTE: if an error is deteced we should always still # send it through the feeder-mem-chan and expect # it to be raised by any context (stream) consumer @@ -1753,6 +1767,21 @@ class Context: # normally the task that should get cancelled/error # from some remote fault! send_chan.send_nowait(msg) + match msg: + case Stop(): + if (stream := self._stream): + stream._stop_msg = msg + + case Return(): + if not self._outcome_msg: + log.warning( + f'Setting final outcome msg AFTER ' + f'`._rx_chan.send()`??\n' + f'\n' + f'{msg}' + ) + self._outcome_msg = msg + return True except trio.BrokenResourceError: @@ -2009,7 +2038,7 @@ async def open_context_from_portal( # the dialog, the `Error` msg should be raised from the `msg` # handling block below. try: - started_msg, first = await ctx._pld_rx.recv_msg_w_pld( + started_msg, first = await ctx._pld_rx.recv_msg( ipc=ctx, expect_msg=Started, passthrough_non_pld_msgs=False, @@ -2374,7 +2403,8 @@ async def open_context_from_portal( # displaying `ContextCancelled` traces where the # cause of crash/exit IS due to something in # user/app code on either end of the context. - and not rxchan._closed + and + not rxchan._closed ): # XXX NOTE XXX: and again as per above, we mask any # `trio.Cancelled` raised here so as to NOT mask @@ -2433,6 +2463,7 @@ async def open_context_from_portal( # FINALLY, remove the context from runtime tracking and # exit! log.runtime( + # log.cancel( f'De-allocating IPC ctx opened with {ctx.side!r} peer \n' f'uid: {uid}\n' f'cid: {ctx.cid}\n' @@ -2488,7 +2519,6 @@ def mk_context( _caller_info=caller_info, **kwargs, ) - pld_rx._ctx = ctx ctx._result = Unresolved return ctx diff --git a/tractor/_portal.py b/tractor/_portal.py index 7fbf69b2..cee10c47 100644 --- a/tractor/_portal.py +++ b/tractor/_portal.py @@ -184,7 +184,7 @@ class Portal: ( self._final_result_msg, self._final_result_pld, - ) = await self._expect_result_ctx._pld_rx.recv_msg_w_pld( + ) = await self._expect_result_ctx._pld_rx.recv_msg( ipc=self._expect_result_ctx, expect_msg=Return, ) -- 2.34.1 From 68d71c2df1d2885c7d3f53be3444d61f2f0ae7ef Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 12 Mar 2025 16:24:39 -0400 Subject: [PATCH 298/305] Add `MsgStream._stop_msg` use new `PldRx` API In particular ensuring we use `ctx._pld_rx.recv_msg_nowait()` from `.receive_nowait()` (which is called from `.aclose()`) such that we ALWAYS (can) set the surrounding `Context._result/._outcome_msg` attrs on reception of a final `Return`!! This fixes a final stream-teardown-race-condition-bug where prior we normally didn't set the `Context._result/._outcome_msg` in such cases. This is **precisely because** `.receive_nowait()` only returns the `pld` and when called from `.aclose()` this value is discarded, meaning so is its boxing `Return` despite consuming it from the underlying `._rx_chan`.. Longer term this should be solved differently by ensuring such races cases are handled at a higher scope like inside `Context._deliver_msg()` or the `Portal.open_context()` enter/exit blocks? Add a detailed warning note and todos for all this around the special case block! 
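
The race, roughly sketched from the parent side:

    async with ctx.open_stream() as stream:
        ...  # child returns early, its `Return` lands in `._rx_chan`

    # ^ `.aclose()`'s drain loop called `.receive_nowait()` which
    # decoded but then DISCARDED the boxing `Return`, so a later
    # `await ctx.result()` could miss/hang on the final outcome..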
--- tractor/_streaming.py | 118 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 95 insertions(+), 23 deletions(-) diff --git a/tractor/_streaming.py b/tractor/_streaming.py index 58e9b069..2ff2d41c 100644 --- a/tractor/_streaming.py +++ b/tractor/_streaming.py @@ -45,9 +45,11 @@ from .trionics import ( BroadcastReceiver, ) from tractor.msg import ( - # Return, - # Stop, + Error, + Return, + Stop, MsgType, + PayloadT, Yield, ) @@ -70,8 +72,7 @@ class MsgStream(trio.abc.Channel): A bidirectional message stream for receiving logically sequenced values over an inter-actor IPC `Channel`. - This is the type returned to a local task which entered either - `Portal.open_stream_from()` or `Context.open_stream()`. + Termination rules: @@ -94,6 +95,9 @@ class MsgStream(trio.abc.Channel): self._rx_chan = rx_chan self._broadcaster = _broadcaster + # any actual IPC msg which is effectively an `EndOfStream` + self._stop_msg: bool|Stop = False + # flag to denote end of stream self._eoc: bool|trio.EndOfChannel = False self._closed: bool|trio.ClosedResourceError = False @@ -125,16 +129,67 @@ class MsgStream(trio.abc.Channel): def receive_nowait( self, expect_msg: MsgType = Yield, - ): + ) -> PayloadT: ctx: Context = self._ctx - return ctx._pld_rx.recv_pld_nowait( + ( + msg, + pld, + ) = ctx._pld_rx.recv_msg_nowait( ipc=self, expect_msg=expect_msg, ) + # ?TODO, maybe factor this into a hyper-common `unwrap_pld()` + # + match msg: + + # XXX, these never seems to ever hit? cool? + case Stop(): + log.cancel( + f'Msg-stream was ended via stop msg\n' + f'{msg}' + ) + case Error(): + log.error( + f'Msg-stream was ended via error msg\n' + f'{msg}' + ) + + # XXX NOTE, always set any final result on the ctx to + # avoid teardown race conditions where previously this msg + # would be consumed silently (by `.aclose()` doing its + # own "msg drain loop" but WITHOUT those `drained: lists[MsgType]` + # being post-close-processed! + # + # !!TODO, see the equiv todo-comment in `.receive()` + # around the `if drained:` where we should prolly + # ACTUALLY be doing this post-close processing?? + # + case Return(pld=pld): + log.warning( + f'Msg-stream final result msg for IPC ctx?\n' + f'{msg}' + ) + # XXX TODO, this **should be covered** by higher + # scoped runtime-side method calls such as + # `Context._deliver_msg()`, so you should never + # really see the warning above or else something + # racy/out-of-order is likely going on between + # actor-runtime-side push tasks and the user-app-side + # consume tasks! + # -[ ] figure out that set of race cases and fix! + # -[ ] possibly return the `msg` given an input + # arg-flag is set so we can process the `Return` + # from the `.aclose()` caller? + # + # breakpoint() # to debug this RACE CASE! 
+ ctx._result = pld + ctx._outcome_msg = msg + + return pld + async def receive( self, - hide_tb: bool = False, ): ''' @@ -154,7 +209,7 @@ class MsgStream(trio.abc.Channel): # except trio.EndOfChannel: # raise StopAsyncIteration # - # see ``.aclose()`` for notes on the old behaviour prior to + # see `.aclose()` for notes on the old behaviour prior to # introducing this if self._eoc: raise self._eoc @@ -165,7 +220,11 @@ class MsgStream(trio.abc.Channel): src_err: Exception|None = None # orig tb try: ctx: Context = self._ctx - return await ctx._pld_rx.recv_pld(ipc=self) + pld = await ctx._pld_rx.recv_pld( + ipc=self, + expect_msg=Yield, + ) + return pld # XXX: the stream terminates on either of: # - `self._rx_chan.receive()` raising after manual closure @@ -174,7 +233,7 @@ class MsgStream(trio.abc.Channel): # - via a `Stop`-msg received from remote peer task. # NOTE # |_ previously this was triggered by calling - # ``._rx_chan.aclose()`` on the send side of the channel + # `._rx_chan.aclose()` on the send side of the channel # inside `Actor._deliver_ctx_payload()`, but now the 'stop' # message handling gets delegated to `PldRFx.recv_pld()` # internals. @@ -198,11 +257,14 @@ class MsgStream(trio.abc.Channel): # terminated and signal this local iterator to stop drained: list[Exception|dict] = await self.aclose() if drained: - # ?TODO? pass these to the `._ctx._drained_msgs: deque` - # and then iterate them as part of any `.wait_for_result()` call? - # - # from .devx import pause - # await pause() + # ^^^^^^^^TODO? pass these to the `._ctx._drained_msgs: + # deque` and then iterate them as part of any + # `.wait_for_result()` call? + # + # -[ ] move the match-case processing from + # `.receive_nowait()` instead to right here, use it from + # a for msg in drained:` post-proc loop? + # log.warning( 'Drained context msgs during closure\n\n' f'{drained}' @@ -265,9 +327,6 @@ class MsgStream(trio.abc.Channel): - more or less we try to maintain adherance to trio's `.aclose()` semantics: https://trio.readthedocs.io/en/stable/reference-io.html#trio.abc.AsyncResource.aclose ''' - - # rx_chan = self._rx_chan - # XXX NOTE XXX # it's SUPER IMPORTANT that we ensure we don't DOUBLE # DRAIN msgs on closure so avoid getting stuck handing on @@ -279,15 +338,16 @@ class MsgStream(trio.abc.Channel): # this stream has already been closed so silently succeed as # per ``trio.AsyncResource`` semantics. 
# https://trio.readthedocs.io/en/stable/reference-io.html#trio.abc.AsyncResource.aclose + # import tractor + # await tractor.pause() return [] ctx: Context = self._ctx drained: list[Exception|dict] = [] while not drained: try: - maybe_final_msg = self.receive_nowait( - # allow_msgs=[Yield, Return], - expect_msg=Yield, + maybe_final_msg: Yield|Return = self.receive_nowait( + expect_msg=Yield|Return, ) if maybe_final_msg: log.debug( @@ -372,8 +432,10 @@ class MsgStream(trio.abc.Channel): # await rx_chan.aclose() if not self._eoc: + this_side: str = self._ctx.side + peer_side: str = self._ctx.peer_side message: str = ( - f'Stream self-closed by {self._ctx.side!r}-side before EoC\n' + f'Stream self-closed by {this_side!r}-side before EoC from {peer_side!r}\n' # } bc a stream is a "scope"/msging-phase inside an IPC f'x}}>\n' f' |_{self}\n' @@ -381,9 +443,19 @@ class MsgStream(trio.abc.Channel): log.cancel(message) self._eoc = trio.EndOfChannel(message) + if ( + (rx_chan := self._rx_chan) + and + (stats := rx_chan.statistics()).tasks_waiting_receive + ): + log.cancel( + f'Msg-stream is closing but there is still reader tasks,\n' + f'{stats}\n' + ) + # ?XXX WAIT, why do we not close the local mem chan `._rx_chan` XXX? # => NO, DEFINITELY NOT! <= - # if we're a bi-dir ``MsgStream`` BECAUSE this same + # if we're a bi-dir `MsgStream` BECAUSE this same # core-msg-loop mem recv-chan is used to deliver the # potential final result from the surrounding inter-actor # `Context` so we don't want to close it until that -- 2.34.1 From 888a3ae7608255d56b6a3bd6f9db9e8e705f625c Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 12 Mar 2025 16:41:42 -0400 Subject: [PATCH 299/305] Add `.runtime()`-emit to `._invoke()` to report final result msg in the child --- tractor/_rpc.py | 4 ++++ tractor/_runtime.py | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tractor/_rpc.py b/tractor/_rpc.py index 086cfff6..c5daed9e 100644 --- a/tractor/_rpc.py +++ b/tractor/_rpc.py @@ -649,6 +649,10 @@ async def _invoke( ) # set and shuttle final result to "parent"-side task. ctx._result = res + log.runtime( + f'Sending result msg and exiting {ctx.side!r}\n' + f'{return_msg}\n' + ) await chan.send(return_msg) # NOTE: this happens IFF `ctx._scope.cancel()` is diff --git a/tractor/_runtime.py b/tractor/_runtime.py index e7faaedf..890a690a 100644 --- a/tractor/_runtime.py +++ b/tractor/_runtime.py @@ -836,8 +836,10 @@ class Actor: )] except KeyError: report: str = ( - 'Ignoring invalid IPC ctx msg!\n\n' - f'<=? {uid}\n\n' + 'Ignoring invalid IPC msg!?\n' + f'Ctx seems to not/no-longer exist??\n' + f'\n' + f'<=? {uid}\n' f' |_{pretty_struct.pformat(msg)}\n' ) match msg: -- 2.34.1 From 9919edc4bba18501d435de22ecc86ade36cc5c15 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Sun, 16 Mar 2025 17:20:20 -0400 Subject: [PATCH 300/305] Mask top level import of `.hilevel` Since it isn't required until the landing of the new service-manager stuff in https://pikers.dev/goodboy/tractor/pulls/12; was an oversight from commit `0607a31dddeba032a2cf7d9fe605edd9d7bb4846`. --- tractor/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tractor/__init__.py b/tractor/__init__.py index 6ddbf199..0c011a22 100644 --- a/tractor/__init__.py +++ b/tractor/__init__.py @@ -67,4 +67,4 @@ from ._root import ( from ._ipc import Channel as Channel from ._portal import Portal as Portal from ._runtime import Actor as Actor -from . import hilevel as hilevel +# from . 
import hilevel as hilevel -- 2.34.1 From bd19942328ccbd2bb97ccd903172e5c5ee09ca7b Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 19 Mar 2025 10:02:05 -0400 Subject: [PATCH 301/305] Bump up to `pytest>=8.3.5` to match "GH actions" Ensure it's only for the `--dev` optional deps. --- pyproject.toml | 4 ++-- uv.lock | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 61272a60..da08fbc3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,7 @@ dependencies = [ "colorlog>=6.8.2,<7", # built-in multi-actor `pdb` REPL "pdbp>=1.6,<2", # windows only (from `pdbp`) - "tabcompleter>=1.4.0", + "tabcompleter>=1.4.0", # typed IPC msging # TODO, get back on release once 3.13 support is out! "msgspec>=0.19.0", @@ -57,7 +57,7 @@ dev = [ # test suite # TODO: maybe some of these layout choices? # https://docs.pytest.org/en/8.0.x/explanation/goodpractices.html#choosing-a-test-layout-import-rules - "pytest>=8.2.0,<9", + "pytest>=8.3.5", "pexpect>=4.9.0,<5", # `tractor.devx` tooling "greenback>=1.2.1,<2", diff --git a/uv.lock b/uv.lock index 6b12137e..94647191 100644 --- a/uv.lock +++ b/uv.lock @@ -294,7 +294,7 @@ wheels = [ [[package]] name = "pytest" -version = "8.3.4" +version = "8.3.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, @@ -302,9 +302,9 @@ dependencies = [ { name = "packaging" }, { name = "pluggy" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/05/35/30e0d83068951d90a01852cb1cef56e5d8a09d20c7f511634cc2f7e0372a/pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761", size = 1445919 } +sdist = { url = "https://files.pythonhosted.org/packages/ae/3c/c9d525a414d506893f0cd8a8d0de7706446213181570cdbd766691164e40/pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845", size = 1450891 } wheels = [ - { url = "https://files.pythonhosted.org/packages/11/92/76a1c94d3afee238333bc0a42b82935dd8f9cf8ce9e336ff87ee14d9e1cf/pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6", size = 343083 }, + { url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634 }, ] [[package]] @@ -390,7 +390,7 @@ dev = [ { name = "pexpect", specifier = ">=4.9.0,<5" }, { name = "prompt-toolkit", specifier = ">=3.0.50" }, { name = "pyperclip", specifier = ">=1.9.0" }, - { name = "pytest", specifier = ">=8.2.0,<9" }, + { name = "pytest", specifier = ">=8.3.5" }, { name = "stackscope", specifier = ">=0.2.2,<0.3" }, { name = "xonsh", specifier = ">=0.19.2" }, ] -- 2.34.1 From 3d5488598106097086b86ccaf630449756ca5e80 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Wed, 19 Mar 2025 13:30:05 -0400 Subject: [PATCH 302/305] Continue supporting py3.11+ Apparently the only thing needing a guard was use of `asyncio.Queue.shutdown()` and the paired `QueueShutDown` exception? Cool. 
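
For reference, the gate boils down to roughly this (a minimal sketch
with illustrative names, not the exact impl in the diff below):

    import platform

    # ('3', '13', '1') -> (3, 13), then gate on 3.13+
    _py_313: bool = (
        tuple(map(int, platform.python_version_tuple()[:2]))
        >= (3, 13)
    )

    if _py_313:
        from asyncio import QueueShutDown
    else:
        # falsy sentinel so call sites can guard with
        # `_py_313 and isinstance(err, QueueShutDown)`
        QueueShutDown = False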
---
 tractor/to_asyncio.py | 31 ++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/tractor/to_asyncio.py b/tractor/to_asyncio.py
index 7b87be0b..08b1ed25 100644
--- a/tractor/to_asyncio.py
+++ b/tractor/to_asyncio.py
@@ -23,12 +23,10 @@ import asyncio
 from asyncio.exceptions import (
     CancelledError,
 )
-from asyncio import (
-    QueueShutDown,
-)
 from contextlib import asynccontextmanager as acm
 from dataclasses import dataclass
 import inspect
+import platform
 import traceback
 from typing import (
     Any,
@@ -79,6 +77,20 @@ __all__ = [
    'run_as_asyncio_guest',
 ]

+if (_py_313 := (
+    (3, 13)
+    <=
+    tuple(map(int, platform.python_version_tuple()[:2]))
+    )
+):
+    # 3.13+ only.. lel.
+    # (int-tuple compare so 3.14+ also matches)
+    # https://docs.python.org/3.13/library/asyncio-queue.html#asyncio.QueueShutDown
+    from asyncio import (
+        QueueShutDown,
+    )
+else:
+    QueueShutDown = False

 # TODO, generally speaking we can generalize this abstraction, a "SC linked
 # parent->child task pair", as the same "supervision scope primitive"
@@ -575,7 +587,11 @@ def _run_asyncio_task(
         #   normally suppressed unless the trio.Task also errors
         #
         # ?TODO, is this even needed (does it happen) now?
-        elif isinstance(aio_err, QueueShutDown):
+        elif (
+            _py_313
+            and
+            isinstance(aio_err, QueueShutDown)
+        ):
             # import pdbp; pdbp.set_trace()
             trio_err = AsyncioTaskExited(
                 'Task exited before `trio` side'
@@ -955,9 +971,10 @@ async def translate_aio_errors(
             # or an error, we ensure the aio-side gets signalled via
             # an explicit exception and its `Queue` is shutdown.
             if ya_trio_exited:
-                # raise `QueueShutDown` on next `Queue.get()` call on
-                # aio side.
-                chan._to_aio.shutdown()
+                # XXX py3.13+ ONLY..
+                # raise `QueueShutDown` on next `Queue.get/put()`
+                if _py_313:
+                    chan._to_aio.shutdown()

                 # pump this event-loop (well `Runner` but ya)
                 #
-- 2.34.1


From 87619e1b3f5a88f08430fadab6f0b2405fd2b0c0 Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Wed, 18 Sep 2024 22:47:59 -0400
Subject: [PATCH 303/305] Add a super naive multi-host-capable web-req proxier
 for @jc211

---
 examples/multihost/client.py | 50 ++++++++++++++++++++++++
 examples/multihost/server.py | 75 ++++++++++++++++++++++++++++++++++++
 2 files changed, 125 insertions(+)
 create mode 100644 examples/multihost/client.py
 create mode 100644 examples/multihost/server.py

diff --git a/examples/multihost/client.py b/examples/multihost/client.py
new file mode 100644
index 00000000..888b1004
--- /dev/null
+++ b/examples/multihost/client.py
@@ -0,0 +1,50 @@
+import tractor
+import trio
+
+
+log = tractor.log.get_console_log(
+    _root_name='my_app',
+    name='client',
+)
+
+
+async def client_main():
+
+    # enable console logging for our custom app's logger
+    tractor.log.get_console_log(
+        level='info',
+        _root_name='my_app',
+        name='client',
+    )
+
+    # presuming you can get a ref to the target server RPC-ctx func
+    from server import proxy_request
+
+    async with (
+        tractor.open_root_actor(
+            name='web_requester',
+            registry_addrs=[('127.0.0.1', 1616)],
+            enable_modules=[],  # since this isn't a service actor
+        ),
+
+        # use discovery api to find the server actor on your net
+        # (NOTE, in which case the below registry addr would have to
+        # be the public IP of that host!)
+        tractor.find_actor(
+            name='web_proxier',
+            registry_addrs=[('127.0.0.1', 1616)],
+        ) as portal,
+
+        # open an RPC context with the remote actor, thus spawning
+        # a new task implemented as the function defined in the
+        # server code. 
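+        # (NOTE, the `first` value unpacked below is whatever the
+        # remote task passes to `ctx.started()`; `server.py` calls it
+        # with no args so expect `None` here.)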
+        portal.open_context(
+            proxy_request,
+            address='https://github.com',
+        ) as (ctx, first),
+    ):
+        resp: str = await ctx.result()
+        print(resp)
+
+
+trio.run(client_main)
diff --git a/examples/multihost/server.py b/examples/multihost/server.py
new file mode 100644
index 00000000..8f43b58f
--- /dev/null
+++ b/examples/multihost/server.py
@@ -0,0 +1,75 @@
+import httpx
+import tractor
+import trio
+
+log = tractor.log.get_console_log(
+    _root_name='my_app',
+    name='server_thingy',
+)
+
+
+@tractor.context
+async def proxy_request(
+    ctx: tractor.Context,
+    address: str,
+):
+    log.info(
+        'Rxed client request\n'
+        f'{address}\n'
+    )
+    async with httpx.AsyncClient() as client:
+        await ctx.started()  # signal the (remote) caller that this task has started its client
+        log.info(
+            'Opened `httpx` client..'
+        )
+
+        resp: httpx.Response = await client.get(address)  # do the proxied request, get response.
+        log.info(
+            'Got response..\n'
+            f'{resp}\n'
+        )

+    # only breaking this up to clarify that you don't have to return only a single result; you could have opened
+    # a long lived stream to avoid task spawning overhead in this service actor.. but more on that later..
+    #
+    # NOTEs, cast to `str` here since we can't serialize the
+    # response type for the wire directly, at least not without
+    # a custom `msgspec.Decoder`!!
+    return str(resp)
+    #
+    # ^TODO, various typed msging options:
+    # -[ ] try returning just the `resp` verbatim => should raise
+    #     an MTE
+    # -[ ] try defining a custom `Response` msg to proxy the orig
+    #     types fields and/or a decoder to serialize it?
+
+
+async def main():
+
+    # enable console logging for our custom app's logger
+    tractor.log.get_console_log(
+        level='info',
+        _root_name='my_app',
+        name='server_thingy',
+    )
+
+    # since run as a script this will likely be `__main__`
+    # so instead we want to just use our module name..
+    this_mod: str = 'server'
+    async with tractor.open_root_actor(
+        name='web_proxier',
+        registry_addrs=[('127.0.0.1', 1616)],
+        enable_modules=[this_mod],
+        loglevel='info',
+    ):
+        # just block waiting for a peer actor to connect and open an RPC context using the above
+        # proxy endpoint.
+        log.info(
+            'proxy server up bby!\n'
+            'waiting to serve some requests..\n'
+        )
+        await trio.sleep_forever()
+
+
+if __name__ == '__main__':
+    trio.run(main)
-- 2.34.1


From 90f48512d15074091c6148672882093d32f6273c Mon Sep 17 00:00:00 2001
From: Tyler Goodlet
Date: Thu, 19 Sep 2024 02:51:33 -0400
Subject: [PATCH 304/305] Add in-depth comment about module naming when used
 without pkg

---
 examples/multihost/client.py | 23 ++++++++++++++++++-----
 examples/multihost/server.py | 26 +++++++++++++++++++++-----
 2 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/examples/multihost/client.py b/examples/multihost/client.py
index 888b1004..d4893bf5 100644
--- a/examples/multihost/client.py
+++ b/examples/multihost/client.py
@@ -6,33 +6,45 @@ log = tractor.log.get_console_log(
     _root_name='my_app',
     name='client',
 )
+_loglevel: str = 'cancel'


 async def client_main():

     # enable console logging for our custom app's logger
     tractor.log.get_console_log(
-        level='info',
+        level=_loglevel,
         _root_name='my_app',
         name='client',
     )

-    # presuming you can get a ref to the target server RPC-ctx func
+    # presuming you can get a ref to the target server RPC-ctx func,
+    # pass it directly as our rpc-ctx endpoint below. 
     from server import proxy_request
+    #
+    # NOTE, see the equiv note in `server.py` explaining why this will
+    # render more or less to `'server:proxy_request'` according to
+    # `tractor.msg.NamespacePath.from_ref(proxy_request)`

     async with (
         tractor.open_root_actor(
             name='web_requester',
             registry_addrs=[('127.0.0.1', 1616)],
             enable_modules=[],  # since this isn't a service actor
+            loglevel=_loglevel,
         ),

         # use discovery api to find the server actor on your net
         # (NOTE, in which case the below registry addr would have to
         # be the public IP of that host!)
-        tractor.find_actor(
+        # tractor.find_actor(
+        #     name='web_proxier',
+        #     registry_addrs=[('127.0.0.1', 1616)],
+        # ) as portal,
+
+        tractor.wait_for_actor(
             name='web_proxier',
-            registry_addrs=[('127.0.0.1', 1616)],
+            registry_addr=('127.0.0.1', 1616),
         ) as portal,

         # open an RPC context with the remote actor, thus spawning
@@ -47,4 +59,5 @@
     print(resp)


-trio.run(client_main)
+if __name__ == '__main__':
+    trio.run(client_main)
diff --git a/examples/multihost/server.py b/examples/multihost/server.py
index 8f43b58f..3b4ac67d 100644
--- a/examples/multihost/server.py
+++ b/examples/multihost/server.py
@@ -36,7 +36,8 @@ async def proxy_request(
     # response type for the wire directly, at least not without
     # a custom `msgspec.Decoder`!!
     return str(resp)
-    #
+
+    # return resp
     # ^TODO, various typed msging options:
     # -[ ] try returning just the `resp` verbatim => should raise
     #     an MTE
     # -[ ] try defining a custom `Response` msg to proxy the orig
     #     types fields and/or a decoder to serialize it?
@@ -53,17 +54,32 @@ async def main():

         name='server_thingy',
     )

-    # since run as a script this will likely be `__main__`
-    # so instead we want to just use our module name..
+    # since (originally) this is run as a script, we will end up with
+    # `__name__ == '__main__'` so to ensure the rpc request from the
+    # client isn't blocked by `tractor.ModuleNotFound`, we want to just
+    # use the explicit file-as-module name.. why u ask?
     this_mod: str = 'server'
+    # WELP, when the `Portal.open_context()` api (used in
+    # `client.py`) requests the RPC-ctx ep it will send
+    # a `str`-like-ptr encoding the func-ref in the form expected by
+    # `pkgutil.resolve_name()`.
+    #
+    # Since the client's local namespace reference/path to this
+    # `.server.py` mod will be from a direct manual import, that
+    # `proxy_request()`-ref will render as `'server:proxy_request'`
+    # (as delivered from `NamespacePath.from_ref()` since that's how
+    # `.open_context()` serializes the func's-ref for IPC transit).
+    # SO, we need to be sure we "enable" this module name so that the
+    # nsp maps to an enabled module in the `Actor._mods: dict`.
+
     async with tractor.open_root_actor(
         name='web_proxier',
         registry_addrs=[('127.0.0.1', 1616)],
         enable_modules=[this_mod],
         loglevel='info',
     ):
-        # just block waiting for a peer actor to connect and open an RPC context using the above
-        # proxy endpoint.
+        # just block waiting for a peer actor to connect and open an
+        # RPC context using the above proxy endpoint. 
         log.info(
             'proxy server up bby!\n'
             'waiting to serve some requests..\n'
-- 2.34.1


From afb2501ceb566a16a42603e71326aeb02e43c773 Mon Sep 17 00:00:00 2001
From: Jad Abou-Chakra <29726242+jc211@users.noreply.github.com>
Date: Thu, 19 Sep 2024 18:17:20 +1000
Subject: [PATCH 305/305] Decouple registry addresses from binding addresses

---
 tractor/_root.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/tractor/_root.py b/tractor/_root.py
index 2a9beaa3..f10103fe 100644
--- a/tractor/_root.py
+++ b/tractor/_root.py
@@ -70,7 +70,10 @@ async def open_root_actor(

     # defaults are above
     arbiter_addr: tuple[str, int]|None = None,
-
+
+    # binding addrs for the transport layer server
+    trans_bind_addrs: list[tuple[str, int]] = [(_default_host, _default_port)],
+
     name: str|None = 'root',

     # either the `multiprocessing` start method:
@@ -201,6 +204,8 @@ async def open_root_actor(
             _default_lo_addrs
         )
     assert registry_addrs
+
+    assert trans_bind_addrs

     loglevel = (
         loglevel
@@ -287,8 +292,6 @@ async def open_root_actor(
                 tuple(addr),  # TODO: just drop this requirement?
             )

-        trans_bind_addrs: list[tuple[str, int]] = []
-
         # Create a new local root-actor instance which IS NOT THE
         # REGISTRAR
         if ponged_addrs:
@@ -309,11 +312,6 @@ async def open_root_actor(
                 loglevel=loglevel,
                 enable_modules=enable_modules,
             )
-            # DO NOT use the registry_addrs as the transport server
-            # addrs for this new non-registar, root-actor.
-            for host, port in ponged_addrs:
-                # NOTE: zero triggers dynamic OS port allocation
-                trans_bind_addrs.append((host, 0))

         # Start this local actor as the "registrar", aka a regular
         # actor who manages the local registry of "mailboxes" of
-- 2.34.1
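
With the above, a root actor can advertise registry addrs that differ
from where its transport server actually binds; a minimal usage sketch
(assuming the new `trans_bind_addrs` kwarg lands as in this patch,
addrs/ports are illustrative):

    import tractor
    import trio


    async def main():
        async with tractor.open_root_actor(
            name='svc',
            # where peers should do discovery lookups
            registry_addrs=[('127.0.0.1', 1616)],
            # where *this* actor's transport server binds; port 0
            # requests dynamic OS port allocation
            trans_bind_addrs=[('127.0.0.1', 0)],
        ):
            await trio.sleep_forever()


    if __name__ == '__main__':
        trio.run(main)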