Compare commits
274 Commits
main
...
subint_for
| Author | SHA1 | Date |
|---|---|---|
|
|
797f57ce7b | |
|
|
cf0e3e6f8b | |
|
|
99d70337b7 | |
|
|
a617b52140 | |
|
|
e3318a5483 | |
|
|
e2b1035ff0 | |
|
|
17f1ea5910 | |
|
|
9bd113154b | |
|
|
5ea5fb211d | |
|
|
489dc6d0cc | |
|
|
35796ec8ae | |
|
|
baf7ec54ac | |
|
|
79390a4e3a | |
|
|
dbe2e8bd82 | |
|
|
fe753f9656 | |
|
|
b48503085f | |
|
|
3869a9b468 | |
|
|
c2d8e967aa | |
|
|
1037c05eaf | |
|
|
959fc48806 | |
|
|
2e9dbc5b12 | |
|
|
7cee62ce42 | |
|
|
3773ad8b77 | |
|
|
73e83e1474 | |
|
|
355beac082 | |
|
|
263249029b | |
|
|
292179d8f3 | |
|
|
9a360fbbcd | |
|
|
ae5b63c0bc | |
|
|
f75865fb2e | |
|
|
e0b8f23cbc | |
|
|
8d662999a4 | |
|
|
d7ca68cf61 | |
|
|
b5b0504918 | |
|
|
de78a6445b | |
|
|
5c98ab1fb6 | |
|
|
3867403fab | |
|
|
7c8e5a6732 | |
|
|
3152f423d8 | |
|
|
ed65301d32 | |
|
|
8817032c90 | |
|
|
70dc60a199 | |
|
|
cd287c7e93 | |
|
|
7b04b2cdfc | |
|
|
75b07c4b7c | |
|
|
86d4e0d3ed | |
|
|
ccb013a615 | |
|
|
c3d6cc9007 | |
|
|
cb7b76c44f | |
|
|
23677f8a3c | |
|
|
06ff2dd5f2 | |
|
|
a891e003b2 | |
|
|
e90241baaa | |
|
|
7079a597c5 | |
|
|
bc60aa1ec5 | |
|
|
f881683c97 | |
|
|
490fac432c | |
|
|
5f6e45e1d4 | |
|
|
5c4438bacc | |
|
|
90ba0e3658 | |
|
|
1f1e09a786 | |
|
|
7cf3b5d00d | |
|
|
c72d495d68 | |
|
|
926e861f52 | |
|
|
d9cb38372f | |
|
|
b4c975b48b | |
|
|
8344537aa6 | |
|
|
3a9f4ea383 | |
|
|
ca1b01f926 | |
|
|
570c975f14 | |
|
|
a0a7668670 | |
|
|
27bf566d75 | |
|
|
e8f1eca8d2 | |
|
|
656c6c30d1 | |
|
|
acf6568275 | |
|
|
c6c591e61a | |
|
|
b883b27646 | |
|
|
00637764d9 | |
|
|
ea971d25aa | |
|
|
83b6c4270a | |
|
|
6309c2e6fc | |
|
|
f5301d3fb0 | |
|
|
9f8e9eb739 | |
|
|
6b04650187 | |
|
|
a0607f363e | |
|
|
2ee86ddb1a | |
|
|
0286d36ed7 | |
|
|
2db6f97130 | |
|
|
9af6adc181 | |
|
|
452a32fb23 | |
|
|
f47010d7e9 | |
|
|
3f198bc86c | |
|
|
391c8d3566 | |
|
|
4fc477cfd6 | |
|
|
cd1cd03725 | |
|
|
febe587c6c | |
|
|
4d3c5b9163 | |
|
|
8b106b9144 | |
|
|
d6ece8eab3 | |
|
|
8494eb9b8a | |
|
|
cab366cd65 | |
|
|
85f9c5df6f | |
|
|
ebe9d5e4b5 | |
|
|
bbf01d5161 | |
|
|
ec8e8a2786 | |
|
|
c3d1ec22eb | |
|
|
8f44efa327 | |
|
|
5968a3c773 | |
|
|
80597b80bf | |
|
|
a41c6d5c70 | |
|
|
9c37b3f956 | |
|
|
8f6bc56174 | |
|
|
b14dbde77b | |
|
|
cd6509b724 | |
|
|
93d99ed2eb | |
|
|
6215e3b2dd | |
|
|
be5d8da8c0 | |
|
|
21ed181835 | |
|
|
9ec2749ab7 | |
|
|
f3441a6790 | |
|
|
cc42d38284 | |
|
|
6827ceba12 | |
|
|
94458807ce | |
|
|
be5e7e446b | |
|
|
571b2b320e | |
|
|
c7b5d00f19 | |
|
|
1049f7bf38 | |
|
|
cc3bfac741 | |
|
|
e71eec07de | |
|
|
b557ec20a7 | |
|
|
85457cb839 | |
|
|
850219f60c | |
|
|
d929fb75b5 | |
|
|
403c2174a1 | |
|
|
528012f35f | |
|
|
0dfa6f4a8a | |
|
|
a0d3741fac | |
|
|
149b800c9f | |
|
|
03f458a45c | |
|
|
e77198bb64 | |
|
|
5b8f6cf4c7 | |
|
|
8868ff19f3 | |
|
|
066011b83d | |
|
|
b1d003d850 | |
|
|
8991ec2bf5 | |
|
|
dfc153c228 | |
|
|
52e8fb43ee | |
|
|
99577b719a | |
|
|
4092db60b2 | |
|
|
4f333dee05 | |
|
|
8a2f74da2c | |
|
|
2bf155131d | |
|
|
0f29f5717a | |
|
|
5ea721683b | |
|
|
f84ef44992 | |
|
|
1e0c57c6c5 | |
|
|
65660c77c7 | |
|
|
c9b415475f | |
|
|
359bcf691f | |
|
|
b3ce5ab4f6 | |
|
|
e89fe03da7 | |
|
|
417b796169 | |
|
|
36cbc07602 | |
|
|
1f2fad22ee | |
|
|
ca5f6f50a8 | |
|
|
a7ff1387c7 | |
|
|
abbb4a79c8 | |
|
|
1529095c32 | |
|
|
8215a7ba34 | |
|
|
c1c4d85958 | |
|
|
88b084802f | |
|
|
bf1dcea9d1 | |
|
|
5c270b89d5 | |
|
|
6ee0149e8d | |
|
|
9c4cd869fb | |
|
|
afd66ce3b7 | |
|
|
f9bdb1b35d | |
|
|
d135ce94af | |
|
|
fb94aa0095 | |
|
|
b71e8575e5 | |
|
|
bbc028e84c | |
|
|
016306adf5 | |
|
|
712c009790 | |
|
|
79396b4a26 | |
|
|
5b2905b702 | |
|
|
776af3fce6 | |
|
|
4639685770 | |
|
|
98a7d69341 | |
|
|
ab6c955949 | |
|
|
a72bb9321e | |
|
|
0e2949ea59 | |
|
|
fb73935dbc | |
|
|
94dfeb1441 | |
|
|
9c1bcb23af | |
|
|
a1ea373f34 | |
|
|
e8f3d64e71 | |
|
|
b30faaca82 | |
|
|
51701fc8dc | |
|
|
7b89204afd | |
|
|
82d02ef404 | |
|
|
b7546fd221 | |
|
|
86c95539ca | |
|
|
706a4b761b | |
|
|
c5af2fa778 | |
|
|
86489cc453 | |
|
|
2631fb4ff3 | |
|
|
aee86f2544 | |
|
|
83c8a8ad78 | |
|
|
daae196048 | |
|
|
70efcb09a0 | |
|
|
a7e74acdff | |
|
|
9c3d3bcec1 | |
|
|
521fb97fe9 | |
|
|
d8a3969048 | |
|
|
01c0db651a | |
|
|
7bcd7aca2b | |
|
|
920d0043b4 | |
|
|
93b9a6cd97 | |
|
|
e7cefba67f | |
|
|
683476cc96 | |
|
|
ad24df0ed7 | |
|
|
a1622c0b94 | |
|
|
a385d20810 | |
|
|
7f9044c1ef | |
|
|
d0618e3cb4 | |
|
|
a5bebf76d5 | |
|
|
814b2e7e62 | |
|
|
1704f73504 | |
|
|
c735fc8544 | |
|
|
c5ea6040bf | |
|
|
d4f2fa547a | |
|
|
20896bfbab | |
|
|
70bb77280e | |
|
|
916f88a070 | |
|
|
91f2f3ec10 | |
|
|
3e5124e184 | |
|
|
fa86269e30 | |
|
|
d0b92bbeba | |
|
|
9470815f5a | |
|
|
592d918394 | |
|
|
0cddc67bdb | |
|
|
052fe2435f | |
|
|
28819bf5d3 | |
|
|
07c2ba5c0d | |
|
|
50f40f427b | |
|
|
bf6de55865 | |
|
|
5ded99a886 | |
|
|
7145fa364f | |
|
|
f8e25688c7 | |
|
|
c3f455a8ec | |
|
|
f78e842fba | |
|
|
3638b80c9d | |
|
|
2ed9e65530 | |
|
|
6cab363c51 | |
|
|
8aee24e83f | |
|
|
cdcc1b42fc | |
|
|
51ac0c623e | |
|
|
3f0bde1bf8 | |
|
|
fa1a15dce8 | |
|
|
5850844297 | |
|
|
ff02939213 | |
|
|
d61e8caab2 | |
|
|
0b0c83e9da | |
|
|
5e7c0f264d | |
|
|
edf1189fe0 | |
|
|
de24bfe052 | |
|
|
e235b96894 | |
|
|
dea4b9fd93 | |
|
|
557e2cec6a | |
|
|
0e3229f16d | |
|
|
448d25aef4 | |
|
|
343c9e0034 | |
|
|
1dc27c5161 | |
|
|
14aefa4b11 |
|
|
@ -0,0 +1,38 @@
|
|||
# Docs TODOs
|
||||
|
||||
## Auto-sync README code examples with source
|
||||
|
||||
The `docs/README.rst` has inline code blocks that
|
||||
duplicate actual example files (e.g.
|
||||
`examples/infected_asyncio_echo_server.py`). Every time
|
||||
the public API changes we have to manually sync both.
|
||||
|
||||
Sphinx's `literalinclude` directive can pull code directly
|
||||
from source files:
|
||||
|
||||
```rst
|
||||
.. literalinclude:: ../examples/infected_asyncio_echo_server.py
|
||||
:language: python
|
||||
:caption: examples/infected_asyncio_echo_server.py
|
||||
```
|
||||
|
||||
Or to include only a specific function/section:
|
||||
|
||||
```rst
|
||||
.. literalinclude:: ../examples/infected_asyncio_echo_server.py
|
||||
:language: python
|
||||
:pyobject: aio_echo_server
|
||||
```
|
||||
|
||||
This way the docs always reflect the actual code without
|
||||
manual syncing.
|
||||
|
||||
### Considerations
|
||||
- `README.rst` is also rendered on GitHub/PyPI, which do
|
||||
NOT support `literalinclude` - so we'd need a build
|
||||
step or a separate `_sphinx_readme.rst` (which already
|
||||
exists at `docs/github_readme/_sphinx_readme.rst`).
|
||||
- Could use a pre-commit hook or CI step to extract code
|
||||
from examples into the README for GitHub rendering.
|
||||
- Another option: `sphinx-autodoc` style approach where
|
||||
docstrings from the actual module are pulled in.
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
{
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"Bash(date *)",
|
||||
"Bash(cp .claude/*)",
|
||||
"Bash(git diff *)",
|
||||
"Bash(git log *)",
|
||||
"Bash(git status)",
|
||||
"Bash(git remote:*)",
|
||||
"Bash(git stash:*)",
|
||||
"Bash(git mv:*)",
|
||||
"Bash(git rev-parse:*)",
|
||||
"Bash(test:*)",
|
||||
"Bash(ls:*)",
|
||||
"Bash(grep:*)",
|
||||
"Bash(find:*)",
|
||||
"Bash(ln:*)",
|
||||
"Bash(cat:*)",
|
||||
"Bash(mkdir:*)",
|
||||
"Bash(gh pr:*)",
|
||||
"Bash(gh api:*)",
|
||||
"Bash(gh issue:*)",
|
||||
"Bash(UV_PROJECT_ENVIRONMENT=py* uv sync:*)",
|
||||
"Bash(UV_PROJECT_ENVIRONMENT=py* uv run:*)",
|
||||
"Bash(echo EXIT:$?:*)",
|
||||
"Write(.claude/*commit_msg*)",
|
||||
"Write(.claude/git_commit_msg_LATEST.md)",
|
||||
"Skill(run-tests)",
|
||||
"Skill(close-wkt)",
|
||||
"Skill(open-wkt)",
|
||||
"Skill(prompt-io)"
|
||||
],
|
||||
"deny": [],
|
||||
"ask": []
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,225 @@
|
|||
# Commit Message Style Guide for `tractor`
|
||||
|
||||
Analysis based on 500 recent commits from the `tractor` repository.
|
||||
|
||||
## Core Principles
|
||||
|
||||
Write commit messages that are technically precise yet casual in
|
||||
tone. Use abbreviations and informal language while maintaining
|
||||
clarity about what changed and why.
|
||||
|
||||
## Subject Line Format
|
||||
|
||||
### Length and Structure
|
||||
- Target: ~50 chars with a hard-max of 67.
|
||||
- Use backticks around code elements (72.2% of commits)
|
||||
- Rarely use colons (5.2%), except for file prefixes
|
||||
- End with '?' for uncertain changes (rare: 0.8%)
|
||||
- End with '!' for important changes (rare: 2.0%)
|
||||
|
||||
### Opening Verbs (Present Tense)
|
||||
|
||||
Most common verbs from analysis:
|
||||
- `Add` (14.4%) - wholly new features/functionality
|
||||
- `Use` (4.4%) - adopt new approach/tool
|
||||
- `Drop` (3.6%) - remove code/feature
|
||||
- `Fix` (2.4%) - bug fixes
|
||||
- `Move`/`Mv` (3.6%) - relocate code
|
||||
- `Adjust` (2.0%) - minor tweaks
|
||||
- `Update` (1.6%) - enhance existing feature
|
||||
- `Bump` (1.2%) - dependency updates
|
||||
- `Rename` (1.2%) - identifier changes
|
||||
- `Set` (1.2%) - configuration changes
|
||||
- `Handle` (1.0%) - add handling logic
|
||||
- `Raise` (1.0%) - add error raising
|
||||
- `Pass` (0.8%) - pass parameters/values
|
||||
- `Support` (0.8%) - add support for something
|
||||
- `Hide` (1.4%) - make private/internal
|
||||
- `Always` (1.4%) - enforce consistent behavior
|
||||
- `Mk` (1.4%) - make/create (abbreviated)
|
||||
- `Start` (1.0%) - begin implementation
|
||||
|
||||
Other frequent verbs: `More`, `Change`, `Extend`, `Disable`, `Log`,
|
||||
`Enable`, `Ensure`, `Expose`, `Allow`
|
||||
|
||||
### Backtick Usage
|
||||
|
||||
Always use backticks for:
|
||||
- Module names: `trio`, `asyncio`, `msgspec`, `greenback`, `stackscope`
|
||||
- Class names: `Context`, `Actor`, `Address`, `PldRx`, `SpawnSpec`
|
||||
- Method names: `.pause_from_sync()`, `._pause()`, `.cancel()`
|
||||
- Function names: `breakpoint()`, `collapse_eg()`, `open_root_actor()`
|
||||
- Decorators: `@acm`, `@context`
|
||||
- Exceptions: `Cancelled`, `TransportClosed`, `MsgTypeError`
|
||||
- Keywords: `finally`, `None`, `False`
|
||||
- Variable names: `tn`, `debug_mode`
|
||||
- Complex expressions: `trio.Cancelled`, `asyncio.Task`
|
||||
|
||||
Most backticked terms in tractor:
|
||||
`trio`, `asyncio`, `Context`, `.pause_from_sync()`, `tn`,
|
||||
`._pause()`, `breakpoint()`, `collapse_eg()`, `Actor`, `@acm`,
|
||||
`.cancel()`, `Cancelled`, `open_root_actor()`, `greenback`
|
||||
|
||||
### Examples
|
||||
|
||||
Good subject lines:
|
||||
```
|
||||
Add `uds` to `._multiaddr`, tweak typing
|
||||
Drop `DebugStatus.shield` attr, add `.req_finished`
|
||||
Use `stackscope` for all actor-tree rendered "views"
|
||||
Fix `.to_asyncio` inter-task-cancellation!
|
||||
Bump `ruff.toml` to target py313
|
||||
Mv `load_module_from_path()` to new `._code_load` submod
|
||||
Always use `tuple`-cast for singleton parent addrs
|
||||
```
|
||||
|
||||
## Body Format
|
||||
|
||||
### General Structure
|
||||
- 43.2% of commits have no body (simple changes)
|
||||
- Use blank line after subject
|
||||
- Max line length: 67 chars
|
||||
- Use `-` bullets for lists (28.0% of commits)
|
||||
- Rarely use `*` bullets (2.4%)
|
||||
|
||||
### Section Markers
|
||||
|
||||
Use these markers to organize longer commit bodies:
|
||||
- `Also,` (most common: 26 occurrences)
|
||||
- `Other,` (13 occurrences)
|
||||
- `Deats,` (11 occurrences) - for implementation details
|
||||
- `Further,` (7 occurrences)
|
||||
- `TODO,` (3 occurrences)
|
||||
- `Impl details,` (2 occurrences)
|
||||
- `Notes,` (1 occurrence)
|
||||
|
||||
### Common Abbreviations
|
||||
|
||||
Use these freely (sorted by frequency):
|
||||
- `msg` (63) - message
|
||||
- `bg` (37) - background
|
||||
- `ctx` (30) - context
|
||||
- `impl` (27) - implementation
|
||||
- `mod` (26) - module
|
||||
- `obvi` (17) - obviously
|
||||
- `tn` (16) - task nursery (a `trio` nursery, e.g. `service_tn`)
|
||||
- `fn` (15) - function
|
||||
- `vs` (15) - versus
|
||||
- `bc` (14) - because
|
||||
- `var` (14) - variable
|
||||
- `prolly` (9) - probably
|
||||
- `ep` (6) - entry point
|
||||
- `OW` (5) - otherwise
|
||||
- `rn` (4) - right now
|
||||
- `sig` (4) - signal/signature
|
||||
- `deps` (3) - dependencies
|
||||
- `iface` (2) - interface
|
||||
- `subproc` (2) - subprocess
|
||||
- `tho` (2) - though
|
||||
- `ofc` (2) - of course
|
||||
|
||||
### Tone and Style
|
||||
|
||||
- Casual but technical (use `XD` for humor: 23 times)
|
||||
- Use `..` for trailing thoughts (108 occurrences)
|
||||
- Use `Woops,` to acknowledge mistakes (4 subject lines)
|
||||
- Don't be afraid to show personality while being precise
|
||||
|
||||
### Example Bodies
|
||||
|
||||
Simple with bullets:
|
||||
```
|
||||
Add `multiaddr` and bump up some deps
|
||||
|
||||
Since we're planning to use it for (discovery)
|
||||
addressing, allowing replacement of the hacky (pretend)
|
||||
attempt in `tractor._multiaddr` Bp
|
||||
|
||||
Also pin some deps,
|
||||
- make us py312+
|
||||
- use `pdbp` with my frame indexing fix.
|
||||
- mv to latest `xonsh` for fancy cmd/suggestion injections.
|
||||
|
||||
Bump lock file to match obvi!
|
||||
```
|
||||
|
||||
With section markers:
|
||||
```
|
||||
Use `stackscope` for all actor-tree rendered "views"
|
||||
|
||||
Instead of the (much more) limited and hacky `.devx._code`
|
||||
impls, move to using the new `.devx._stackscope` API which
|
||||
wraps the `stackscope` project.
|
||||
|
||||
Deats,
|
||||
- make new `stackscope.extract_stack()` wrapper
|
||||
- port over frame-descing to `_stackscope.pformat_stack()`
|
||||
- move `PdbREPL` to use `stackscope` render approach
|
||||
- update tests for new stack output format
|
||||
|
||||
Also,
|
||||
- tweak log formatting for consistency
|
||||
- add typing hints throughout
|
||||
```
|
||||
|
||||
## Special Patterns
|
||||
|
||||
### WIP Commits
|
||||
Rare (0.2%) - avoid committing WIP if possible
|
||||
|
||||
### Merge Commits
|
||||
Auto-generated (4.4%), don't worry about style
|
||||
|
||||
### File References
|
||||
- Use `module.py` or `.submodule` style
|
||||
- Avoid `file.py:line` references (0 in analysis)
|
||||
|
||||
### Links
|
||||
- GitHub links used sparingly (3 total)
|
||||
- Prefer code references over external links
|
||||
|
||||
## Footer
|
||||
|
||||
The default footer should credit `claude` (you) for helping generate
|
||||
the commit msg content:
|
||||
|
||||
```
|
||||
(this commit msg was generated in some part by [`claude-code`][claude-code-gh])
|
||||
[claude-code-gh]: https://github.com/anthropics/claude-code
|
||||
```
|
||||
|
||||
Further, if the patch was solely or in part written
|
||||
by `claude`, instead add:
|
||||
|
||||
```
|
||||
(this patch was generated in some part by [`claude-code`][claude-code-gh])
|
||||
[claude-code-gh]: https://github.com/anthropics/claude-code
|
||||
```
|
||||
|
||||
## Summary Checklist
|
||||
|
||||
Before committing, verify:
|
||||
- [ ] Subject line uses present tense verb
|
||||
- [ ] Subject line ~50 chars (hard max 67)
|
||||
- [ ] Code elements wrapped in backticks
|
||||
- [ ] Body lines ≤67 chars
|
||||
- [ ] Abbreviations used where natural
|
||||
- [ ] Casual yet precise tone
|
||||
- [ ] Section markers if body >3 paragraphs
|
||||
- [ ] Technical accuracy maintained
|
||||
|
||||
## Analysis Metadata
|
||||
|
||||
```
|
||||
Source: tractor repository
|
||||
Commits analyzed: 500
|
||||
Date range: 2019-2025
|
||||
Analysis date: 2026-02-08
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
(this style guide was generated by [`claude-code`][claude-code-gh]
|
||||
analyzing commit history)
|
||||
|
||||
[claude-code-gh]: https://github.com/anthropics/claude-code
|
||||
|
|
@ -0,0 +1,231 @@
|
|||
---
|
||||
name: conc-anal
|
||||
description: >
|
||||
Concurrency analysis for tractor's trio-based
|
||||
async primitives. Trace task scheduling across
|
||||
checkpoint boundaries, identify race windows in
|
||||
shared mutable state, and verify synchronization
|
||||
correctness. Invoke on code segments the user
|
||||
points at, OR proactively when reviewing/writing
|
||||
concurrent cache, lock, or multi-task acm code.
|
||||
argument-hint: "[file:line-range or function name]"
|
||||
allowed-tools:
|
||||
- Read
|
||||
- Grep
|
||||
- Glob
|
||||
- Task
|
||||
---
|
||||
|
||||
Perform a structured concurrency analysis on the
|
||||
target code. This skill should be invoked:
|
||||
|
||||
- **On demand**: user points at a code segment
|
||||
(file:lines, function name, or pastes a snippet)
|
||||
- **Proactively**: when writing or reviewing code
|
||||
that touches shared mutable state across trio
|
||||
tasks — especially `_Cache`, locks, events, or
|
||||
multi-task `@acm` lifecycle management
|
||||
|
||||
## 0. Identify the target
|
||||
|
||||
If the user provides a file:line-range or function
|
||||
name, read that code. If not explicitly provided,
|
||||
identify the relevant concurrent code from context
|
||||
(e.g. the current diff, a failing test, or the
|
||||
function under discussion).
|
||||
|
||||
## 1. Inventory shared mutable state
|
||||
|
||||
List every piece of state that is accessed by
|
||||
multiple tasks. For each, note:
|
||||
|
||||
- **What**: the variable/dict/attr (e.g.
|
||||
`_Cache.values`, `_Cache.resources`,
|
||||
`_Cache.users`)
|
||||
- **Scope**: class-level, module-level, or
|
||||
closure-captured
|
||||
- **Writers**: which tasks/code-paths mutate it
|
||||
- **Readers**: which tasks/code-paths read it
|
||||
- **Guarded by**: which lock/event/ordering
|
||||
protects it (or "UNGUARDED" if none)
|
||||
|
||||
Format as a table:
|
||||
|
||||
```
|
||||
| State | Writers | Readers | Guard |
|
||||
|---------------------|-----------------|-----------------|----------------|
|
||||
| _Cache.values | run_ctx, moc¹ | moc | ctx_key lock |
|
||||
| _Cache.resources | run_ctx, moc | moc, run_ctx | UNGUARDED |
|
||||
```
|
||||
|
||||
¹ `moc` = `maybe_open_context`
|
||||
|
||||
## 2. Map checkpoint boundaries
|
||||
|
||||
For each code path through the target, mark every
|
||||
**checkpoint** — any `await` expression where trio
|
||||
can switch to another task. Use line numbers:
|
||||
|
||||
```
|
||||
L325: await lock.acquire() ← CHECKPOINT
|
||||
L395: await service_tn.start(...) ← CHECKPOINT
|
||||
L411: lock.release() ← (not a checkpoint, but changes lock state)
|
||||
L414: yield (False, yielded) ← SUSPEND (caller runs)
|
||||
L485: no_more_users.set() ← (wakes run_ctx, no switch yet)
|
||||
```
|
||||
|
||||
**Key trio scheduling rules to apply:**
|
||||
- `Event.set()` makes waiters *ready* but does NOT
|
||||
switch immediately
|
||||
- `lock.release()` is not a checkpoint
|
||||
- `await sleep(0)` IS a checkpoint
|
||||
- Code in `finally` blocks CAN have checkpoints
|
||||
(unlike asyncio)
|
||||
- `await` inside `except` blocks can be
|
||||
`trio.Cancelled`-masked
|
||||
|
||||
## 3. Trace concurrent task schedules
|
||||
|
||||
Write out the **interleaved execution trace** for
|
||||
the problematic scenario. Number each step and tag
|
||||
which task executes it:
|
||||
|
||||
```
|
||||
[Task A] 1. acquires lock
|
||||
[Task A] 2. cache miss → allocates resources
|
||||
[Task A] 3. releases lock
|
||||
[Task A] 4. yields to caller
|
||||
[Task A] 5. caller exits → finally runs
|
||||
[Task A] 6. users-- → 0, sets no_more_users
|
||||
[Task A] 7. pops lock from _Cache.locks
|
||||
[run_ctx] 8. wakes from no_more_users.wait()
|
||||
[run_ctx] 9. values.pop(ctx_key)
|
||||
[run_ctx] 10. acm __aexit__ → CHECKPOINT
|
||||
[Task B] 11. creates NEW lock (old one popped)
|
||||
[Task B] 12. acquires immediately
|
||||
[Task B] 13. values[ctx_key] → KeyError
|
||||
[Task B] 14. resources[ctx_key] → STILL EXISTS
|
||||
[Task B] 15. 💥 RuntimeError
|
||||
```
|
||||
|
||||
Identify the **race window**: the range of steps
|
||||
where state is inconsistent. In the example above,
|
||||
steps 9–10 are the window (values gone, resources
|
||||
still alive).
|
||||
|
||||
## 4. Classify the bug
|
||||
|
||||
Categorize what kind of concurrency issue this is:
|
||||
|
||||
- **TOCTOU** (time-of-check-to-time-of-use): state
|
||||
changes between a check and the action based on it
|
||||
- **Stale reference**: a task holds a reference to
|
||||
state that another task has invalidated
|
||||
- **Lifetime mismatch**: a synchronization primitive
|
||||
(lock, event) has a shorter lifetime than the
|
||||
state it's supposed to protect
|
||||
- **Missing guard**: shared state is accessed
|
||||
without any synchronization
|
||||
- **Atomicity gap**: two operations that should be
|
||||
atomic have a checkpoint between them
|
||||
|
||||
## 5. Propose fixes
|
||||
|
||||
For each proposed fix, provide:
|
||||
|
||||
- **Sketch**: pseudocode or diff showing the change
|
||||
- **How it closes the window**: which step(s) from
|
||||
the trace it eliminates or reorders
|
||||
- **Tradeoffs**: complexity, perf, new edge cases,
|
||||
impact on other code paths
|
||||
- **Risk**: what could go wrong (deadlocks, new
|
||||
races, cancellation issues)
|
||||
|
||||
Rate each fix: `[simple|moderate|complex]` impl
|
||||
effort.
|
||||
|
||||
## 6. Output format
|
||||
|
||||
Structure the full analysis as:
|
||||
|
||||
```markdown
|
||||
## Concurrency analysis: `<target>`
|
||||
|
||||
### Shared state
|
||||
<table from step 1>
|
||||
|
||||
### Checkpoints
|
||||
<list from step 2>
|
||||
|
||||
### Race trace
|
||||
<interleaved trace from step 3>
|
||||
|
||||
### Classification
|
||||
<bug type from step 4>
|
||||
|
||||
### Fixes
|
||||
<proposals from step 5>
|
||||
```
|
||||
|
||||
## Tractor-specific patterns to watch
|
||||
|
||||
These are known problem areas in tractor's
|
||||
concurrency model. Flag them when encountered:
|
||||
|
||||
### `_Cache` lock vs `run_ctx` lifetime
|
||||
|
||||
The `_Cache.locks` entry is managed by
|
||||
`maybe_open_context` callers, but `run_ctx` runs
|
||||
in `service_tn` — a different task tree. Lock
|
||||
pop/release in the caller's `finally` does NOT
|
||||
wait for `run_ctx` to finish tearing down. Any
|
||||
state that `run_ctx` cleans up in its `finally`
|
||||
(e.g. `resources.pop()`) is vulnerable to
|
||||
re-entry races after the lock is popped.
|
||||
|
||||
### `values.pop()` → acm `__aexit__` → `resources.pop()` gap
|
||||
|
||||
In `_Cache.run_ctx`, the inner `finally` pops
|
||||
`values`, then the acm's `__aexit__` runs (which
|
||||
has checkpoints), then the outer `finally` pops
|
||||
`resources`. This creates a window where `values`
|
||||
is gone but `resources` still exists — a classic
|
||||
atomicity gap.
|
||||
|
||||
### Global vs per-key counters
|
||||
|
||||
`_Cache.users` as a single `int` (pre-fix) meant
|
||||
that users of different `ctx_key`s inflated each
|
||||
other's counts, preventing teardown when one key's
|
||||
users hit zero. Always verify that per-key state
|
||||
(`users`, `locks`) is actually keyed on `ctx_key`
|
||||
and not on `fid` or some broader key.
|
||||
|
||||
### `Event.set()` wakes but doesn't switch
|
||||
|
||||
`trio.Event.set()` makes waiting tasks *ready* but
|
||||
the current task continues executing until its next
|
||||
checkpoint. Code between `.set()` and the next
|
||||
`await` runs atomically from the scheduler's
|
||||
perspective. Use this to your advantage (or watch
|
||||
for bugs where code assumes the woken task runs
|
||||
immediately).
|
||||
|
||||
### `except` block checkpoint masking
|
||||
|
||||
`await` expressions inside `except` handlers can
|
||||
be masked by `trio.Cancelled`. If a `finally`
|
||||
block runs from an `except` and contains
|
||||
`lock.release()`, the release happens — but any
|
||||
`await` after it in the same `except` may be
|
||||
swallowed. This is why `maybe_open_context`'s
|
||||
cache-miss path does `lock.release()` in a
|
||||
`finally` inside the `except KeyError`.
|
||||
|
||||
### Cancellation in `finally`
|
||||
|
||||
Unlike asyncio, trio allows checkpoints in
|
||||
`finally` blocks. This means `finally` cleanup
|
||||
that does `await` can itself be cancelled (e.g.
|
||||
by nursery shutdown). Watch for cleanup code that
|
||||
assumes it will run to completion.
|
||||
|
|
@ -0,0 +1,241 @@
|
|||
# PR/Patch-Request Description Format Reference
|
||||
|
||||
Canonical structure for `tractor` patch-request
|
||||
descriptions, designed to work across GitHub,
|
||||
Gitea, SourceHut, and GitLab markdown renderers.
|
||||
|
||||
**Line length: wrap at 72 chars** for all prose
|
||||
content (Summary bullets, Motivation paragraphs,
|
||||
Scopes bullets, etc.). Fill lines *to* 72 — don't
|
||||
stop short at 50-65. Only raw URLs in
|
||||
reference-link definitions may exceed this.
|
||||
|
||||
## Template
|
||||
|
||||
```markdown
|
||||
<!-- pr-msg-meta
|
||||
branch: <branch-name>
|
||||
base: <base-branch>
|
||||
submitted:
|
||||
github: ___
|
||||
gitea: ___
|
||||
srht: ___
|
||||
-->
|
||||
|
||||
## <Title: present-tense verb + backticked code>
|
||||
|
||||
### Summary
|
||||
- [<hash>][<hash>] Description of change ending
|
||||
with period.
|
||||
- [<hash>][<hash>] Another change description
|
||||
ending with period.
|
||||
- [<hash>][<hash>] [<hash>][<hash>] Multi-commit
|
||||
change description.
|
||||
|
||||
### Motivation
|
||||
<1-2 paragraphs: problem/limitation first,
|
||||
then solution. Hard-wrap at 72 chars.>
|
||||
|
||||
### Scopes changed
|
||||
- [<hash>][<hash>] `pkg.mod.func()` — what
|
||||
changed.
|
||||
* [<hash>][<hash>] Also adjusts
|
||||
`.related_thing()` in same module.
|
||||
- [<hash>][<hash>] `tests.test_mod` — new/changed
|
||||
test coverage.
|
||||
|
||||
<!--
|
||||
### Cross-references
|
||||
Also submitted as
|
||||
[github-pr][] | [gitea-pr][] | [srht-patch][].
|
||||
|
||||
### Links
|
||||
- [relevant-issue-or-discussion](url)
|
||||
- [design-doc-or-screenshot](url)
|
||||
-->
|
||||
|
||||
(this pr content was generated in some part by
|
||||
[`claude-code`][claude-code-gh])
|
||||
|
||||
[<hash>]: https://<service>/<owner>/<repo>/commit/<hash>
|
||||
[claude-code-gh]: https://github.com/anthropics/claude-code
|
||||
|
||||
<!-- cross-service pr refs (fill after submit):
|
||||
[github-pr]: https://github.com/<owner>/<repo>/pull/___
|
||||
[gitea-pr]: https://<host>/<owner>/<repo>/pulls/___
|
||||
[srht-patch]: https://git.sr.ht/~<owner>/<repo>/patches/___
|
||||
-->
|
||||
```
|
||||
|
||||
## Markdown Reference-Link Strategy
|
||||
|
||||
Use reference-style links for ALL commit hashes
|
||||
and cross-service PR refs to ensure cross-service
|
||||
compatibility:
|
||||
|
||||
**Inline usage** (in bullets):
|
||||
```markdown
|
||||
- [f3726cf9][f3726cf9] Add `reg_err_types()`
|
||||
for custom exc lookup.
|
||||
```
|
||||
|
||||
**Definition** (bottom of document):
|
||||
```markdown
|
||||
[f3726cf9]: https://github.com/goodboy/tractor/commit/f3726cf9
|
||||
```
|
||||
|
||||
### Why reference-style?
|
||||
- Keeps prose readable without long inline URLs.
|
||||
- All URLs in one place — trivially swappable
|
||||
per-service.
|
||||
- Most git services auto-link bare SHAs anyway,
|
||||
but explicit refs guarantee it works in *any*
|
||||
md renderer.
|
||||
- The `[hash][hash]` form is self-documenting —
|
||||
display text matches the ref ID.
|
||||
- Cross-service PR refs use the same mechanism:
|
||||
`[github-pr][]` resolves via a ref-link def
|
||||
at the bottom, trivially fillable post-submit.
|
||||
|
||||
## Cross-Service PR Placeholder Mechanism
|
||||
|
||||
The generated description includes three layers
|
||||
of cross-service support, all using native md
|
||||
reference-links:
|
||||
|
||||
### 1. Metadata comment (top of file)
|
||||
|
||||
```markdown
|
||||
<!-- pr-msg-meta
|
||||
branch: remote_exc_type_registry
|
||||
base: main
|
||||
submitted:
|
||||
github: ___
|
||||
gitea: ___
|
||||
srht: ___
|
||||
-->
|
||||
```
|
||||
|
||||
A YAML-ish HTML comment block. The `___`
|
||||
placeholders get filled with PR/patch numbers
|
||||
after submission. Machine-parseable for tooling
|
||||
(e.g. `gish`) but invisible in rendered md.
|
||||
|
||||
### 2. Cross-references section (in body)
|
||||
|
||||
```markdown
|
||||
<!--
|
||||
### Cross-references
|
||||
Also submitted as
|
||||
[github-pr][] | [gitea-pr][] | [srht-patch][].
|
||||
-->
|
||||
```
|
||||
|
||||
Commented out at generation time. After submitting
|
||||
to multiple services, uncomment and the ref-links
|
||||
resolve via the stubs at the bottom.
|
||||
|
||||
### 3. Ref-link stubs (bottom of file)
|
||||
|
||||
```markdown
|
||||
<!-- cross-service pr refs (fill after submit):
|
||||
[github-pr]: https://github.com/goodboy/tractor/pull/___
|
||||
[gitea-pr]: https://pikers.dev/goodboy/tractor/pulls/___
|
||||
[srht-patch]: https://git.sr.ht/~goodboy/tractor/patches/___
|
||||
-->
|
||||
```
|
||||
|
||||
Commented out with `___` number placeholders.
|
||||
After submission: uncomment, replace `___` with
|
||||
the actual number. Each service-specific copy
|
||||
fills in all services' numbers so any copy can
|
||||
cross-reference the others.
|
||||
|
||||
### Post-submission file layout
|
||||
|
||||
```
|
||||
pr_msg_LATEST.md # latest draft (skill root)
|
||||
msgs/
|
||||
20260325T002027Z_mybranch_pr_msg.md # timestamped
|
||||
github/
|
||||
42_pr_msg.md # github PR #42
|
||||
gitea/
|
||||
17_pr_msg.md # gitea PR #17
|
||||
srht/
|
||||
5_pr_msg.md # srht patch #5
|
||||
```
|
||||
|
||||
Each `<service>/<num>_pr_msg.md` is a copy with:
|
||||
- metadata `submitted:` fields filled in
|
||||
- cross-references section uncommented
|
||||
- ref-link stubs uncommented with real numbers
|
||||
- all services cross-linked in each copy
|
||||
|
||||
This mirrors the `gish` skill's
|
||||
`<backend>/<num>.md` pattern.
|
||||
|
||||
## Commit-Link URL Patterns by Service
|
||||
|
||||
| Service | Pattern |
|
||||
|-----------|-------------------------------------|
|
||||
| GitHub | `https://github.com/<o>/<r>/commit/<h>` |
|
||||
| Gitea | `https://<host>/<o>/<r>/commit/<h>` |
|
||||
| SourceHut | `https://git.sr.ht/~<o>/<r>/commit/<h>` |
|
||||
| GitLab | `https://gitlab.com/<o>/<r>/-/commit/<h>` |
|
||||
|
||||
## PR/Patch URL Patterns by Service
|
||||
|
||||
| Service | Pattern |
|
||||
|-----------|-------------------------------------|
|
||||
| GitHub | `https://github.com/<o>/<r>/pull/<n>` |
|
||||
| Gitea | `https://<host>/<o>/<r>/pulls/<n>` |
|
||||
| SourceHut | `https://git.sr.ht/~<o>/<r>/patches/<n>` |
|
||||
| GitLab | `https://gitlab.com/<o>/<r>/-/merge_requests/<n>` |
|
||||
|
||||
## Scope Naming Convention
|
||||
|
||||
Use Python namespace-resolution syntax for
|
||||
referencing changed code scopes:
|
||||
|
||||
| File path | Scope reference |
|
||||
|---------------------------|-------------------------------|
|
||||
| `tractor/_exceptions.py` | `tractor._exceptions` |
|
||||
| `tractor/_state.py` | `tractor._state` |
|
||||
| `tests/test_foo.py` | `tests.test_foo` |
|
||||
| Function in module | `tractor._exceptions.func()` |
|
||||
| Method on class | `.RemoteActorError.src_type` |
|
||||
| Class | `tractor._exceptions.RAE` |
|
||||
|
||||
Prefix with the package path for top-level refs;
|
||||
use leading-dot shorthand (`.ClassName.method()`)
|
||||
for sub-bullets where the parent module is already
|
||||
established.
|
||||
|
||||
## Title Conventions
|
||||
|
||||
Same verb vocabulary as commit messages:
|
||||
- `Add` — wholly new feature/API
|
||||
- `Fix` — bug fix
|
||||
- `Drop` — removal
|
||||
- `Use` — adopt new approach
|
||||
- `Move`/`Mv` — relocate code
|
||||
- `Adjust` — minor tweak
|
||||
- `Update` — enhance existing feature
|
||||
- `Support` — add support for something
|
||||
|
||||
Target 50 chars, hard max 70. Always backtick
|
||||
code elements.
|
||||
|
||||
## Tone
|
||||
|
||||
Casual yet technically precise — matching the
|
||||
project's commit-msg style. Terse but every bullet
|
||||
carries signal. Use project abbreviations freely
|
||||
(msg, bg, ctx, impl, mod, obvi, fn, bc, var,
|
||||
prolly, ep, etc.).
|
||||
|
||||
---
|
||||
|
||||
(this format reference was generated by
|
||||
[`claude-code`][claude-code-gh])
|
||||
[claude-code-gh]: https://github.com/anthropics/claude-code
|
||||
|
|
@ -0,0 +1,358 @@
|
|||
---
|
||||
name: run-tests
|
||||
description: >
|
||||
Run tractor test suite (or subsets). Use when the user wants
|
||||
to run tests, verify changes, or check for regressions.
|
||||
argument-hint: "[test-path-or-pattern] [--opts]"
|
||||
allowed-tools:
|
||||
- Bash(python -m pytest *)
|
||||
- Bash(python -c *)
|
||||
- Bash(python --version *)
|
||||
- Bash(UV_PROJECT_ENVIRONMENT=py* uv run python *)
|
||||
- Bash(UV_PROJECT_ENVIRONMENT=py* uv run pytest *)
|
||||
- Bash(UV_PROJECT_ENVIRONMENT=py* uv sync *)
|
||||
- Bash(UV_PROJECT_ENVIRONMENT=py* uv pip show *)
|
||||
- Bash(git rev-parse *)
|
||||
- Bash(ls *)
|
||||
- Bash(cat *)
|
||||
- Bash(jq * .pytest_cache/*)
|
||||
- Read
|
||||
- Grep
|
||||
- Glob
|
||||
- Task
|
||||
- AskUserQuestion
|
||||
---
|
||||
|
||||
Run the `tractor` test suite using `pytest`. Follow this
|
||||
process:
|
||||
|
||||
## 1. Parse user intent
|
||||
|
||||
From the user's message and any arguments, determine:
|
||||
|
||||
- **scope**: full suite, specific file(s), specific
|
||||
test(s), or a keyword pattern (`-k`).
|
||||
- **transport**: which IPC transport protocol to test
|
||||
against (default: `tcp`, also: `uds`).
|
||||
- **options**: any extra pytest flags the user wants
|
||||
(e.g. `--ll debug`, `--tpdb`, `-x`, `-v`).
|
||||
|
||||
If the user provides a bare path or pattern as argument,
|
||||
treat it as the test target. Examples:
|
||||
|
||||
- `/run-tests` → full suite
|
||||
- `/run-tests test_local.py` → single file
|
||||
- `/run-tests test_registrar -v` → file + verbose
|
||||
- `/run-tests -k cancel` → keyword filter
|
||||
- `/run-tests tests/ipc/ --tpt-proto uds` → subdir + UDS
|
||||
|
||||
## 2. Construct the pytest command
|
||||
|
||||
Base command:
|
||||
```
|
||||
python -m pytest
|
||||
```
|
||||
|
||||
### Default flags (always include unless user overrides):
|
||||
- `-x` (stop on first failure)
|
||||
- `--tb=short` (concise tracebacks)
|
||||
- `--no-header` (reduce noise)
|
||||
|
||||
### Path resolution:
|
||||
- If the user gives a bare filename like `test_local.py`,
|
||||
resolve it under `tests/`.
|
||||
- If the user gives a subdirectory like `ipc/`, resolve
|
||||
under `tests/ipc/`.
|
||||
- Glob if needed: `tests/**/test_*<pattern>*.py`
|
||||
|
||||
### Key pytest options for this project:
|
||||
|
||||
| Flag | Purpose |
|---|---|
| `--ll <level>` | Set tractor log level (e.g. `debug`, `info`, `runtime`) |
| `--tpdb` / `--debug-mode` | Enable tractor's multi-proc debugger |
| `--tpt-proto <key>` | IPC transport: `tcp` (default) or `uds` |
| `--spawn-backend <be>` | Spawn method: `trio` (default), `mp_spawn`, `mp_forkserver` |
| `-k <expr>` | pytest keyword filter |
| `-v` / `-vv` | Verbosity |
| `-s` | No output capture (useful with `--tpdb`) |
||||
|
||||
### Common combos:
|
||||
```sh
|
||||
# quick smoke test of core modules
|
||||
python -m pytest tests/test_local.py tests/test_rpc.py -x --tb=short --no-header
|
||||
|
||||
# full suite, stop on first failure
|
||||
python -m pytest tests/ -x --tb=short --no-header
|
||||
|
||||
# specific test with debug
|
||||
python -m pytest tests/discovery/test_registrar.py::test_reg_then_unreg -x -s --tpdb --ll debug
|
||||
|
||||
# run with UDS transport
|
||||
python -m pytest tests/ -x --tb=short --no-header --tpt-proto uds
|
||||
|
||||
# keyword filter
|
||||
python -m pytest tests/ -x --tb=short --no-header -k "cancel and not slow"
|
||||
```
|
||||
|
||||
## 3. Pre-flight: venv detection (MANDATORY)
|
||||
|
||||
**Always verify a `uv` venv is active before running
|
||||
`python` or `pytest`.** This project uses
|
||||
`UV_PROJECT_ENVIRONMENT=py<MINOR>` naming (e.g.
|
||||
`py313`) — never `.venv`.
|
||||
|
||||
### Step 1: detect active venv
|
||||
|
||||
Run this check first:
|
||||
|
||||
```sh
|
||||
python -c "
|
||||
import sys, os
|
||||
venv = os.environ.get('VIRTUAL_ENV', '')
|
||||
prefix = sys.prefix
|
||||
print(f'VIRTUAL_ENV={venv}')
|
||||
print(f'sys.prefix={prefix}')
|
||||
print(f'executable={sys.executable}')
|
||||
"
|
||||
```
|
||||
|
||||
### Step 2: interpret results
|
||||
|
||||
**Case A — venv is active** (`VIRTUAL_ENV` is set
|
||||
and points to a `py<MINOR>/` dir under the project
|
||||
root or worktree):
|
||||
|
||||
Use bare `python` / `python -m pytest` for all
|
||||
commands. This is the normal, fast path.
|
||||
|
||||
**Case B — no venv active** (`VIRTUAL_ENV` is empty
|
||||
or `sys.prefix` points to a system Python):
|
||||
|
||||
Use `AskUserQuestion` to ask the user:
|
||||
|
||||
> "No uv venv is active. Should I activate one
|
||||
> via `UV_PROJECT_ENVIRONMENT=py<MINOR> uv sync`,
|
||||
> or would you prefer to activate your shell venv
|
||||
> first?"
|
||||
|
||||
Options:
|
||||
1. **"Create/sync venv"** — run
|
||||
`UV_PROJECT_ENVIRONMENT=py<MINOR> uv sync` where
|
||||
`<MINOR>` is detected from `python --version`
|
||||
(e.g. `313` for 3.13). Then use
|
||||
`py<MINOR>/bin/python` for all subsequent
|
||||
commands in this session.
|
||||
2. **"I'll activate it myself"** — stop and let the
|
||||
user `source py<MINOR>/bin/activate` or similar.
|
||||
|
||||
**Case C — inside a git worktree** (`git rev-parse
|
||||
--git-common-dir` differs from `--git-dir`):
|
||||
|
||||
Verify Python resolves from the **worktree's own
|
||||
venv**, not the main repo's:
|
||||
|
||||
```sh
|
||||
python -c "import tractor; print(tractor.__file__)"
|
||||
```
|
||||
|
||||
If the path points outside the worktree, create a
|
||||
worktree-local venv:
|
||||
|
||||
```sh
|
||||
UV_PROJECT_ENVIRONMENT=py<MINOR> uv sync
|
||||
```
|
||||
|
||||
Then use `py<MINOR>/bin/python` for all commands.
|
||||
|
||||
**Why this matters**: without the correct venv,
|
||||
subprocesses spawned by tractor resolve modules
|
||||
from the wrong editable install, causing spurious
|
||||
`AttributeError` / `ModuleNotFoundError`.
|
||||
|
||||
### Fallback: `uv run`
|
||||
|
||||
If the user can't or won't activate a venv, all
|
||||
`python` and `pytest` commands can be prefixed
|
||||
with `UV_PROJECT_ENVIRONMENT=py<MINOR> uv run`:
|
||||
|
||||
```sh
|
||||
# instead of: python -m pytest tests/ -x
|
||||
UV_PROJECT_ENVIRONMENT=py313 uv run pytest tests/ -x
|
||||
|
||||
# instead of: python -c 'import tractor'
|
||||
UV_PROJECT_ENVIRONMENT=py313 uv run python -c 'import tractor'
|
||||
```
|
||||
|
||||
`uv run` auto-discovers the project and venv,
|
||||
but is slower than a pre-activated venv due to
|
||||
lock-file resolution on each invocation. Prefer
|
||||
activating the venv when possible.
|
||||
|
||||
### Step 3: import + collection checks
|
||||
|
||||
After venv is confirmed, always run these
|
||||
(especially after refactors or module moves):
|
||||
|
||||
```sh
|
||||
# 1. package import smoke check
|
||||
python -c 'import tractor; print(tractor)'
|
||||
|
||||
# 2. verify all tests collect (no import errors)
|
||||
python -m pytest tests/ -x -q --co 2>&1 | tail -5
|
||||
```
|
||||
|
||||
If either fails, fix the import error before running
|
||||
any actual tests.
|
||||
|
||||
## 4. Run and report
|
||||
|
||||
- Run the constructed command.
|
||||
- Use a timeout of **600000ms** (10min) for full suite
|
||||
runs, **120000ms** (2min) for single-file runs.
|
||||
- If the suite is large (full `tests/`), consider running
|
||||
in the background and checking output when done.
|
||||
- Use `--lf` (last-failed) to re-run only previously
|
||||
failing tests when iterating on a fix.
|
||||
|
||||
### On failure:
|
||||
- Show the failing test name(s) and short traceback.
|
||||
- If the failure looks related to recent changes, point
|
||||
out the likely cause and suggest a fix.
|
||||
- **Check the known-flaky list** (section 8) before
|
||||
investigating — don't waste time on pre-existing
|
||||
timeout issues.
|
||||
- **NEVER auto-commit fixes.** If you apply a code fix
|
||||
during test iteration, leave it unstaged. Tell the
|
||||
user what changed and suggest they review the
|
||||
worktree state, stage files manually, and use
|
||||
`/commit-msg` (inline or in a separate session) to
|
||||
generate the commit message. The human drives all
|
||||
`git add` and `git commit` operations.
|
||||
|
||||
### On success:
|
||||
- Report the pass/fail/skip counts concisely.
|
||||
|
||||
## 5. Test directory layout (reference)
|
||||
|
||||
```
|
||||
tests/
|
||||
├── conftest.py # root fixtures, daemon, signals
|
||||
├── devx/ # debugger/tooling tests
|
||||
├── ipc/ # transport protocol tests
|
||||
├── msg/ # messaging layer tests
|
||||
├── discovery/ # discovery subsystem tests
|
||||
│ ├── test_multiaddr.py # multiaddr construction
|
||||
│ └── test_registrar.py # registry/discovery protocol
|
||||
├── test_local.py # registrar + local actor basics
|
||||
├── test_rpc.py # RPC error handling
|
||||
├── test_spawning.py # subprocess spawning
|
||||
├── test_multi_program.py # multi-process tree tests
|
||||
├── test_cancellation.py # cancellation semantics
|
||||
├── test_context_stream_semantics.py # ctx streaming
|
||||
├── test_inter_peer_cancellation.py # peer cancel
|
||||
├── test_infected_asyncio.py # trio-in-asyncio
|
||||
└── ...
|
||||
```
|
||||
|
||||
## 6. Change-type → test mapping
|
||||
|
||||
After modifying specific modules, run the corresponding
|
||||
test subset first for fast feedback:
|
||||
|
||||
| Changed module(s) | Run these tests first |
|---|---|
| `runtime/_runtime.py`, `runtime/_state.py` | `test_local.py test_rpc.py test_spawning.py test_root_runtime.py` |
| `discovery/` (`_registry`, `_discovery`, `_addr`) | `tests/discovery/ test_multi_program.py test_local.py` |
| `_context.py`, `_streaming.py` | `test_context_stream_semantics.py test_advanced_streaming.py` |
| `ipc/` (`_chan`, `_server`, `_transport`) | `tests/ipc/ test_2way.py` |
| `runtime/_portal.py`, `runtime/_rpc.py` | `test_rpc.py test_cancellation.py` |
| `spawn/` (`_spawn`, `_entry`) | `test_spawning.py test_multi_program.py` |
| `devx/debug/` | `tests/devx/test_debugger.py` (slow!) |
| `to_asyncio.py` | `test_infected_asyncio.py test_root_infect_asyncio.py` |
| `msg/` | `tests/msg/` |
| `_exceptions.py` | `test_remote_exc_relay.py test_inter_peer_cancellation.py` |
| `runtime/_supervise.py` | `test_cancellation.py test_spawning.py` |
|
||||
## 7. Quick-check shortcuts
|
||||
|
||||
### After refactors (fastest first-pass):
|
||||
```sh
|
||||
# import + collect check
|
||||
python -c 'import tractor' && python -m pytest tests/ -x -q --co 2>&1 | tail -3
|
||||
|
||||
# core subset (~10s)
|
||||
python -m pytest tests/test_local.py tests/test_rpc.py tests/test_spawning.py tests/discovery/test_registrar.py -x --tb=short --no-header
|
||||
```
|
||||
|
||||
### Inspect last failures (without re-running):
|
||||
|
||||
When the user asks "what failed?", "show failures",
|
||||
or wants to check the last-failed set before
|
||||
re-running — read the pytest cache directly. This
|
||||
is instant and avoids test collection overhead.
|
||||
|
||||
```sh
|
||||
python -c "
|
||||
import json, pathlib, sys
|
||||
p = pathlib.Path('.pytest_cache/v/cache/lastfailed')
|
||||
if not p.exists():
|
||||
print('No lastfailed cache found.'); sys.exit()
|
||||
data = json.loads(p.read_text())
|
||||
# filter to real test node IDs (ignore junk
|
||||
# entries that can accumulate from system paths)
|
||||
tests = sorted(k for k in data if k.startswith('tests/'))
|
||||
if not tests:
|
||||
print('No failures recorded.')
|
||||
else:
|
||||
print(f'{len(tests)} last-failed test(s):')
|
||||
for t in tests:
|
||||
print(f' {t}')
|
||||
"
|
||||
```
|
||||
|
||||
**Why not `--cache-show` or `--co --lf`?**
|
||||
|
||||
- `pytest --cache-show 'cache/lastfailed'` works
|
||||
but dumps raw dict repr including junk entries
|
||||
(stale system paths that leak into the cache).
|
||||
- `pytest --co --lf` actually *collects* tests which
|
||||
triggers import resolution and is slow (~0.5s+).
|
||||
Worse, when cached node IDs don't exactly match
|
||||
current parametrize IDs (e.g. param names changed
|
||||
between runs), pytest falls back to collecting
|
||||
the *entire file*, giving false positives.
|
||||
- Reading the JSON directly is instant, filterable
|
||||
to `tests/`-prefixed entries, and shows exactly
|
||||
what pytest recorded — no interpretation.
|
||||
|
||||
**After inspecting**, re-run the failures:
|
||||
```sh
|
||||
python -m pytest --lf -x --tb=short --no-header
|
||||
```
|
||||
|
||||
### Full suite in background:
|
||||
When core tests pass and you want full coverage while
|
||||
continuing other work, run in background:
|
||||
```sh
|
||||
python -m pytest tests/ -x --tb=short --no-header -q
|
||||
```
|
||||
(use `run_in_background=true` on the Bash tool)
|
||||
|
||||
## 8. Known flaky tests
|
||||
|
||||
These tests have **pre-existing** timing/environment
|
||||
sensitivity. If they fail with `TooSlowError` or
|
||||
pexpect `TIMEOUT`, they are almost certainly NOT caused
|
||||
by your changes — note them and move on.
|
||||
|
||||
| Test | Typical error | Notes |
|---|---|---|
| `devx/test_debugger.py::test_multi_nested_subactors_error_through_nurseries` | pexpect TIMEOUT | Debugger pexpect timing |
| `test_cancellation.py::test_cancel_via_SIGINT_other_task` | TooSlowError | Signal handling race |
| `test_inter_peer_cancellation.py::test_peer_spawns_and_cancels_service_subactor` | TooSlowError | Async timing (both param variants) |
| `test_docs_examples.py::test_example[we_are_processes.py]` | `assert None == 0` | `__main__` missing `__file__` in subproc |
|
||||
**Rule of thumb**: if a test fails with `TooSlowError`,
|
||||
`trio.TooSlowError`, or `pexpect.TIMEOUT` and you didn't
|
||||
touch the relevant code path, it's flaky — skip it.
|
||||
|
|
@ -1,10 +1,18 @@
|
|||
name: CI
|
||||
|
||||
# NOTE distilled from,
|
||||
# https://github.com/orgs/community/discussions/26276
|
||||
on:
|
||||
# any time someone pushes a new branch to origin
|
||||
# any time a new update to 'main'
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
# Allows you to run this workflow manually from the Actions tab
|
||||
# for on all (forked) PRs to repo
|
||||
# NOTE, use a draft PR if you just want CI triggered..
|
||||
pull_request:
|
||||
|
||||
# to run workflow manually from the "Actions" tab
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
|
|
@ -74,24 +82,44 @@ jobs:
|
|||
# run: mypy tractor/ --ignore-missing-imports --show-traceback
|
||||
|
||||
|
||||
testing-linux:
|
||||
name: '${{ matrix.os }} Python ${{ matrix.python }} - ${{ matrix.spawn_backend }}'
|
||||
timeout-minutes: 10
|
||||
testing:
|
||||
name: '${{ matrix.os }} Python${{ matrix.python-version }} spawn_backend=${{ matrix.spawn_backend }} tpt_proto=${{ matrix.tpt_proto }}'
|
||||
timeout-minutes: 16
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest]
|
||||
python-version: ['3.13']
|
||||
os: [
|
||||
ubuntu-latest,
|
||||
macos-latest,
|
||||
]
|
||||
python-version: [
|
||||
'3.13',
|
||||
# '3.14',
|
||||
]
|
||||
spawn_backend: [
|
||||
'trio',
|
||||
# 'mp_spawn',
|
||||
# 'mp_forkserver',
|
||||
# ?TODO^ is it worth it to get these running again?
|
||||
#
|
||||
# - [ ] next-gen backends, on 3.13+
|
||||
# https://github.com/goodboy/tractor/issues/379
|
||||
# 'subinterpreter',
|
||||
# 'subint',
|
||||
]
|
||||
tpt_proto: [
|
||||
'tcp',
|
||||
'uds',
|
||||
]
|
||||
# https://github.com/orgs/community/discussions/26253#discussioncomment-3250989
|
||||
exclude:
|
||||
# don't do UDS run on macOS (for now)
|
||||
- os: macos-latest
|
||||
tpt_proto: 'uds'
|
||||
|
||||
steps:
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: 'Install uv + py-${{ matrix.python-version }}'
|
||||
|
|
@ -118,7 +146,11 @@ jobs:
|
|||
run: uv tree
|
||||
|
||||
- name: Run tests
|
||||
run: uv run pytest tests/ --spawn-backend=${{ matrix.spawn_backend }} -rsx
|
||||
run: >
|
||||
uv run
|
||||
pytest tests/ -rsx
|
||||
--spawn-backend=${{ matrix.spawn_backend }}
|
||||
--tpt-proto=${{ matrix.tpt_proto }}
|
||||
|
||||
# XXX legacy NOTE XXX
|
||||
#
|
||||
|
|
|
|||
|
|
@ -102,3 +102,69 @@ venv.bak/
|
|||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
|
||||
# all files under
|
||||
.git/
|
||||
|
||||
# require very explicit staging for anything we **really**
|
||||
# want put/kept in repo.
|
||||
notes_to_self/
|
||||
snippets/
|
||||
|
||||
# ------- AI shiz -------
|
||||
# `ai.skillz` symlinks,
|
||||
# (machine-local, deploy via deploy-skill.sh)
|
||||
.claude/skills/py-codestyle
|
||||
.claude/skills/close-wkt
|
||||
.claude/skills/plan-io
|
||||
.claude/skills/prompt-io
|
||||
.claude/skills/resolve-conflicts
|
||||
.claude/skills/inter-skill-review
|
||||
|
||||
# /open-wkt specifics
|
||||
.claude/skills/open-wkt
|
||||
.claude/wkts/
|
||||
claude_wkts
|
||||
|
||||
# /code-review-changes specifics
|
||||
.claude/skills/code-review-changes
|
||||
# review-skill ephemeral ctx (per-PR, single-use)
|
||||
.claude/review_context.md
|
||||
.claude/review_regression.md
|
||||
|
||||
# /pr-msg specifics
|
||||
.claude/skills/pr-msg/*
|
||||
# repo-specific
|
||||
!.claude/skills/pr-msg/format-reference.md
|
||||
# XXX, so u can nvim-telescope this file.
|
||||
# !.claude/skills/pr-msg/pr_msg_LATEST.md
|
||||
|
||||
# /commit-msg specifics
|
||||
# - any commit-msg gen tmp files
|
||||
.claude/*_commit_*.md
|
||||
.claude/*_commit*.txt
|
||||
.claude/skills/commit-msg/*
|
||||
!.claude/skills/commit-msg/style-duie-reference.md
|
||||
|
||||
# use prompt-io instead?
|
||||
.claude/plans
|
||||
|
||||
# nix develop --profile .nixdev
|
||||
.nixdev*
|
||||
|
||||
# :Obsession .
|
||||
Session.vim
|
||||
|
||||
# `gish` local `.md`-files
|
||||
# TODO? better all around automation!
|
||||
# -[ ] it'd be handy to also commit and sync with wtv git service?
|
||||
# -[ ] everything should be put under a `.gish/` no?
|
||||
gitea/
|
||||
gh/
|
||||
|
||||
# ------ macOS ------
|
||||
# Finder metadata
|
||||
**/.DS_Store
|
||||
|
||||
# LLM conversations that should remain private
|
||||
docs/conversations/
|
||||
|
|
|
|||
|
|
@ -0,0 +1,161 @@
|
|||
# `subint` backend: parent trio loop parks after subint teardown (Ctrl-C works; not a CPython-level issue)
|
||||
|
||||
Follow-up to the Phase B subint spawn-backend PR (see
|
||||
`tractor.spawn._subint`, issue #379). Distinct from the
|
||||
`subint_sigint_starvation_issue.md` (SIGINT-unresponsive
|
||||
starvation hang): this one is **Ctrl-C-able**, which means
|
||||
it's *not* the shared-GIL-hostage class and is ours to fix
|
||||
from inside tractor rather than waiting on upstream CPython
|
||||
/ msgspec progress.
|
||||
|
||||
## TL;DR
|
||||
|
||||
After a stuck-subint subactor is torn down via the
|
||||
hard-kill path, a parent-side trio task parks on an
|
||||
*orphaned resource* (most likely a `chan.recv()` /
|
||||
`process_messages` loop on the now-dead subint's IPC
|
||||
channel) and waits forever for bytes that can't arrive —
|
||||
because the channel was torn down without emitting a clean
|
||||
EOF/`BrokenResourceError` to the waiting receiver.
|
||||
|
||||
Unlike `subint_sigint_starvation_issue.md`, the main trio
|
||||
loop **is** iterating normally — SIGINT delivers cleanly
|
||||
and the test unhangs. But absent Ctrl-C, the test suite
|
||||
wedges indefinitely.
|
||||
|
||||
## Symptom
|
||||
|
||||
Running `test_subint_non_checkpointing_child` under
|
||||
`--spawn-backend=subint` (in
|
||||
`tests/test_subint_cancellation.py`):
|
||||
|
||||
1. Test spawns a subactor whose main task runs
|
||||
`threading.Event.wait(1.0)` in a loop — releases the
|
||||
GIL but never inserts a trio checkpoint.
|
||||
2. Parent does `an.cancel_scope.cancel()`. Our
|
||||
`subint_proc` cancel path fires: soft-kill sends
|
||||
`Portal.cancel_actor()` over the live IPC channel →
|
||||
subint's trio loop *should* process the cancel msg on
|
||||
its IPC dispatcher task (since the GIL releases are
|
||||
happening).
|
||||
3. Expected: subint's `trio.run()` unwinds, driver thread
|
||||
exits naturally, parent returns.
|
||||
4. Actual: parent `trio.run()` never completes. Test
|
||||
hangs past its `trio.fail_after()` deadline.
|
||||
|
||||
## Evidence
|
||||
|
||||
### `strace` on the hung pytest process during SIGINT
|
||||
|
||||
```
|
||||
--- SIGINT {si_signo=SIGINT, si_code=SI_KERNEL} ---
|
||||
write(17, "\2", 1) = 1
|
||||
```
|
||||
|
||||
Contrast with the SIGINT-starvation hang (see
|
||||
`subint_sigint_starvation_issue.md`) where that same
|
||||
`write()` returned `EAGAIN`. Here the SIGINT byte is
|
||||
written successfully → Python's signal handler pipe is
|
||||
being drained → main trio loop **is** iterating → SIGINT
|
||||
gets turned into `trio.Cancelled` → the test unhangs (if
|
||||
the operator happens to be there to hit Ctrl-C).
|
||||
|
||||
### Stack dump (via `tractor.devx.dump_on_hang`)
|
||||
|
||||
Single main thread visible, parked in
|
||||
`trio._core._io_epoll.get_events` inside `trio.run` at the
|
||||
test's `trio.run(...)` call site. No subint driver thread
|
||||
(subint was destroyed successfully — this is *after* the
|
||||
hard-kill path, not during it).
|
||||
|
||||
## Root cause hypothesis
|
||||
|
||||
Most consistent with the evidence: a parent-side trio
|
||||
task is awaiting a `chan.recv()` / `process_messages` loop
|
||||
on the dead subint's IPC channel. The sequence:
|
||||
|
||||
1. Soft-kill in `subint_proc` sends `Portal.cancel_actor()`
|
||||
over the channel. The subint's trio dispatcher *may* or
|
||||
may not have processed the cancel msg before the subint
|
||||
was destroyed — timing-dependent.
|
||||
2. Hard-kill timeout fires (because the subint's main
|
||||
task was in `threading.Event.wait()` with no trio
|
||||
checkpoint — cancel-msg processing couldn't race the
|
||||
timeout).
|
||||
3. Driver thread abandoned, `_interpreters.destroy()`
|
||||
runs. Subint is gone.
|
||||
4. But the parent-side trio task holding a
|
||||
`chan.recv()` / `process_messages` loop against that
|
||||
channel was **not** explicitly cancelled. The channel's
|
||||
underlying socket got torn down, but without a clean
|
||||
EOF delivered to the waiting recv, the task parks
|
||||
forever on `trio.lowlevel.wait_readable` (or similar).
|
||||
|
||||
This matches the "main loop fine, task parked on
|
||||
orphaned I/O" signature.
|
||||
|
||||
## Why this is ours to fix (not CPython's)
|
||||
|
||||
- Main trio loop iterates normally → GIL isn't starved.
|
||||
- SIGINT is deliverable → not a signal-pipe-full /
|
||||
wakeup-fd contention scenario.
|
||||
- The hang is in *our* supervision code, specifically in
|
||||
how `subint_proc` tears down its side of the IPC when
|
||||
the subint is abandoned/destroyed.
|
||||
|
||||
## Possible fix directions
|
||||
|
||||
1. **Explicit parent-side channel abort on subint
|
||||
abandon.** In `subint_proc`'s teardown block, after the
|
||||
hard-kill timeout fires, explicitly close the parent's
|
||||
end of the IPC channel to the subint. Any waiting
|
||||
`chan.recv()` / `process_messages` task sees
|
||||
`BrokenResourceError` (or `ClosedResourceError`) and
|
||||
unwinds.
|
||||
2. **Cancel parent-side RPC tasks tied to the dead
|
||||
subint's channel.** The `Actor._rpc_tasks` / nursery
|
||||
machinery should have a handle on any
|
||||
`process_messages` loops bound to a specific peer
|
||||
channel. Iterate those and cancel explicitly.
|
||||
3. **Bound the top-level `await actor_nursery
|
||||
._join_procs.wait()` shield in `subint_proc`** (same
|
||||
pattern as the other bounded shields the hard-kill
|
||||
patch added). If the nursery never sets `_join_procs`
|
||||
because a child task is parked, the bound would at
|
||||
least let the teardown proceed.
|
||||
|
||||
Of these, (1) is the most surgical and directly addresses
|
||||
the root cause. (2) is a defense-in-depth companion. (3)
|
||||
is a band-aid but cheap to add.
|
||||
|
||||
## Current workaround
|
||||
|
||||
None in-tree. The test's `trio.fail_after()` bound
|
||||
currently fires and raises `TooSlowError`, so the test
|
||||
visibly **fails** rather than hangs — which is
|
||||
intentional (an unbounded cancellation-audit test would
|
||||
defeat itself). But in interactive test runs the operator
|
||||
has to hit Ctrl-C to move past the parked state before
|
||||
pytest reports the failure.
|
||||
|
||||
## Reproducer
|
||||
|
||||
```
|
||||
./py314/bin/python -m pytest \
|
||||
tests/test_subint_cancellation.py::test_subint_non_checkpointing_child \
|
||||
--spawn-backend=subint --tb=short --no-header -v
|
||||
```
|
||||
|
||||
Expected: hangs until `trio.fail_after(15)` fires, or
|
||||
Ctrl-C unwedges it manually.
|
||||
|
||||
## References
|
||||
|
||||
- `tractor.spawn._subint.subint_proc` — current subint
|
||||
teardown code; see the `_HARD_KILL_TIMEOUT` bounded
|
||||
shields + `daemon=True` driver-thread abandonment
|
||||
(commit `b025c982`).
|
||||
- `ai/conc-anal/subint_sigint_starvation_issue.md` — the
|
||||
sibling CPython-level hang (GIL-starvation,
|
||||
SIGINT-unresponsive) which is **not** this issue.
|
||||
- Phase B tracking: issue #379.
|
||||
|
|
@ -0,0 +1,337 @@
|
|||
# `os.fork()` from a non-main sub-interpreter aborts the child (CPython refuses post-fork cleanup)
|
||||
|
||||
Third `subint`-class analysis in this project. Unlike its
|
||||
two siblings (`subint_sigint_starvation_issue.md`,
|
||||
`subint_cancel_delivery_hang_issue.md`), this one is not a
|
||||
hang — it's a **hard CPython-level refusal** of an
|
||||
experimental spawn strategy we wanted to try.
|
||||
|
||||
## TL;DR
|
||||
|
||||
An in-process sub-interpreter cannot be used as a
|
||||
"launchpad" for `os.fork()` on current CPython. The fork
|
||||
syscall succeeds in the parent, but the forked CHILD
|
||||
process is aborted immediately by CPython's post-fork
|
||||
cleanup with:
|
||||
|
||||
```
|
||||
Fatal Python error: _PyInterpreterState_DeleteExceptMain: not main interpreter
|
||||
```
|
||||
|
||||
This is enforced by a hard `PyStatus_ERR` gate in
|
||||
`Python/pystate.c`. The CPython devs acknowledge the
|
||||
fragility with an in-source comment (`// Ideally we could
|
||||
guarantee tstate is running main.`) but provide no
|
||||
mechanism to satisfy the precondition from user code.
|
||||
|
||||
**Implication for tractor**: the `subint_fork` backend
|
||||
sketched in `tractor.spawn._subint_fork` is structurally
|
||||
dead on current CPython. The submodule is kept as
|
||||
documentation of the attempt; `--spawn-backend=subint_fork`
|
||||
raises `NotImplementedError` pointing here.
|
||||
|
||||
## Context — why we tried this
|
||||
|
||||
The motivation is issue #379's "Our own thoughts, ideas
|
||||
for `fork()`-workaround/hacks..." section. The existing
|
||||
trio-backend (`tractor.spawn._trio.trio_proc`) spawns
|
||||
subactors via `trio.lowlevel.open_process()` → ultimately
|
||||
`posix_spawn()` or `fork+exec`, from the parent's main
|
||||
interpreter that is currently running `trio.run()`. This
|
||||
brushes against a known-fragile interaction between
|
||||
`trio` and `fork()` tracked in
|
||||
[python-trio/trio#1614](https://github.com/python-trio/trio/issues/1614)
|
||||
and siblings — mostly mitigated in `tractor`'s case only
|
||||
incidentally (we `exec()` immediately post-fork).
|
||||
|
||||
The idea was:
|
||||
|
||||
1. Create a subint that has *never* imported `trio`.
|
||||
2. From a worker thread in that subint, call `os.fork()`.
|
||||
3. In the child, `execv()` back into
|
||||
`python -m tractor._child` — same as `trio_proc` does.
|
||||
4. The fork is from a trio-free context → trio+fork
|
||||
hazards avoided regardless of downstream behavior.
|
||||
|
||||
The parent-side orchestration (`ipc_server.wait_for_peer`,
|
||||
`SpawnSpec`, `Portal` yield) would reuse
|
||||
`trio_proc`'s flow verbatim, with only the subproc-spawn
|
||||
mechanics swapped.
|
||||
|
||||
## Symptom
|
||||
|
||||
Running the prototype (`tractor.spawn._subint_fork.subint_fork_proc`,
|
||||
see git history prior to the stub revert) on py3.14:
|
||||
|
||||
```
|
||||
Fatal Python error: _PyInterpreterState_DeleteExceptMain: not main interpreter
|
||||
Python runtime state: initialized
|
||||
|
||||
Current thread 0x00007f6b71a456c0 [subint-fork-lau] (most recent call first):
|
||||
File "<script>", line 2 in <module>
|
||||
<script>:2: DeprecationWarning: This process (pid=802985) is multi-threaded, use of fork() may lead to deadlocks in the child.
|
||||
```
|
||||
|
||||
Key clues:
|
||||
|
||||
- The **`DeprecationWarning`** fires in the parent (before
|
||||
fork completes) — fork *is* executing, we get that far.
|
||||
- The **`Fatal Python error`** comes from the child — it
|
||||
aborts during CPython's post-fork C initialization
|
||||
before any user Python runs in the child.
|
||||
- The thread name `subint-fork-lau[nchpad]` is ours —
|
||||
confirms the fork is being called from the launchpad
|
||||
subint's driver thread.
|
||||
|
||||
## CPython source walkthrough
|
||||
|
||||
### Call site — `Modules/posixmodule.c:728-793`
|
||||
|
||||
The post-fork-child hook CPython runs in the child process:
|
||||
|
||||
```c
|
||||
void
|
||||
PyOS_AfterFork_Child(void)
|
||||
{
|
||||
PyStatus status;
|
||||
_PyRuntimeState *runtime = &_PyRuntime;
|
||||
|
||||
// re-creates runtime->interpreters.mutex (HEAD_UNLOCK)
|
||||
status = _PyRuntimeState_ReInitThreads(runtime);
|
||||
...
|
||||
|
||||
PyThreadState *tstate = _PyThreadState_GET();
|
||||
_Py_EnsureTstateNotNULL(tstate);
|
||||
|
||||
...
|
||||
|
||||
// Ideally we could guarantee tstate is running main. ← !!!
|
||||
_PyInterpreterState_ReinitRunningMain(tstate);
|
||||
|
||||
status = _PyEval_ReInitThreads(tstate);
|
||||
...
|
||||
|
||||
status = _PyInterpreterState_DeleteExceptMain(runtime);
|
||||
if (_PyStatus_EXCEPTION(status)) {
|
||||
goto fatal_error;
|
||||
}
|
||||
...
|
||||
|
||||
fatal_error:
|
||||
Py_ExitStatusException(status);
|
||||
}
|
||||
```
|
||||
|
||||
The `// Ideally we could guarantee tstate is running
|
||||
main.` comment is a flashing warning sign — the CPython
|
||||
devs *know* this path is fragile when fork is called from
|
||||
a non-main subint, but they've chosen to abort rather than
|
||||
silently corrupt state. Arguably the right call.
|
||||
|
||||
### The refusal — `Python/pystate.c:1035-1075`
|
||||
|
||||
```c
|
||||
/*
|
||||
* Delete all interpreter states except the main interpreter. If there
|
||||
* is a current interpreter state, it *must* be the main interpreter.
|
||||
*/
|
||||
PyStatus
|
||||
_PyInterpreterState_DeleteExceptMain(_PyRuntimeState *runtime)
|
||||
{
|
||||
struct pyinterpreters *interpreters = &runtime->interpreters;
|
||||
|
||||
PyThreadState *tstate = _PyThreadState_Swap(runtime, NULL);
|
||||
if (tstate != NULL && tstate->interp != interpreters->main) {
|
||||
return _PyStatus_ERR("not main interpreter"); ← our error
|
||||
}
|
||||
|
||||
HEAD_LOCK(runtime);
|
||||
PyInterpreterState *interp = interpreters->head;
|
||||
interpreters->head = NULL;
|
||||
while (interp != NULL) {
|
||||
if (interp == interpreters->main) {
|
||||
interpreters->main->next = NULL;
|
||||
interpreters->head = interp;
|
||||
interp = interp->next;
|
||||
continue;
|
||||
}
|
||||
|
||||
// XXX Won't this fail since PyInterpreterState_Clear() requires
|
||||
// the "current" tstate to be set?
|
||||
PyInterpreterState_Clear(interp); // XXX must activate?
|
||||
zapthreads(interp);
|
||||
...
|
||||
}
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
The function's header comment (`If there is a current
|
||||
interpreter state, it *must* be the main interpreter.`) is
|
||||
the formal API contract. The `XXX` comments further in
|
||||
suggest the CPython team is already aware this function
|
||||
has latent issues even in the happy path.
|
||||
|
||||
## Chain summary
|
||||
|
||||
1. Our launchpad subint's driver OS-thread calls
|
||||
`os.fork()`.
|
||||
2. `fork()` succeeds. Child wakes up with:
|
||||
- The parent's full memory image (including all
|
||||
subints).
|
||||
- Only the *calling* thread alive (the driver thread).
|
||||
- `_PyThreadState_GET()` on that thread returns the
|
||||
**launchpad subint's tstate**, *not* main's.
|
||||
3. CPython runs `PyOS_AfterFork_Child()`.
|
||||
4. It reaches `_PyInterpreterState_DeleteExceptMain()`.
|
||||
5. Gate check fails: `tstate->interp != interpreters->main`.
|
||||
6. `_PyStatus_ERR("not main interpreter")` → `fatal_error`
|
||||
goto → `Py_ExitStatusException()` → child aborts.
|
||||
|
||||
Parent-side consequence: `os.fork()` in the subint
|
||||
bootstrap returned successfully with the child's PID, but
|
||||
the child died before connecting back. Our parent's
|
||||
`ipc_server.wait_for_peer(uid)` would hang forever — the
|
||||
child never gets to `_actor_child_main`.
|
||||
|
||||
## Definitive answer to "Open Question 1"
|
||||
|
||||
From the (now-stub) `subint_fork_proc` docstring:
|
||||
|
||||
> Does CPython allow `os.fork()` from a non-main
|
||||
> sub-interpreter under the legacy config?
|
||||
|
||||
**No.** Not in a usable-by-user-code sense. The fork
|
||||
syscall is not blocked, but the child cannot survive
|
||||
CPython's post-fork initialization. This is enforced, not
|
||||
accidental, and the CPython devs have acknowledged the
|
||||
fragility in-source.
|
||||
|
||||
## What we'd need from CPython to unblock
|
||||
|
||||
Any one of these, from least-to-most invasive:
|
||||
|
||||
1. **A pre-fork hook mechanism** that lets user code (or
|
||||
tractor itself via `os.register_at_fork(before=...)`)
|
||||
swap the current tstate to main before fork runs. The
|
||||
swap would need to work across the subint→main
|
||||
boundary, which is the actual hard part —
|
||||
`_PyThreadState_Swap()` exists but is internal.
|
||||
|
||||
2. **A `_PyInterpreterState_DeleteExceptFor(tstate->interp)`
|
||||
variant** that cleans up all *other* subints while
|
||||
preserving the calling subint's state. Lets the child
|
||||
continue executing in the subint after fork; a
|
||||
subsequent `execv()` clears everything at the OS
|
||||
level anyway.
|
||||
|
||||
3. **A cleaner error** than `Fatal Python error` aborting
|
||||
the child. Even without fixing the underlying
|
||||
capability, a raised Python-level exception in the
|
||||
parent's `fork()` call (rather than a silent child
|
||||
abort) would at least make the failure mode
|
||||
debuggable.
|
||||
|
||||
## Upstream-report draft (for CPython issue tracker)
|
||||
|
||||
### Title
|
||||
|
||||
> `os.fork()` from a non-main sub-interpreter aborts the
|
||||
> child with a fatal error in `PyOS_AfterFork_Child`; can
|
||||
> we at least make it a clean `RuntimeError` in the
|
||||
> parent?
|
||||
|
||||
### Body
|
||||
|
||||
> **Version**: Python 3.14.x
|
||||
>
|
||||
> **Summary**: Calling `os.fork()` from a thread currently
|
||||
> executing inside a sub-interpreter causes the forked
|
||||
> child process to abort during CPython's post-fork
|
||||
> cleanup, with the following output in the child:
|
||||
>
|
||||
> ```
|
||||
> Fatal Python error: _PyInterpreterState_DeleteExceptMain: not main interpreter
|
||||
> ```
|
||||
>
|
||||
> From the **parent's** point of view the fork succeeded
|
||||
> (returned a valid child PID). The failure is completely
|
||||
> opaque to parent-side Python code — unless the parent
|
||||
> does `os.waitpid()` it won't even notice the child
|
||||
> died.
|
||||
>
|
||||
> **Root cause** (as I understand it from reading sources):
|
||||
> `Modules/posixmodule.c::PyOS_AfterFork_Child()` calls
|
||||
> `_PyInterpreterState_DeleteExceptMain()` with a
|
||||
> precondition that `_PyThreadState_GET()->interp` be the
|
||||
> main interpreter. When `fork()` is called from a thread
|
||||
> executing inside a subinterpreter, the child wakes up
|
||||
> with its tstate still pointing at the subint, and the
|
||||
> gate in `Python/pystate.c:1044-1047` fails.
|
||||
>
|
||||
> A comment in the source
|
||||
> (`Modules/posixmodule.c:753` — `// Ideally we could
|
||||
> guarantee tstate is running main.`) suggests this is a
|
||||
> known-fragile path rather than an intentional
|
||||
> invariant.
|
||||
>
|
||||
> **Use case**: I was experimenting with using a
|
||||
> sub-interpreter as a "fork launchpad" — have a subint
|
||||
> that has never imported `trio`, call `os.fork()` from
|
||||
> that subint's thread, and in the child `execv()` back
|
||||
> into a fresh Python interpreter process. The goal was
|
||||
> to sidestep known issues with `trio` + `fork()`
|
||||
> interaction (see
|
||||
> [python-trio/trio#1614](https://github.com/python-trio/trio/issues/1614))
|
||||
> by guaranteeing the forking context had never been
|
||||
> "contaminated" by trio's imports or globals. This
|
||||
> approach would allow `trio`-using applications to
|
||||
> combine `fork`-based subprocess spawning with
|
||||
> per-worker `trio.run()` runtimes — a fairly common
|
||||
> pattern that currently requires workarounds.
|
||||
>
|
||||
> **Request**:
|
||||
>
|
||||
> Ideally: make fork-from-subint work (e.g., by swapping
|
||||
> the caller's tstate to main in the pre-fork hook), or
|
||||
> provide a `_PyInterpreterState_DeleteExceptFor(interp)`
|
||||
> variant that permits the caller's subint to survive
|
||||
> post-fork so user code can subsequently `execv()`.
|
||||
>
|
||||
> Minimally: convert the fatal child-side abort into a
|
||||
> clean `RuntimeError` (or similar) raised in the
|
||||
> parent's `fork()` call. Even if the capability isn't
|
||||
> expanded, the failure mode should be debuggable by
|
||||
> user-code in the parent — right now it's a silent
|
||||
> child death with an error message buried in the
|
||||
> child's stderr that parent code can't programmatically
|
||||
> see.
|
||||
>
|
||||
> **Related**: PEP 684 (per-interpreter GIL), PEP 734
|
||||
> (`concurrent.interpreters` public API). The private
|
||||
> `_interpreters` module is what I used to create the
|
||||
> launchpad — behavior is the same whether using
|
||||
> `_interpreters.create('legacy')` or
|
||||
> `concurrent.interpreters.create()` (the latter was not
|
||||
> tested but the gate is identical).
|
||||
>
|
||||
> Happy to contribute a minimal reproducer + test case if
|
||||
> this is something the team wants to pursue.
|
||||
|
||||
## References
|
||||
|
||||
- `Modules/posixmodule.c:728` —
|
||||
[`PyOS_AfterFork_Child`](https://github.com/python/cpython/blob/main/Modules/posixmodule.c#L728)
|
||||
- `Python/pystate.c:1040` —
|
||||
[`_PyInterpreterState_DeleteExceptMain`](https://github.com/python/cpython/blob/main/Python/pystate.c#L1040)
|
||||
- PEP 684 (per-interpreter GIL):
|
||||
<https://peps.python.org/pep-0684/>
|
||||
- PEP 734 (`concurrent.interpreters` public API):
|
||||
<https://peps.python.org/pep-0734/>
|
||||
- [python-trio/trio#1614](https://github.com/python-trio/trio/issues/1614)
|
||||
— the original motivation for the launchpad idea.
|
||||
- tractor issue #379 — "Our own thoughts, ideas for
|
||||
`fork()`-workaround/hacks..." section where this was
|
||||
first sketched.
|
||||
- `tractor.spawn._subint_fork` — in-tree stub preserving
|
||||
the attempted impl's shape in git history.
|
||||
|
|
@ -0,0 +1,350 @@
|
|||
# `subint` backend: abandoned-subint thread can wedge main trio event loop (Ctrl-C unresponsive)
|
||||
|
||||
Follow-up to the Phase B subint spawn-backend PR (see
|
||||
`tractor.spawn._subint`, issue #379). The hard-kill escape
|
||||
hatch we landed (`_HARD_KILL_TIMEOUT`, bounded shields,
|
||||
`daemon=True` driver-thread abandonment) handles *most*
|
||||
stuck-subint scenarios cleanly, but there's one class of
|
||||
hang that can't be fully escaped from within tractor: a
|
||||
still-running abandoned sub-interpreter can starve the
|
||||
**parent's** trio event loop to the point where **SIGINT is
|
||||
effectively dropped by the kernel ↔ Python boundary** —
|
||||
making the pytest process un-Ctrl-C-able.
|
||||
|
||||
## Symptom
|
||||
|
||||
Running `test_stale_entry_is_deleted[subint]` under
|
||||
`--spawn-backend=subint`:
|
||||
|
||||
1. Test spawns a subactor (`transport_fails_actor`) which
|
||||
kills its own IPC server and then
|
||||
`trio.sleep_forever()`.
|
||||
2. Parent tries `Portal.cancel_actor()` → channel
|
||||
disconnected → fast return.
|
||||
3. Nursery teardown triggers our `subint_proc` cancel path.
|
||||
Portal-cancel fails (dead channel),
|
||||
`_HARD_KILL_TIMEOUT` fires, driver thread is abandoned
|
||||
(`daemon=True`), `_interpreters.destroy(interp_id)`
|
||||
raises `InterpreterError` (because the subint is still
|
||||
running).
|
||||
4. Test appears to hang indefinitely at the *outer*
|
||||
`async with tractor.open_nursery() as an:` exit.
|
||||
5. `Ctrl-C` at the terminal does nothing. The pytest
|
||||
process is un-interruptable.
|
||||
|
||||
## Evidence
|
||||
|
||||
### `strace` on the hung pytest process
|
||||
|
||||
```
|
||||
--- SIGINT {si_signo=SIGINT, si_code=SI_KERNEL} ---
|
||||
write(37, "\2", 1) = -1 EAGAIN (Resource temporarily unavailable)
|
||||
rt_sigreturn({mask=[WINCH]}) = 140585542325792
|
||||
```
|
||||
|
||||
Translated:
|
||||
|
||||
- Kernel delivers `SIGINT` to pytest.
|
||||
- CPython's C-level signal handler fires and tries to
|
||||
write the signal number byte (`0x02` = SIGINT) to fd 37
|
||||
— the **Python signal-wakeup fd** (set via
|
||||
`signal.set_wakeup_fd()`, which trio uses to wake its
|
||||
event loop on signals).
|
||||
- Write returns `EAGAIN` — **the pipe is full**. Nothing
|
||||
is draining it.
|
||||
- `rt_sigreturn` with the signal masked off — signal is
|
||||
"handled" from the kernel's perspective but the actual
|
||||
Python-level handler (and therefore trio's
|
||||
`KeyboardInterrupt` delivery) never runs.
|
||||
|
||||
### Stack dump (via `tractor.devx.dump_on_hang`)
|
||||
|
||||
At 20s into the hang, only the **main thread** is visible:
|
||||
|
||||
```
|
||||
Thread 0x...7fdca0191780 [python] (most recent call first):
|
||||
File ".../trio/_core/_io_epoll.py", line 245 in get_events
|
||||
File ".../trio/_core/_run.py", line 2415 in run
|
||||
File ".../tests/discovery/test_registrar.py", line 575 in test_stale_entry_is_deleted
|
||||
...
|
||||
```
|
||||
|
||||
No driver thread shows up. The abandoned-legacy-subint
|
||||
thread still exists from the OS's POV (it's still running
|
||||
inside `_interpreters.exec()` driving the subint's
|
||||
`trio.run()` on `trio.sleep_forever()`) but the **main
|
||||
interp's faulthandler can't see threads currently executing
|
||||
inside a sub-interpreter's tstate**. Concretely: the thread
|
||||
is alive, holding state we can't introspect from here.
|
||||
|
||||
## Root cause analysis
|
||||
|
||||
The most consistent explanation for both observations:
|
||||
|
||||
1. **Legacy-config subinterpreters share the main GIL.**
|
||||
PEP 734's public `concurrent.interpreters.create()`
|
||||
defaults to `'isolated'` (per-interp GIL), but tractor
|
||||
uses `_interpreters.create('legacy')` as a workaround
|
||||
for C extensions that don't yet support PEP 684
|
||||
(notably `msgspec`, see
|
||||
[jcrist/msgspec#563](https://github.com/jcrist/msgspec/issues/563)).
|
||||
Legacy-mode subints share process-global state
|
||||
including the GIL.
|
||||
|
||||
2. **Our abandoned subint thread never exits.** After our
|
||||
hard-kill timeout, `driver_thread.join()` is abandoned
|
||||
via `abandon_on_cancel=True` and the thread is
|
||||
`daemon=True` so proc-exit won't block on it — but the
|
||||
thread *itself* is still alive inside
|
||||
`_interpreters.exec()`, driving a `trio.run()` that
|
||||
will never return (the subint actor is in
|
||||
`trio.sleep_forever()`).
|
||||
|
||||
3. **`_interpreters.destroy()` cannot force-stop a running
|
||||
subint.** It raises `InterpreterError` on any
|
||||
still-running subinterpreter; there is no public
|
||||
CPython API to force-destroy one.
|
||||
|
||||
4. **Shared-GIL + non-terminating subint thread → main
|
||||
trio loop starvation.** Under enough load (the subint's
|
||||
trio event loop iterating in the background, IPC-layer
|
||||
tasks still in the subint, etc.) the main trio event
|
||||
loop can fail to iterate frequently enough to drain its
|
||||
wakeup pipe. Once that pipe fills, `SIGINT` writes from
|
||||
the C signal handler return `EAGAIN` and signals are
|
||||
silently dropped — exactly what `strace` shows.
|
||||
|
||||
The shielded
|
||||
`await actor_nursery._join_procs.wait()` at the top of
|
||||
`subint_proc` (inherited unchanged from the `trio_proc`
|
||||
pattern) is structurally involved too: if main trio *does*
|
||||
get a schedule slice, it'd find the `subint_proc` task
|
||||
parked on `_join_procs` under shield — which traps whatever
|
||||
`Cancelled` arrives. But that's a second-order effect; the
|
||||
signal-pipe-full condition is the primary "Ctrl-C doesn't
|
||||
work" cause.
|
||||
|
||||
## Why we can't fix this from inside tractor
|
||||
|
||||
- **No force-destroy API.** CPython provides neither a
|
||||
`_interpreters.force_destroy()` nor a thread-
|
||||
cancellation primitive (`pthread_cancel` is actively
|
||||
discouraged and unavailable on Windows). A subint stuck
|
||||
in pure-Python loops (or worse, C code that doesn't poll
|
||||
for signals) is structurally unreachable from outside.
|
||||
- **Shared GIL is the root scheduling issue.** As long as
|
||||
we're forced into legacy-mode subints for `msgspec`
|
||||
compatibility, the abandoned-thread scenario is
|
||||
fundamentally a process-global GIL-starvation window.
|
||||
- **`signal.set_wakeup_fd()` is process-global.** Even if
|
||||
we wanted to put our own drainer on the wakeup pipe,
|
||||
only one party owns it at a time.
|
||||
|
||||
## Current workaround
|
||||
|
||||
- **Fixture-side SIGINT loop on the `daemon` subproc** (in
|
||||
this test's `daemon: subprocess.Popen` fixture in
|
||||
`tests/conftest.py`). The daemon dying closes its end of
|
||||
the registry IPC, which unblocks a pending recv in main
|
||||
trio's IPC-server task, which lets the event loop
|
||||
iterate, which drains the wakeup pipe, which finally
|
||||
delivers the test-harness SIGINT.
|
||||
- **Module-level skip on py3.13**
|
||||
(`pytest.importorskip('concurrent.interpreters')`) — the
|
||||
private `_interpreters` C module exists on 3.13 but the
|
||||
multi-trio-task interaction hangs silently there
|
||||
independently of this issue.
|
||||
|
||||
## Path forward
|
||||
|
||||
1. **Primary**: upstream `msgspec` PEP 684 adoption
|
||||
([jcrist/msgspec#563](https://github.com/jcrist/msgspec/issues/563)).
|
||||
Unlocks `concurrent.interpreters.create()` isolated
|
||||
mode → per-interp GIL → abandoned subint threads no
|
||||
longer starve the parent's main trio loop. At that
|
||||
point we can flip `_subint.py` back to the public API
|
||||
(`create()` / `Interpreter.exec()` / `Interpreter.close()`)
|
||||
and drop the private `_interpreters` path.
|
||||
|
||||
2. **Secondary**: watch CPython for a public
|
||||
force-destroy primitive. If something like
|
||||
`Interpreter.close(force=True)` lands, we can use it as
|
||||
a hard-kill final stage and actually tear down
|
||||
abandoned subints.
|
||||
|
||||
3. **Harness-level**: document the fixture-side SIGINT
|
||||
loop pattern as the "known workaround" for subint-
|
||||
backend tests that can leave background state holding
|
||||
the main event loop hostage.
|
||||
|
||||
## References
|
||||
|
||||
- PEP 734 (`concurrent.interpreters`):
|
||||
<https://peps.python.org/pep-0734/>
|
||||
- PEP 684 (per-interpreter GIL):
|
||||
<https://peps.python.org/pep-0684/>
|
||||
- `msgspec` PEP 684 tracker:
|
||||
<https://github.com/jcrist/msgspec/issues/563>
|
||||
- CPython `_interpretersmodule.c` source:
|
||||
<https://github.com/python/cpython/blob/main/Modules/_interpretersmodule.c>
|
||||
- `tractor.spawn._subint` module docstring (in-tree
|
||||
explanation of the legacy-mode choice and its
|
||||
tradeoffs).
|
||||
|
||||
## Reproducer
|
||||
|
||||
```
|
||||
./py314/bin/python -m pytest \
|
||||
tests/discovery/test_registrar.py::test_stale_entry_is_deleted \
|
||||
--spawn-backend=subint \
|
||||
--tb=short --no-header -v
|
||||
```
|
||||
|
||||
Hangs indefinitely without the fixture-side SIGINT loop;
|
||||
with the loop, the test completes (albeit with the
|
||||
abandoned-thread warning in logs).
|
||||
|
||||
## Additional known-hanging tests (same class)
|
||||
|
||||
All three tests below exhibit the same
|
||||
signal-wakeup-fd-starvation fingerprint (`write() → EAGAIN`
|
||||
on the wakeup pipe after enough SIGINT attempts) and
|
||||
share the same structural cause — abandoned legacy-subint
|
||||
driver threads contending with the main interpreter for
|
||||
the shared GIL until the main trio loop can no longer
|
||||
drain its wakeup pipe fast enough to deliver signals.
|
||||
|
||||
They're listed separately because each exposes the class
|
||||
under a different load pattern worth documenting.
|
||||
|
||||
### `tests/discovery/test_registrar.py::test_stale_entry_is_deleted[subint]`
|
||||
|
||||
Original exemplar — see the **Symptom** and **Evidence**
|
||||
sections above. One abandoned subint
|
||||
(`transport_fails_actor`, stuck in `trio.sleep_forever()`
|
||||
after self-cancelling its IPC server) is sufficient to
|
||||
tip main into starvation once the harness's `daemon`
|
||||
fixture subproc keeps its half of the registry IPC alive.
|
||||
|
||||
### `tests/test_cancellation.py::test_cancel_while_childs_child_in_sync_sleep[subint-False]`
|
||||
|
||||
Cancel a grandchild that's in sync Python sleep from 2
|
||||
nurseries up. The test's own docstring declares the
|
||||
dependency: "its parent should issue a 'zombie reaper' to
|
||||
hard kill it after sufficient timeout" — which for
|
||||
`trio`/`mp_*` is an OS-level `SIGKILL` of the grandchild
|
||||
subproc. **Under `subint` there's no equivalent** (no
|
||||
public CPython API to force-destroy a running
|
||||
sub-interpreter), so the grandchild's sync-sleeping
|
||||
`trio.run()` persists inside its abandoned driver thread
|
||||
indefinitely. The nested actor-tree (parent → child →
|
||||
grandchild, all subints) means a single cancel triggers
|
||||
multiple concurrent hard-kill abandonments, each leaving
|
||||
a live driver thread.
|
||||
|
||||
This test often only manifests the starvation under
|
||||
**full-suite runs** rather than solo execution —
|
||||
earlier-in-session subint tests also leave abandoned
|
||||
driver threads behind, and the combined population is
|
||||
what actually tips main trio into starvation. Solo runs
|
||||
may stay Ctrl-C-able with fewer abandoned threads in the
|
||||
mix.
|
||||
|
||||
### `tests/test_cancellation.py::test_multierror_fast_nursery[subint-25-0.5]`
|
||||
|
||||
Nursery-error-path throughput stress-test parametrized
|
||||
for **25 concurrent subactors**. When the multierror
|
||||
fires and the nursery cancels, every subactor goes
|
||||
through our `subint_proc` teardown. The bounded
|
||||
hard-kills run in parallel (all `subint_proc` tasks are
|
||||
sibling trio tasks), so the timeout budget is ~3s total
|
||||
rather than 3s × 25. After that, **25 abandoned
|
||||
`daemon=True` driver threads are simultaneously alive** —
|
||||
an extreme pressure multiplier on the same mechanism.
|
||||
|
||||
The `strace` fingerprint is striking under this load: six
|
||||
or more **successful** `write(16, "\2", 1) = 1` calls
|
||||
(main trio getting brief GIL slices, each long enough to
|
||||
drain exactly one wakeup-pipe byte) before finally
|
||||
saturating with `EAGAIN`:
|
||||
|
||||
```
|
||||
--- SIGINT {si_signo=SIGINT, si_code=SI_KERNEL} ---
|
||||
write(16, "\2", 1) = 1
|
||||
rt_sigreturn({mask=[WINCH]}) = 140141623162400
|
||||
--- SIGINT {si_signo=SIGINT, si_code=SI_KERNEL} ---
|
||||
write(16, "\2", 1) = 1
|
||||
rt_sigreturn({mask=[WINCH]}) = 140141623162400
|
||||
--- SIGINT {si_signo=SIGINT, si_code=SI_KERNEL} ---
|
||||
write(16, "\2", 1) = 1
|
||||
rt_sigreturn({mask=[WINCH]}) = 140141623162400
|
||||
--- SIGINT {si_signo=SIGINT, si_code=SI_KERNEL} ---
|
||||
write(16, "\2", 1) = 1
|
||||
rt_sigreturn({mask=[WINCH]}) = 140141623162400
|
||||
--- SIGINT {si_signo=SIGINT, si_code=SI_KERNEL} ---
|
||||
write(16, "\2", 1) = 1
|
||||
rt_sigreturn({mask=[WINCH]}) = 140141623162400
|
||||
--- SIGINT {si_signo=SIGINT, si_code=SI_KERNEL} ---
|
||||
write(16, "\2", 1) = 1
|
||||
rt_sigreturn({mask=[WINCH]}) = 140141623162400
|
||||
--- SIGINT {si_signo=SIGINT, si_code=SI_KERNEL} ---
|
||||
write(16, "\2", 1) = -1 EAGAIN (Resource temporarily unavailable)
|
||||
rt_sigreturn({mask=[WINCH]}) = 140141623162400
|
||||
```
|
||||
|
||||
Those successful writes indicate CPython's
|
||||
`sys.getswitchinterval()`-based GIL round-robin *is*
|
||||
giving main brief slices — just never long enough to run
|
||||
the Python-level signal handler through to the point
|
||||
where trio converts the delivered SIGINT into a
|
||||
`Cancelled` on the appropriate scope. Once the
|
||||
accumulated write rate outpaces main's drain rate, the
|
||||
pipe saturates and subsequent signals are silently
|
||||
dropped.
|
||||
|
||||
The `pstree` below (pid `530060` = hung `pytest`) shows
|
||||
the subint-driver thread population at the moment of
|
||||
capture. Even with fewer than the full 25 shown (pstree
|
||||
truncates thread names to `subint-driver[<interp_id>` —
|
||||
interpreters `3` and `4` visible across 16 thread
|
||||
entries), the GIL-contender count is more than enough to
|
||||
explain the starvation:
|
||||
|
||||
```
|
||||
>>> pstree -snapt 530060
|
||||
systemd,1 --switched-root --system --deserialize=40
|
||||
└─login,1545 --
|
||||
└─bash,1872
|
||||
└─sway,2012
|
||||
└─alacritty,70471 -e xonsh
|
||||
└─xonsh,70487 .../bin/xonsh
|
||||
└─uv,70955 run xonsh
|
||||
└─xonsh,70959 .../py314/bin/xonsh
|
||||
└─python,530060 .../py314/bin/pytest -v tests/test_cancellation.py --spawn-backend=subint
|
||||
├─{subint-driver[3},531857
|
||||
├─{subint-driver[3},531860
|
||||
├─{subint-driver[3},531862
|
||||
├─{subint-driver[3},531866
|
||||
├─{subint-driver[3},531877
|
||||
├─{subint-driver[3},531882
|
||||
├─{subint-driver[3},531884
|
||||
├─{subint-driver[3},531945
|
||||
├─{subint-driver[3},531950
|
||||
├─{subint-driver[3},531952
|
||||
├─{subint-driver[4},531956
|
||||
├─{subint-driver[4},531959
|
||||
├─{subint-driver[4},531961
|
||||
├─{subint-driver[4},531965
|
||||
├─{subint-driver[4},531968
|
||||
└─{subint-driver[4},531979
|
||||
```
|
||||
|
||||
(`pstree` uses `{...}` to denote threads rather than
|
||||
processes — these are all the **driver OS-threads** our
|
||||
`subint_proc` creates with name
|
||||
`f'subint-driver[{interp_id}]'`. Every one of them is
|
||||
still alive, executing `_interpreters.exec()` inside a
|
||||
sub-interpreter our hard-kill has abandoned. At 16+
|
||||
abandoned driver threads competing for the main GIL, the
|
||||
main-interpreter trio loop gets starved and signal
|
||||
delivery stalls.)
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
---
|
||||
model: claude-opus-4-6
|
||||
service: claude
|
||||
session: (ad-hoc, not tracked via conf.toml)
|
||||
timestamp: 2026-04-06T17:28:48Z
|
||||
git_ref: 02b2ef1
|
||||
scope: tests
|
||||
substantive: true
|
||||
raw_file: 20260406T172848Z_02b2ef1_prompt_io.raw.md
|
||||
---
|
||||
|
||||
## Prompt
|
||||
|
||||
User asked to extend `tests/test_resource_cache.py` with a test
|
||||
that reproduces the edge case fixed in commit `02b2ef18` (per-key
|
||||
locking+user tracking in `maybe_open_context()`). The bug was
|
||||
originally triggered in piker's `brokerd.kraken` backend where the
|
||||
same `acm_func` was called with different kwargs, and the old
|
||||
global `_Cache.users` counter caused:
|
||||
|
||||
- teardown skipped for one `ctx_key` because another key's users kept
|
||||
the global count > 0
|
||||
- re-entry hitting `assert not resources.get(ctx_key)` during the
|
||||
teardown window
|
||||
|
||||
User requested a test that would fail under the old code and pass
|
||||
with the fix.
|
||||
|
||||
## Response summary
|
||||
|
||||
Designed and implemented `test_per_ctx_key_resource_lifecycle`
|
||||
which verifies per-`ctx_key` resource isolation by:
|
||||
|
||||
1. Holding resource `'a'` open in a bg task
|
||||
2. Opening+closing resource `'b'` (same `acm_func`, different
|
||||
kwargs) while `'a'` is still alive
|
||||
3. Re-opening `'b'` and asserting cache MISS — proving `'b'` was
|
||||
torn down independently despite `'a'` keeping its own user
|
||||
count > 0
|
||||
|
||||
With the old global counter, phase 3 would produce a stale cache
|
||||
HIT (leaked resource) or crash on the assert.
|
||||
|
||||
Also added a trivial `acm_with_resource(resource_id)` ACM helper
|
||||
at module level.
|
||||
|
||||
## Files changed
|
||||
|
||||
- `tests/test_resource_cache.py` — add `acm_with_resource` ACM +
|
||||
`test_per_ctx_key_resource_lifecycle` test fn
|
||||
|
||||
## Human edits
|
||||
|
||||
None — committed as generated (pending user review).
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
---
|
||||
model: claude-opus-4-6
|
||||
service: claude
|
||||
session: (ad-hoc, not tracked via conf.toml)
|
||||
timestamp: 2026-04-06T19:31:25Z
|
||||
git_ref: 85f9c5d
|
||||
scope: tests
|
||||
substantive: true
|
||||
raw_file: 20260406T193125Z_85f9c5d_prompt_io.raw.md
|
||||
---
|
||||
|
||||
## Prompt
|
||||
|
||||
User asked to reproduce the exact `assert not resources.get(ctx_key)`
|
||||
crash originally seen in piker's `brokerd.kraken` backend via
|
||||
`open_cached_client('kraken')`. Key constraints from user:
|
||||
|
||||
- In piker, kwargs were the **same** (empty) so all callers
|
||||
share one `ctx_key = (fid, ())`
|
||||
- The root issue is `_Cache.locks` being indexed by `fid`
|
||||
rather than `ctx_key`, plus the race window between
|
||||
`values.pop()` and `resources.pop()` in `_Cache.run_ctx`
|
||||
(the acm `__aexit__` has checkpoints in between)
|
||||
- Mark the test `@pytest.mark.xfail` so it can be committed
|
||||
against the unpatched code and later adjusted once the fix
|
||||
from `9e49eddd` is applied
|
||||
|
||||
User pointed to piker's `open_cached_client('kraken')` usage
|
||||
as the real-world pattern to model.
|
||||
|
||||
## Response summary
|
||||
|
||||
Added `test_moc_reentry_during_teardown` which deterministically
|
||||
reproduces the teardown race:
|
||||
|
||||
1. A `cached_client()` acm (no params, like
|
||||
`kraken.api.get_client()`) signals via `in_aexit` event
|
||||
when its `__aexit__` starts, then sleeps to hold the
|
||||
window open
|
||||
2. `use_and_exit` task enters+exits the cached resource
|
||||
3. `reenter_during_teardown` waits on `in_aexit` then
|
||||
re-enters — landing exactly in the window where
|
||||
`values` is popped but `resources` still exists
|
||||
4. The old `assert not resources.get(ctx_key)` fires
|
||||
5. `@pytest.mark.xfail(raises=AssertionError)` catches it
|
||||
|
||||
Test confirmed: `XFAIL` with the exact traceback at
|
||||
`_mngrs.py:327`.
|
||||
|
||||
## Files changed
|
||||
|
||||
- `tests/test_resource_cache.py` — add
|
||||
`test_moc_reentry_during_teardown` xfail test fn
|
||||
|
||||
## Human edits
|
||||
|
||||
None — committed as generated (pending user review).
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
---
|
||||
model: claude-opus-4-6
|
||||
service: claude
|
||||
session: (ad-hoc, not tracked via conf.toml)
|
||||
timestamp: 2026-04-06T22:26:48Z
|
||||
git_ref: f086222
|
||||
scope: code
|
||||
substantive: true
|
||||
raw_file: 20260406T222648Z_f086222_prompt_io.raw.md
|
||||
---
|
||||
|
||||
## Prompt
|
||||
|
||||
User applied the per-key locking patch (`f086222d`),
|
||||
removed the `@pytest.mark.xfail` from
|
||||
`test_moc_reentry_during_teardown`, and found the
|
||||
test STILL fails — now with `RuntimeError` instead
|
||||
of `AssertionError`. Asked to diagnose why the fix
|
||||
doesn't actually resolve this race.
|
||||
|
||||
## Response summary
|
||||
|
||||
Traced the exact scheduling sequence across three
|
||||
concurrent tasks to show that the per-`ctx_key`
|
||||
locking change doesn't close the fundamental gap:
|
||||
|
||||
- `maybe_open_context`'s finally pops the lock and
|
||||
sets `no_more_users` in the **exiting caller's
|
||||
task**
|
||||
- But the actual resource cleanup (`values.pop`,
|
||||
acm `__aexit__`, `resources.pop`) runs in the
|
||||
**`run_ctx` task** inside `service_tn`
|
||||
- A re-entering task creates a fresh lock (old one
|
||||
popped), acquires immediately, and hits stale
|
||||
`resources[ctx_key]` while `run_ctx` is still in
|
||||
`__aexit__`
|
||||
|
||||
Identified three possible fix strategies:
|
||||
1. Let `run_ctx` own lock release/pop after
|
||||
`resources.pop()`
|
||||
2. Add a "teardown complete" event that new callers
|
||||
wait on
|
||||
3. Move `resources.pop()` into `run_ctx`'s inner
|
||||
finally (before acm `__aexit__`)
|
||||
|
||||
## Files changed
|
||||
|
||||
No code changes — analysis only.
|
||||
|
||||
## Human edits
|
||||
|
||||
N/A — diagnostic output, no code generated.
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
---
|
||||
model: claude-opus-4-6
|
||||
service: claude
|
||||
timestamp: 2026-04-06T22:26:48Z
|
||||
git_ref: f086222
|
||||
diff_cmd: git diff f086222~1..f086222
|
||||
---
|
||||
|
||||
Here's the problem. The per-key locking doesn't fix this race because the **lock lifetime is decoupled from `run_ctx`'s lifetime**. Trace through what happens:
|
||||
|
||||
**`maybe_open_context` finally** (lines 468-495):
|
||||
1. `users[ctx_key] -= 1` → 0
|
||||
2. `no_more_users.set()` → wakes `run_ctx` (but it doesn't run yet, just becomes ready)
|
||||
3. `_Cache.locks.pop(ctx_key)` → **lock is gone**
|
||||
4. `maybe_open_context` exits
|
||||
|
||||
**`run_ctx`** now gets scheduled (lines 237-241):
|
||||
5. Inner finally: `values.pop(ctx_key)` → values gone
|
||||
6. `mng.__aexit__()` runs → `in_aexit.set()`, `await trio.sleep(10)` → **CHECKPOINT**
|
||||
|
||||
**`reenter_during_teardown`** wakes:
|
||||
7. Enters `maybe_open_context` → no lock exists for `ctx_key` → creates **fresh** lock, acquires immediately
|
||||
8. `values[ctx_key]` → `KeyError`
|
||||
9. `resources.get(ctx_key)` → **still exists!** (step 6 hasn't reached `resources.pop()` in `run_ctx`'s outer finally yet)
|
||||
10. `raise RuntimeError('Caching resources ALREADY exist?!')`
|
||||
|
||||
The core issue: `no_more_users.set()` (step 2) and `locks.pop()` (step 3) happen in the **exiting caller's task**, but the actual resource cleanup (`values.pop`, acm `__aexit__`, `resources.pop`) happens in the **`run_ctx` task** inside `service_tn`. There's no synchronization between "lock removed, new callers welcome" and "run_ctx has actually finished tearing down". The per-`ctx_key` lock change just made the lock more granular — it didn't close this gap.
|
||||
|
||||
The fix needs to ensure a new caller can't acquire a lock for `ctx_key` until `run_ctx` has fully exited (including `resources.pop()`). Either:
|
||||
- **don't pop the lock in `maybe_open_context`** — let `run_ctx` release/pop it after `resources.pop()`
|
||||
- **wait on a "teardown complete" event** in the cache-miss path when stale `resources` exist
|
||||
- **pop `resources` before the acm `__aexit__`** (move `resources.pop()` into the inner finally alongside `values.pop()`)
|
||||
|
|
@ -0,0 +1,74 @@
|
|||
---
|
||||
model: claude-opus-4-6
|
||||
service: claude
|
||||
session: (ad-hoc, not tracked via conf.toml)
|
||||
timestamp: 2026-04-13T19:21:16Z
|
||||
git_ref: f851f28
|
||||
scope: code
|
||||
substantive: true
|
||||
---
|
||||
|
||||
## Prompt
|
||||
|
||||
User provided a detailed implementation plan for
|
||||
a new `test_tpt_bind_addrs.py` test suite covering
|
||||
`open_root_actor(tpt_bind_addrs=...)` — the three
|
||||
runtime code paths in `_root.py:385-450`:
|
||||
|
||||
1. Non-registrar, no explicit bind -> random addrs
|
||||
2. Registrar, no explicit bind -> registry_addrs
|
||||
3. Explicit bind given -> `wrap_address()` + merge
|
||||
|
||||
Plan specified 6 test functions (~10 parametrized
|
||||
variants), predicted a type-mixing bug at line 446,
|
||||
and asked for an in-flight fix if confirmed.
|
||||
|
||||
## Response summary
|
||||
|
||||
Created `tests/discovery/test_tpt_bind_addrs.py`
|
||||
with 9 collected test variants across 6 functions:
|
||||
|
||||
- `test_registrar_root_tpt_bind_addrs` (3 variants:
|
||||
`bind-eq-reg`, `bind-subset-reg`,
|
||||
`bind-disjoint-reg`)
|
||||
- `test_non_registrar_root_tpt_bind_addrs`
|
||||
(2 variants: `bind-same-bindspace`,
|
||||
`bind-disjoint`)
|
||||
- `test_non_registrar_default_random_bind`
|
||||
(baseline, no explicit bind)
|
||||
- `test_tpt_bind_addrs_as_maddr_str`
|
||||
(multiaddr string input)
|
||||
- `test_registrar_merge_binds_union`
|
||||
(registrar + disjoint bind -> union)
|
||||
- `test_open_nursery_forwards_tpt_bind_addrs`
|
||||
(`open_nursery(**kwargs)` forwarding)
|
||||
|
||||
Confirmed and fixed the predicted bug at
|
||||
`_root.py:446`: the registrar merge path mixed
|
||||
`Address` objects (`tpt_bind_addrs`) with raw tuples
|
||||
(`uw_reg_addrs`) inside `set()`, preventing
|
||||
deduplication and causing double-bind `OSError`.
|
||||
|
||||
Fix: wrap `uw_reg_addrs` before the set union:
|
||||
```python
|
||||
# before (broken)
|
||||
tpt_bind_addrs = list(set(
|
||||
tpt_bind_addrs + uw_reg_addrs
|
||||
))
|
||||
# after (fixed)
|
||||
tpt_bind_addrs = list(set(
|
||||
tpt_bind_addrs
|
||||
+ [wrap_address(a) for a in uw_reg_addrs]
|
||||
))
|
||||
```
|
||||
|
||||
All 9 tests pass after the fix.
|
||||
|
||||
## Files changed
|
||||
|
||||
- `tests/discovery/test_tpt_bind_addrs.py` (new)
|
||||
- `tractor/_root.py:446` (bug fix, 1 line)
|
||||
|
||||
## Human edits
|
||||
|
||||
N/A — pending review.
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
---
|
||||
model: claude-opus-4-6
|
||||
service: claude
|
||||
session: 76154e65-d8e1-4b5f-9275-0ea45ba7e98a
|
||||
timestamp: 2026-04-13T20:50:48Z
|
||||
git_ref: 269d939c
|
||||
scope: code
|
||||
substantive: true
|
||||
raw_file: 20260413T205048Z_269d939c_prompt_io.raw.md
|
||||
---
|
||||
|
||||
## Prompt
|
||||
|
||||
Implement a `parse_endpoints()` API in
|
||||
`tractor.discovery._multiaddr` that lets downstream
|
||||
projects declare per-actor transport bind addresses
|
||||
as a service table of actor-name -> multiaddr
|
||||
strings (loaded from config, e.g. TOML `[network]`
|
||||
section). Add type aliases `EndpointsTable` and
|
||||
`ParsedEndpoints`, plus 7 unit tests covering TCP,
|
||||
UDS, tuples, mixed input, unsupported protocols, and
|
||||
edge cases.
|
||||
|
||||
A detailed implementation plan was provided
|
||||
specifying insertion points, type signatures, test
|
||||
names, and verification commands.
|
||||
|
||||
## Response summary
|
||||
|
||||
Add `parse_endpoints()` function with two supporting
|
||||
type aliases to `_multiaddr.py`. The function
|
||||
iterates a `dict[str, list[str|tuple]]` service
|
||||
table and delegates each entry to the existing
|
||||
`wrap_address()` helper, which already handles maddr
|
||||
strings, raw tuples, and pre-wrapped `Address`
|
||||
objects. Seven pure unit tests (no trio runtime)
|
||||
validate all input variants and edge cases. All 22
|
||||
multiaddr tests pass.
|
||||
|
||||
## Files changed
|
||||
|
||||
- `tractor/discovery/_multiaddr.py` — add
|
||||
`EndpointsTable`, `ParsedEndpoints` type aliases
|
||||
and `parse_endpoints()` function after line 133
|
||||
- `tests/discovery/test_multiaddr.py` — add
|
||||
`parse_endpoints` import and 7 new test functions
|
||||
|
||||
## Human edits
|
||||
|
||||
None — committed as generated.
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
---
|
||||
model: claude-opus-4-6
|
||||
service: claude
|
||||
timestamp: 2026-04-13T20:50:48Z
|
||||
git_ref: 269d939c
|
||||
diff_cmd: git diff 269d939c~1..269d939c
|
||||
---
|
||||
|
||||
## Implementation output
|
||||
|
||||
### `tractor/discovery/_multiaddr.py`
|
||||
|
||||
> `git diff 269d939c~1..269d939c -- tractor/discovery/_multiaddr.py`
|
||||
|
||||
Type aliases `EndpointsTable` and `ParsedEndpoints`
|
||||
plus `parse_endpoints()` helper that wraps each
|
||||
entry via `wrap_address()`.
|
||||
|
||||
### `tests/discovery/test_multiaddr.py`
|
||||
|
||||
> `git diff 269d939c~1..269d939c -- tests/discovery/test_multiaddr.py`
|
||||
|
||||
Add import of `parse_endpoints` to existing import
|
||||
block, then 7 new test functions.
|
||||
|
||||
### Test run output
|
||||
|
||||
```
|
||||
22 passed, 1 warning in 0.05s
|
||||
```
|
||||
|
||||
All 22 tests pass (15 existing + 7 new).
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
---
|
||||
model: claude-opus-4-6
|
||||
service: claude
|
||||
session: multiaddr-support-rename-prefer
|
||||
timestamp: 2026-04-14T16:33:00Z
|
||||
git_ref: befedc49
|
||||
scope: code
|
||||
substantive: true
|
||||
raw_file: 20260414T163300Z_befedc49_prompt_io.raw.md
|
||||
---
|
||||
|
||||
## Prompt
|
||||
|
||||
Create a helper function that determines the best transport given
|
||||
actor locality (distributed vs same host). Use PID/hostname
|
||||
comparison for locality detection, apply at registry addr selection
|
||||
only (not spawn-time).
|
||||
|
||||
## Response summary
|
||||
|
||||
New `prefer_addr()` + `_is_local_addr()` helpers
|
||||
in `_api.py` using `socket.getaddrinfo()` and
|
||||
`ipaddress` for PID/hostname locality detection.
|
||||
Preference: UDS > local TCP > remote TCP.
|
||||
Integrated into `query_actor()` and
|
||||
`wait_for_actor()`. Also changed
|
||||
`Registrar.find_actor()` to return full addr list
|
||||
so callers can apply preference.
|
||||
|
||||
## Files changed
|
||||
|
||||
- `tractor/discovery/_discovery.py` → `_api.py`
|
||||
— renamed + added `prefer_addr()`,
|
||||
`_is_local_addr()`; updated `query_actor()` and
|
||||
`wait_for_actor()` call sites
|
||||
- `tractor/discovery/_registry.py`
|
||||
— `Registrar.find_actor()` returns
|
||||
`list[UnwrappedAddress]|None`
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
---
|
||||
model: claude-opus-4-6
|
||||
service: claude
|
||||
timestamp: 2026-04-14T16:33:00Z
|
||||
git_ref: befedc49
|
||||
diff_cmd: git diff befedc49~1..befedc49
|
||||
---
|
||||
|
||||
### `tractor/discovery/_api.py`
|
||||
|
||||
> `git diff befedc49~1..befedc49 -- tractor/discovery/_api.py`
|
||||
|
||||
Add `_is_local_addr()` and `prefer_addr()` transport
|
||||
preference helpers.
|
||||
|
||||
#### `_is_local_addr(addr: Address) -> bool`
|
||||
|
||||
Determines whether an `Address` is reachable on the
|
||||
local host:
|
||||
|
||||
- `UDSAddress`: always returns `True`
|
||||
(filesystem-bound, inherently local)
|
||||
- `TCPAddress`: checks if `._host` is a loopback IP
|
||||
via `ipaddress.ip_address().is_loopback`, then
|
||||
falls back to comparing against the machine's own
|
||||
interface IPs via
|
||||
`socket.getaddrinfo(socket.gethostname(), None)`
|
||||
|
||||
#### `prefer_addr(addrs: list[UnwrappedAddress]) -> UnwrappedAddress`
|
||||
|
||||
Selects the "best" transport address from a
|
||||
multihomed actor's address list. Wraps each
|
||||
candidate via `wrap_address()` to get typed
|
||||
`Address` objects, then classifies into three tiers:
|
||||
|
||||
1. **UDS** (same-host guaranteed, lowest overhead)
|
||||
2. **TCP loopback / same-host IP** (local network)
|
||||
3. **TCP remote** (only option for distributed)
|
||||
|
||||
Within each tier, the last-registered (latest) entry
|
||||
is preferred. Falls back to `addrs[-1]` if no
|
||||
heuristic matches.
|
||||
|
||||
### `tractor/discovery/_registry.py`
|
||||
|
||||
> `git diff befedc49~1..befedc49 -- tractor/discovery/_registry.py`
|
||||
|
||||
`Registrar.find_actor()` return type broadened from
|
||||
single addr to `list[UnwrappedAddress]|None` — full
|
||||
addr list lets callers apply transport preference.
|
||||
|
||||
#### Integration
|
||||
|
||||
`query_actor()` and `wait_for_actor()` now call
|
||||
`prefer_addr(addrs)` instead of `addrs[-1]`.
|
||||
|
||||
### Verification
|
||||
|
||||
All discovery tests pass (13/13 non-daemon).
|
||||
`test_local.py` and `test_multi_program.py` also
|
||||
pass (daemon fixture teardown failures are
|
||||
pre-existing and unrelated).
|
||||
|
|
@ -0,0 +1,101 @@
|
|||
---
|
||||
model: claude-opus-4-7[1m]
|
||||
service: claude
|
||||
session: subints-spawner-design-kickoff
|
||||
timestamp: 2026-04-17T03:49:18Z
|
||||
git_ref: 9703210
|
||||
scope: docs
|
||||
substantive: true
|
||||
raw_file: 20260417T034918Z_9703210_prompt_io.raw.md
|
||||
---
|
||||
|
||||
## Prompt
|
||||
|
||||
Drive the "first big boi, from GH issue" task seeded by
|
||||
`ai/prompt-io/prompts/subints_spawner.md`: design, plan
|
||||
and implement sub-interpreter (subint) spawn-backend
|
||||
support per issue #379, including (1) modularizing
|
||||
`tractor.spawn._spawn` into per-backend submods, (2) a new
|
||||
`._subint` backend, and (3) harness parametrization via the
|
||||
existing `--spawn-backend` / `start_method` pytest fixture
|
||||
in `tractor._testing.pytest`.
|
||||
|
||||
Follow-up clarifications from the user (this turn):
|
||||
1. Pin `<3.15` on this dev branch and feature-gate subint
|
||||
tests — chose option (a).
|
||||
2. Split Phase A (modularization) into its own PR first.
|
||||
3. Defer the `fork()`-via-subint hack to a follow-up.
|
||||
4. Harness flag is `pytest --spawn-backend <key>` CLI →
|
||||
`start_method` fixture (prompt file updated to match).
|
||||
|
||||
## Response summary
|
||||
|
||||
Produced a three-phase plan and a concrete Phase A (pure
|
||||
modularization) file-split plan for user review; no code
|
||||
written yet — the green-light to start Phase A was given
|
||||
in this same turn conditional on logging this prompt-io
|
||||
entry first.
|
||||
|
||||
Phases:
|
||||
- **A — modularize** `tractor/spawn/_spawn.py` (847 LOC):
|
||||
keep generic machinery in `_spawn.py`, extract
|
||||
`trio_proc` → `spawn/_trio.py`, `mp_proc` →
|
||||
`spawn/_mp.py`. No pin bump.
|
||||
- **B — `_subint` backend**: bump `pyproject.toml`
|
||||
`requires-python` upper to `<3.15`; add `'subint'` to
|
||||
`SpawnMethodKey`; reuse existing UDS transport; shm
|
||||
escape-hatch deferred.
|
||||
- **C — harness**: drive the valid-backend tuple in
|
||||
`tractor/_testing/pytest.py:345-349` from
|
||||
`typing.get_args(SpawnMethodKey)`; skip subint tests on
|
||||
Python < 3.14.
|
||||
|
||||
Key findings surfaced to the user:
|
||||
- `pyproject.toml:12` currently pins `<3.14`; PEP 734
|
||||
`concurrent.interpreters` only ships in 3.14 — the
|
||||
load-bearing constraint.
|
||||
- `_testing/pytest.py:345-349` hardcodes valid backends
|
||||
as a string tuple (`'mp_spawn'`, `'mp_forkserver'`,
|
||||
`'trio'`) — should be `get_args(SpawnMethodKey)`.
|
||||
- `_testing/pytest.py:228` already imports
|
||||
`try_set_start_method` from `tractor.spawn._spawn` —
|
||||
keeping the `_spawn.py` path as the "core" module
|
||||
avoids breaking external refs during Phase A.
|
||||
|
||||
Phase A commit breakdown (each green via `/run-tests`):
|
||||
1. Drive `_testing/pytest.py` backend-tuple from
|
||||
`get_args(SpawnMethodKey)` — prep.
|
||||
2. Extract `trio_proc` → `spawn/_trio.py`; `_methods`
|
||||
registry imports it.
|
||||
3. Extract `mp_proc` → `spawn/_mp.py`; `_methods` imports
|
||||
it.
|
||||
4. Tidy `spawn/__init__.py` docstring for the new layout.
|
||||
|
||||
Workflow: Phase A runs in a new worktree via `/open-wkt
|
||||
spawn_modularize` (branch `wkt/spawn_modularize`) and
|
||||
lands as its own PR before Phase B/C returns to the
|
||||
current `claude_bun_opus4.7_subint_spawner_backend`
|
||||
branch.
|
||||
|
||||
## Files changed
|
||||
|
||||
None this turn — design/planning only. The user updated
|
||||
`ai/prompt-io/prompts/subints_spawner.md` themselves to
|
||||
complete the truncated sentence on line 32 (harness-flag
|
||||
reference).
|
||||
|
||||
Upcoming (Phase A, next turn):
|
||||
- `tractor/spawn/_spawn.py` — trimmed to "core"
|
||||
- `tractor/spawn/_trio.py` — new; holds `trio_proc()`
|
||||
- `tractor/spawn/_mp.py` — new; holds `mp_proc()`
|
||||
- `tractor/spawn/__init__.py` — docstring refresh
|
||||
- `tractor/_testing/pytest.py` — backend tuple from
|
||||
`get_args(SpawnMethodKey)`
|
||||
|
||||
## Human edits
|
||||
|
||||
None — committed as generated. User completed the
|
||||
truncated sentence on line 32 of
|
||||
`ai/prompt-io/prompts/subints_spawner.md` themselves
|
||||
*before* this prompt-io log was written; that edit was
|
||||
the user's authoring, not an edit to AI output.
|
||||
|
|
@ -0,0 +1,117 @@
|
|||
---
|
||||
model: claude-opus-4-7[1m]
|
||||
service: claude
|
||||
timestamp: 2026-04-17T03:49:18Z
|
||||
git_ref: 9703210
|
||||
diff_cmd: git diff HEAD~1..HEAD
|
||||
---
|
||||
|
||||
No code was produced in this turn — this is a design/planning
|
||||
exchange for subint spawner support per issue #379. Captured
|
||||
verbatim below are the two substantive AI responses, which
|
||||
together fix the phased plan (A: modularize, B: subint, C:
|
||||
harness) and the concrete Phase A file split before any
|
||||
implementation begins.
|
||||
|
||||
## Turn 1 — initial triage + gating questions
|
||||
|
||||
> Read: issue #379, current `spawn/_spawn.py` (847 lines),
|
||||
> and pyproject pins.
|
||||
>
|
||||
> ## Task summary
|
||||
> Design/plan/implement subint backend per #379, plus
|
||||
> modularize `spawn/_spawn.py` into per-backend submods, plus
|
||||
> harness flag for test parametrization.
|
||||
>
|
||||
> ## Key finding
|
||||
> `pyproject.toml:12` pins `requires-python = ">=3.12, <3.14"`
|
||||
> — but `concurrent.interpreters` / PEP 734 **only lands in
|
||||
> 3.14**. This is the load-bearing design question before
|
||||
> coding.
|
||||
>
|
||||
> ## Proposed phasing (incremental commits)
|
||||
> - **Phase A — modularize** `_spawn.py`: extract `trio_proc`
|
||||
> → `spawn/_trio.py`, `mp_proc` → `spawn/_mp.py`, keep
|
||||
> generic helpers (`hard_kill`/`soft_kill`/`new_proc`
|
||||
> dispatcher/`SpawnMethodKey`) in a slim core. Tests green
|
||||
> at each step.
|
||||
> - **Phase B — `_subint` backend**: extend `SpawnMethodKey`
|
||||
> with `'subint'`; reuse UDS transport by default; shm is
|
||||
> escape-hatch follow-up.
|
||||
> - **Phase C — harness**: add backend-parametrize flag, skip
|
||||
> subint when Python < 3.14.
|
||||
>
|
||||
> ## Gating questions before I start
|
||||
> 1. **Python pin**: bump `<3.15` and feature-gate subint
|
||||
> tests (a), require 3.14 outright (b), or backport shim
|
||||
> (c)? I'd recommend (a).
|
||||
> 2. **PR strategy**: one branch, commits per phase (current
|
||||
> `claude_bun_opus4.7_subint_spawner_backend`)? Or split
|
||||
> Phase A as its own PR first?
|
||||
> 3. **`fork()`-via-subint hack** from the issue's "Our own
|
||||
> thoughts" section — v1 or follow-up? I'd defer to
|
||||
> follow-up.
|
||||
> 4. **Prompt file line 32 is truncated** — what's the
|
||||
> existing harness flag name you had in mind ("using the
|
||||
> existing ___")?
|
||||
>
|
||||
> Which of these are already decided vs. open?
|
||||
|
||||
## User answers (for the raw record)
|
||||
|
||||
1. bump `<3.15`, feature-gate — option (a).
|
||||
2. split Phase A as its own PR first.
|
||||
3. defer `fork()`-via-subint hack.
|
||||
4. harness flag is `--spawn-backend` CLI → `start_method`
|
||||
fixture, per `tractor._testing._pytest`. Prompt file
|
||||
updated accordingly.
|
||||
|
||||
## Turn 2 — concrete Phase A plan
|
||||
|
||||
> Good — prompt file now ends cleanly; harness flag
|
||||
> confirmed as `--spawn-backend` CLI → `start_method`
|
||||
> fixture (via `_testing/pytest.py:193,355`). Also noted
|
||||
> `pytest.py:345-349` hardcodes valid backends as a string
|
||||
> tuple — `typing.get_args(SpawnMethodKey)` should drive
|
||||
> that list.
|
||||
>
|
||||
> ## Phase A — concrete plan (separate PR)
|
||||
>
|
||||
> **File split** in `tractor/spawn/`:
|
||||
> - `_spawn.py` stays as "core": keeps `SpawnMethodKey`,
|
||||
> `_spawn_method`, `proc_waiter`, `try_set_start_method`,
|
||||
> `exhaust_portal`, `cancel_on_completion`, `hard_kill`,
|
||||
> `soft_kill`, `new_proc` dispatcher, `_methods`
|
||||
> registry. Kept at this path so `_testing/pytest.py:228`
|
||||
> and other external imports don't break.
|
||||
> - `_trio.py` ← `trio_proc()` (lines 448–671, ~224 lines)
|
||||
> - `_mp.py` ← `mp_proc()` (lines 672–842, ~171 lines)
|
||||
> - `_methods` dict imports the two backend fns from the
|
||||
> new modules.
|
||||
>
|
||||
> **Commits** (each green via `/run-tests`):
|
||||
> 1. Drive `_testing/pytest.py` backend-tuple from
|
||||
> `get_args(SpawnMethodKey)` — tiny prep.
|
||||
> 2. Extract `trio_proc` → `spawn/_trio.py`; `_methods`
|
||||
> imports it.
|
||||
> 3. Extract `mp_proc` → `spawn/_mp.py`; `_methods`
|
||||
> imports it.
|
||||
> 4. Tidy `spawn/__init__.py` docstring to describe new
|
||||
> layout.
|
||||
>
|
||||
> No pyproject pin bump in Phase A (that's Phase B).
|
||||
>
|
||||
> **Workflow**: I'd run this in a new worktree via
|
||||
> `/open-wkt spawn_modularize` (branch
|
||||
> `wkt/spawn_modularize`), then open a PR before returning
|
||||
> to the current `claude_bun_opus4.7_subint_spawner_backend`
|
||||
> branch for Phase B/C.
|
||||
>
|
||||
> Green-light to `/open-wkt` and start Phase A?
|
||||
|
||||
## User final response
|
||||
|
||||
> all this first then yes, green light.
|
||||
|
||||
— log this `/prompt-io` entry first, then proceed with
|
||||
Phase A in a worktree.
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
---
|
||||
model: claude-opus-4-7[1m]
|
||||
service: claude
|
||||
session: subints-spawner-phase-a-impl
|
||||
timestamp: 2026-04-17T03:58:05Z
|
||||
git_ref: 61a73ba
|
||||
scope: code
|
||||
substantive: true
|
||||
raw_file: 20260417T035800Z_61a73ba_prompt_io.raw.md
|
||||
---
|
||||
|
||||
## Prompt
|
||||
|
||||
Follow-up from the design session logged in
|
||||
`20260417T034918Z_9703210_prompt_io.md`: the user
|
||||
greenlit Phase A after `/prompt-io` tracking, so
|
||||
execute the Phase A "modularize `spawn._spawn`" plan
|
||||
end-to-end in a new worktree — four logical steps
|
||||
(pytest harness prep, `trio_proc` extraction,
|
||||
`mp_proc` extraction, docstring refresh), each green
|
||||
via `/run-tests`.
|
||||
|
||||
User directives during execution:
|
||||
- **Option 3** for the test cadence ("continue A.2–A.4
|
||||
first, then run the full suite once at the end of
|
||||
Phase A").
|
||||
- **One commit** for the whole phase ("can't we just
|
||||
commit the whole patch in one commit?") instead of
|
||||
the 3/4-commit split I initially proposed.
|
||||
- **Don't pre-draft** commit messages — wait for the
|
||||
user to invoke `/commit-msg` (captured as feedback
|
||||
memory `feedback_no_auto_draft_commit_msgs.md`).
|
||||
|
||||
## Response summary
|
||||
|
||||
Produced the cohesive Phase A modularization patch,
|
||||
landed as commit `61a73bae` (subject: "Mv
|
||||
`trio_proc`/`mp_proc` to per-backend submods"). Five
|
||||
files changed, +565 / -418 lines.
|
||||
|
||||
Key pieces of the patch (generated by claude,
|
||||
reviewed by the human before commit):
|
||||
- `tractor/spawn/_trio.py` — **new**; receives
|
||||
`trio_proc()` verbatim from `_spawn.py`; imports
|
||||
cross-backend helpers back from `._spawn`.
|
||||
- `tractor/spawn/_mp.py` — **new**; receives
|
||||
`mp_proc()` verbatim; uses `from . import _spawn`
|
||||
for late-binding access to the mutable `_ctx` /
|
||||
`_spawn_method` globals (design decision made
|
||||
during impl, not the original plan).
|
||||
- `tractor/spawn/_spawn.py` — shrunk 847 → 448 LOC;
|
||||
import pruning; bottom-of-module late imports for
|
||||
`trio_proc` / `mp_proc` with a one-line comment
|
||||
explaining the circular-dep reason.
|
||||
- `tractor/spawn/__init__.py` — docstring refresh
|
||||
describing the new layout.
|
||||
- `tractor/_testing/pytest.py` — the valid-backend
|
||||
set now comes from `typing.get_args(SpawnMethodKey)`
|
||||
so future additions (`'subint'`) don't need harness
|
||||
edits.
|
||||
|
||||
## Files changed
|
||||
|
||||
See `git diff 61a73ba~1..61a73ba --stat`:
|
||||
|
||||
```
|
||||
tractor/_testing/pytest.py | 12 +-
|
||||
tractor/spawn/__init__.py | 31 +++-
|
||||
tractor/spawn/_mp.py | 235 ++++++++++++++++++++++++
|
||||
tractor/spawn/_spawn.py | 413 +-------------------------------
|
||||
tractor/spawn/_trio.py | 292 ++++++++++++++++++++++++++++
|
||||
5 files changed, 565 insertions(+), 418 deletions(-)
|
||||
```
|
||||
|
||||
Validation:
|
||||
- import probe + `_methods` wiring check — OK
|
||||
- spawn-relevant test subset — 37 passed, 1 skipped
|
||||
- full suite — 350 passed, 14 skipped, 7 xfailed, 1
|
||||
xpassed
|
||||
|
||||
## Human edits
|
||||
|
||||
None — committed as generated by claude (no diff
|
||||
between `.claude/git_commit_msg_LATEST.md` and the
|
||||
committed body, as far as the assistant could
|
||||
observe).
|
||||
|
|
@ -0,0 +1,138 @@
|
|||
---
|
||||
model: claude-opus-4-7[1m]
|
||||
service: claude
|
||||
timestamp: 2026-04-17T03:58:05Z
|
||||
git_ref: 61a73ba
|
||||
diff_cmd: git diff 61a73ba~1..61a73ba
|
||||
---
|
||||
|
||||
Code generated in this turn was committed verbatim as
|
||||
`61a73bae` ("Mv `trio_proc`/`mp_proc` to per-backend
|
||||
submods"). Per diff-ref mode, per-file code is captured
|
||||
via the pointers below, each followed by a prose
|
||||
summary of what the AI generated. Non-code output
|
||||
(sanity-check results, design rationale) is included
|
||||
verbatim.
|
||||
|
||||
## Per-file generated content
|
||||
|
||||
### `tractor/spawn/_trio.py` (new, 292 lines)
|
||||
|
||||
> `git diff 61a73ba~1..61a73ba -- tractor/spawn/_trio.py`
|
||||
|
||||
Pure lift-and-shift of `trio_proc()` out of
|
||||
`tractor/spawn/_spawn.py` (previously lines 448–670).
|
||||
Added AGPL header + module docstring describing the
|
||||
backend; imports include local `from ._spawn import
|
||||
cancel_on_completion, hard_kill, soft_kill` which
|
||||
creates the bottom-of-module late-import pattern in
|
||||
the core file to avoid a cycle. All call sites,
|
||||
log-format strings, and body logic are byte-identical
|
||||
to the originals — no semantic change.
|
||||
|
||||
### `tractor/spawn/_mp.py` (new, 235 lines)
|
||||
|
||||
> `git diff 61a73ba~1..61a73ba -- tractor/spawn/_mp.py`
|
||||
|
||||
Pure lift-and-shift of `mp_proc()` out of
|
||||
`tractor/spawn/_spawn.py` (previously lines 672–842).
|
||||
Same AGPL header convention. Key difference from
|
||||
`_trio.py`: uses `from . import _spawn` (module
|
||||
import, not from-import) for `_ctx` and
|
||||
`_spawn_method` references — these are mutated at
|
||||
runtime by `try_set_start_method()`, so late binding
|
||||
via `_spawn._ctx` / `_spawn._spawn_method` is required
|
||||
for correctness. Also imports `cancel_on_completion`,
|
||||
`soft_kill`, `proc_waiter` from `._spawn`.
|
||||
|
||||
### `tractor/spawn/_spawn.py` (modified, 847 → 448 LOC)
|
||||
|
||||
> `git diff 61a73ba~1..61a73ba -- tractor/spawn/_spawn.py`
|
||||
|
||||
- removed `trio_proc()` body (moved to `_trio.py`)
|
||||
- removed `mp_proc()` body (moved to `_mp.py`)
|
||||
- pruned imports now unused in core: `sys`,
|
||||
`is_root_process`, `current_actor`,
|
||||
`is_main_process`, `_mp_main`, `ActorFailure`,
|
||||
`pretty_struct`, `_pformat`
|
||||
- added bottom-of-file late imports
|
||||
`from ._trio import trio_proc` and
|
||||
`from ._mp import mp_proc` with a one-line
|
||||
comment explaining why (circular dep)
|
||||
- `_methods` dict unchanged structurally; still binds
|
||||
`'trio' → trio_proc`, `'mp_spawn' → mp_proc`,
|
||||
`'mp_forkserver' → mp_proc`
|
||||
|
||||
### `tractor/spawn/__init__.py` (modified)
|
||||
|
||||
> `git diff 61a73ba~1..61a73ba -- tractor/spawn/__init__.py`
|
||||
|
||||
Docstring rewrite only — no code. New text describes
|
||||
the per-backend `Layout` with `- ._spawn` (core),
|
||||
`- ._trio` (default), `- ._mp` (two mp variants), and
|
||||
reiterates the "no eager import" NOTE.
|
||||
|
||||
### `tractor/_testing/pytest.py` (modified)
|
||||
|
||||
> `git diff 61a73ba~1..61a73ba -- tractor/_testing/pytest.py`
|
||||
|
||||
Two-line logical change (plus a comment):
|
||||
- `from typing import` grows `get_args`
|
||||
- `pytest_generate_tests()` replaces the hardcoded
|
||||
`('mp_spawn','mp_forkserver','trio')` tuple with
|
||||
`get_args(SpawnMethodKey)` via lazy import
|
||||
`from tractor.spawn._spawn import SpawnMethodKey`
|
||||
inside the function body (matching the project's
|
||||
existing lazy-import convention for the spawn
|
||||
module)
|
||||
|
||||
## Non-code output (verbatim)
|
||||
|
||||
### Design rationale for the file split
|
||||
|
||||
From my Phase A plan turn (already captured in
|
||||
`20260417T034918Z_9703210_prompt_io.md`):
|
||||
- `_spawn.py` stays as the "core" rather than renamed
|
||||
to `_core.py` so external imports
|
||||
(`_testing/pytest.py:228` imports
|
||||
`try_set_start_method` from `tractor.spawn._spawn`)
|
||||
keep working without churn.
|
||||
- Per-backend extraction chosen over alternatives
|
||||
(e.g. splitting generic helpers further) because
|
||||
the immediate motivation is hosting a 3rd
|
||||
`_subint.py` sibling cleanly in Phase B.
|
||||
|
||||
### Sanity-check output (verbatim terminal excerpts)
|
||||
|
||||
Post-extraction import probe:
|
||||
```
|
||||
extraction OK
|
||||
_methods: {'trio': 'tractor.spawn._trio.trio_proc',
|
||||
'mp_spawn': 'tractor.spawn._mp.mp_proc',
|
||||
'mp_forkserver': 'tractor.spawn._mp.mp_proc'}
|
||||
```
|
||||
|
||||
Spawn-relevant test subset (`tests/test_local.py
|
||||
test_rpc.py test_spawning.py test_multi_program.py
|
||||
test_discovery.py`):
|
||||
```
|
||||
37 passed, 1 skipped, 14 warnings in 55.37s
|
||||
```
|
||||
|
||||
Full suite:
|
||||
```
|
||||
350 passed, 14 skipped, 7 xfailed, 1 xpassed,
|
||||
151 warnings in 437.73s (0:07:17)
|
||||
```
|
||||
|
||||
No regressions vs. `main`. One transient `-x`
|
||||
early-stop `ERROR` on
|
||||
`test_close_channel_explicit_remote_registrar[trio-True]`
|
||||
was flaky (passed solo, passed without `-x`), not
|
||||
caused by this refactor.
|
||||
|
||||
### Commit message
|
||||
|
||||
Also AI-drafted (via `/commit-msg`) — the 40-line
|
||||
message on commit `61a73bae` itself. Not reproduced
|
||||
here; see `git log -1 61a73bae`.
|
||||
|
|
@ -0,0 +1,97 @@
|
|||
---
|
||||
model: claude-opus-4-7[1m]
|
||||
service: claude
|
||||
session: subints-spawner-phase-b1-impl
|
||||
timestamp: 2026-04-17T12:44:37Z
|
||||
git_ref: 5cd6df5
|
||||
scope: code
|
||||
substantive: true
|
||||
raw_file: 20260417T124437Z_5cd6df5_prompt_io.raw.md
|
||||
---
|
||||
|
||||
## Prompt
|
||||
|
||||
Continuing the Phase B work from the design sessions
|
||||
logged in `20260417T034918Z_9703210_prompt_io.md` and
|
||||
the Phase A impl in `20260417T035800Z_61a73ba_...`.
|
||||
|
||||
User direction at this step:
|
||||
- "ok now let's continue with the subints part of the
|
||||
plan ya?" → kick off Phase B.
|
||||
- After I proposed a 4-sub-phase breakdown (B.1
|
||||
scaffolding, B.2 min-viable spawn, B.3 lifecycle,
|
||||
B.4 test enablement), user: "ok can we continue
|
||||
with B" → approving the phasing and design
|
||||
defaults (thread-per-subint, UDS default,
|
||||
code-string bootstrap via `interp.exec()`).
|
||||
- User switched the worktree to
|
||||
`subint_spawner_backend` (instead of opening a
|
||||
fresh worktree as I'd proposed): "i already
|
||||
switched branches in the wkt, you should work off
|
||||
`subint_spawner_backend` now".
|
||||
|
||||
Scope of this turn: **B.1 scaffolding only** —
|
||||
wire the `'subint'` name through the spawn-method
|
||||
registry + harness, add a feature-detected stub
|
||||
module, bump pyproject. No actual spawn flow.
|
||||
|
||||
## Response summary
|
||||
|
||||
Produced the Phase B.1 scaffolding patch, landed as
|
||||
commit `5cd6df58` (subject: "Add `'subint'` spawn
|
||||
backend scaffold (#379)"). Four files changed,
|
||||
+124 / -2 lines.
|
||||
|
||||
Key pieces (all generated by claude, reviewed by
|
||||
human before commit):
|
||||
- `tractor/spawn/_subint.py` — **new**; feature-
|
||||
detects `concurrent.interpreters`; `subint_proc()`
|
||||
stub raises `RuntimeError` on py<3.14 or
|
||||
`NotImplementedError` with issue-#379 URL on
|
||||
py≥3.14. Signature mirrors `trio_proc`/`mp_proc`
|
||||
so B.2 can drop the impl in without touching
|
||||
`_methods`.
|
||||
- `tractor/spawn/_spawn.py` — adds `'subint'` to
|
||||
`SpawnMethodKey`, grows a `case 'subint'` arm in
|
||||
`try_set_start_method()` with feature-gate, re-
|
||||
imports `sys` for the gate-error msg, adds late
|
||||
`from ._subint import subint_proc` import and
|
||||
`_methods` entry.
|
||||
- `tractor/_testing/pytest.py` — converts the
|
||||
gate-error into `pytest.UsageError` via a
|
||||
`try/except` around `try_set_start_method()` so
|
||||
`--spawn-backend=subint` on py<3.14 prints a
|
||||
clean banner instead of a traceback.
|
||||
- `pyproject.toml` — pin `requires-python` `<3.14`
|
||||
→ `<3.15`, add `3.14` trove classifier.
|
||||
|
||||
## Files changed
|
||||
|
||||
See `git diff 5cd6df5~1..5cd6df5 --stat`:
|
||||
|
||||
```
|
||||
pyproject.toml | 3 +-
|
||||
tractor/_testing/pytest.py | 8 +++-
|
||||
tractor/spawn/_spawn.py | 15 ++++++
|
||||
tractor/spawn/_subint.py | 100 +++++++++++++++++++++++++
|
||||
4 files changed, 124 insertions(+), 2 deletions(-)
|
||||
```
|
||||
|
||||
Validation on py3.13:
|
||||
- registry/gate probe — OK; `_has_subints = False`,
|
||||
`subint_proc()` raises `RuntimeError` as expected.
|
||||
- `try_set_start_method('subint')` — raises cleanly.
|
||||
- `pytest --spawn-backend=subint` — exits with
|
||||
`pytest.UsageError` banner.
|
||||
- Spawn-relevant test subset — 69 passed, 1 skipped.
|
||||
- Full suite on py3.13 deferred (scaffolding is
|
||||
no-op on <3.14).
|
||||
- Full-suite on py3.14 pending user setting up the
|
||||
venv.
|
||||
|
||||
## Human edits
|
||||
|
||||
None — committed as generated by claude. `uv.lock`
|
||||
was intentionally left unstaged by the user (pin-
|
||||
bump regenerated cp314 wheel entries but they were
|
||||
deferred to a follow-up).
|
||||
|
|
@ -0,0 +1,168 @@
|
|||
---
|
||||
model: claude-opus-4-7
|
||||
service: claude
|
||||
timestamp: 2026-04-17T12:44:37Z
|
||||
git_ref: 5cd6df5
|
||||
diff_cmd: git diff 5cd6df5~1..5cd6df5
|
||||
---
|
||||
|
||||
Code generated in this turn was committed verbatim as
|
||||
`5cd6df58` ("Add `'subint'` spawn backend scaffold
|
||||
(#379)"). Per diff-ref mode, per-file code is captured
|
||||
via the pointers below, each followed by a prose
|
||||
summary. Non-code output (sanity-check results,
|
||||
design rationale) is included verbatim.
|
||||
|
||||
## Per-file generated content
|
||||
|
||||
### `tractor/spawn/_subint.py` (new, 100 lines)
|
||||
|
||||
> `git diff 5cd6df5~1..5cd6df5 -- tractor/spawn/_subint.py`
|
||||
|
||||
New scaffolding module for the PEP 734 subinterpreter
|
||||
backend. Contents:
|
||||
- AGPL header + module docstring (describes backend
|
||||
intent, 3.14+ availability gate, and explicit
|
||||
"SCAFFOLDING STUB" status pointing at issue #379).
|
||||
- Top-level `try/except ImportError` wrapping
|
||||
`from concurrent import interpreters as
|
||||
_interpreters` → sets module-global
|
||||
`_has_subints: bool`. This lets the registry stay
|
||||
introspectable on py<3.14 while spawn-time still
|
||||
fails cleanly.
|
||||
- `subint_proc()` coroutine with signature matching
|
||||
`trio_proc`/`mp_proc` exactly (same param names,
|
||||
defaults, and `TaskStatus[Portal]` typing) —
|
||||
intentional so Phase B.2 can drop the impl in
|
||||
without touching `_methods` or changing call-site
|
||||
binding.
|
||||
- Body raises `RuntimeError` on py<3.14 (with
|
||||
`sys.version` printed) or `NotImplementedError`
|
||||
with issue-#379 URL on py≥3.14.
|
||||
|
||||
### `tractor/spawn/_spawn.py` (modified, +15 LOC)
|
||||
|
||||
> `git diff 5cd6df5~1..5cd6df5 -- tractor/spawn/_spawn.py`
|
||||
|
||||
- `import sys` re-added (pruned during Phase A, now
|
||||
needed again for the py-version string in the
|
||||
`'subint'` gate-error).
|
||||
- `SpawnMethodKey = Literal[...]` grows `'subint'` as
|
||||
the 4th member, with inline comment `# py3.14+ via
|
||||
`concurrent.interpreters` (PEP 734)`.
|
||||
- `try_set_start_method()` match-block grows a new
|
||||
`case 'subint':` arm that imports
|
||||
`from ._subint import _has_subints` lazily and
|
||||
raises `RuntimeError` with a multi-line gate msg
|
||||
if unavailable.
|
||||
- Bottom-of-module late-import section grows
|
||||
`from ._subint import subint_proc` alongside the
|
||||
existing `_trio` / `_mp` imports.
|
||||
- `_methods` dict grows `'subint': subint_proc`.
|
||||
|
||||
### `tractor/_testing/pytest.py` (modified, +8 LOC)
|
||||
|
||||
> `git diff 5cd6df5~1..5cd6df5 -- tractor/_testing/pytest.py`
|
||||
|
||||
`pytest_configure()` wraps the
|
||||
`try_set_start_method(backend)` call in a
|
||||
`try/except RuntimeError` that re-raises as
|
||||
`pytest.UsageError(str(err))`. Rationale: the gate
|
||||
error on py<3.14 is legitimately a configuration
|
||||
problem, not a test failure, so pytest's UsageError
|
||||
path (exit code 4) gives a clean single-line banner
|
||||
instead of a traceback.
|
||||
|
||||
### `pyproject.toml` (modified, +2 / -1)
|
||||
|
||||
> `git diff 5cd6df5~1..5cd6df5 -- pyproject.toml`
|
||||
|
||||
- `requires-python` pin relaxed `>=3.12, <3.14` →
|
||||
`>=3.12, <3.15` to admit 3.14 as a valid target.
|
||||
- Added `"Programming Language :: Python :: 3.14"`
|
||||
to the trove classifiers.
|
||||
|
||||
## Non-code output (verbatim)
|
||||
|
||||
### Design choices captured in the prior turn
|
||||
|
||||
(Previously confirmed by the user on April 17 in this
|
||||
session — relevant excerpts captured here for
|
||||
provenance since they drove the shape of the impl):
|
||||
|
||||
1. **Python pin**: `<3.15` + feature-gate the subint
|
||||
backend at spawn time — user answered option (a)
|
||||
from the design triage.
|
||||
2. **Deferred `fork()`-via-subint hack** from issue
|
||||
#379's "Our own thoughts" section.
|
||||
3. **Phase B phasing**: user approved the B.1 / B.2 /
|
||||
B.3 / B.4 breakdown — this commit is strictly B.1
|
||||
(scaffolding only, no spawn-flow impl).
|
||||
4. **Option (B) worktree strategy**: new worktree
|
||||
branched from `wkt/spawn_modularize`. *(Amended by
|
||||
user at runtime: user switched the existing
|
||||
`spawn_modularize` worktree to the
|
||||
`subint_spawner_backend` branch instead.)*
|
||||
|
||||
### Sanity-check output (verbatim terminal excerpts)
|
||||
|
||||
Registry / feature-gate verification on py3.13:
|
||||
```
|
||||
SpawnMethodKey values: ('trio', 'mp_spawn',
|
||||
'mp_forkserver', 'subint')
|
||||
_methods keys: ['trio', 'mp_spawn',
|
||||
'mp_forkserver', 'subint']
|
||||
_has_subints: False (py version: (3, 13) )
|
||||
[expected] RuntimeError: The 'subint' spawn backend
|
||||
requires Python 3.14+ (stdlib
|
||||
`concurrent.interpreters`, PEP 734).
|
||||
```
|
||||
|
||||
`try_set_start_method('subint')` gate on py3.13:
|
||||
```
|
||||
[expected] RuntimeError: Spawn method 'subint'
|
||||
requires Python 3.14+ (stdlib
|
||||
`concurrent.interpreters`, PEP 734).
|
||||
```
|
||||
|
||||
Pytest `--spawn-backend=subint` on py3.13 (the new
|
||||
UsageError wrapper kicking in):
|
||||
```
|
||||
ERROR: Spawn method 'subint' requires Python 3.14+
|
||||
(stdlib `concurrent.interpreters`, PEP 734).
|
||||
Current runtime: 3.13.11 (main, Dec 5 2025,
|
||||
16:06:33) [GCC 15.2.0]
|
||||
```
|
||||
|
||||
Collection probe: `404 tests collected in 0.18s`
|
||||
(no import errors from the new module).
|
||||
|
||||
Spawn-relevant test subset (`tests/test_local.py
|
||||
test_rpc.py test_spawning.py test_multi_program.py
|
||||
tests/discovery/`):
|
||||
```
|
||||
69 passed, 1 skipped, 10 warnings in 61.38s
|
||||
```
|
||||
|
||||
Full suite was **not** run on py3.13 for this commit
|
||||
— the scaffolding is no-op on <3.14 and full-suite
|
||||
validation under py3.14 is pending that venv being
|
||||
set up by the user.
|
||||
|
||||
### Commit message
|
||||
|
||||
Also AI-drafted (via `/commit-msg`, with the prose
|
||||
rewrapped through `/home/goodboy/.claude/skills/pr-msg/
|
||||
scripts/rewrap.py --width 67`) — the 33-line message
|
||||
on commit `5cd6df58` itself. Not reproduced here; see
|
||||
`git log -1 5cd6df58`.
|
||||
|
||||
### Known follow-ups flagged to user
|
||||
|
||||
- **`uv.lock` deferred**: pin-bump regenerated cp314
|
||||
wheel entries in `uv.lock`, but the user chose to
|
||||
not stage `uv.lock` for this commit. Warned
|
||||
explicitly.
|
||||
- **Phase B.2 needs py3.14 venv** — running the
|
||||
actual subint impl requires it; user said they'd
|
||||
set it up separately.
|
||||
|
|
@ -0,0 +1,117 @@
|
|||
---
|
||||
model: claude-opus-4-7
|
||||
service: claude
|
||||
session: subints-phase-b2-destroy-race-fix
|
||||
timestamp: 2026-04-18T04:25:26Z
|
||||
git_ref: 26fb820
|
||||
scope: code
|
||||
substantive: true
|
||||
raw_file: 20260418T042526Z_26fb820_prompt_io.raw.md
|
||||
---
|
||||
|
||||
## Prompt
|
||||
|
||||
Follow-up to Phase B.2 (`5cd6df58`) after the user
|
||||
observed intermittent mid-suite hangs when running
|
||||
the tractor test suite under `--spawn-backend=subint`
|
||||
on py3.14. The specific sequence of prompts over
|
||||
several turns:
|
||||
|
||||
1. User pointed at the `test_context_stream_semantics.py`
|
||||
suite as the first thing to make run clean under
|
||||
`--spawn-backend=subint`.
|
||||
2. After a series of `timeout`-terminated runs that
|
||||
gave no diagnostic info, user nudged me to stop
|
||||
relying on `timeout` and get actual runtime
|
||||
diagnostics ("the suite hangs indefinitely, so i
|
||||
don't think this `timeout 30` is helping you at
|
||||
all.."). Switched to
|
||||
`faulthandler.dump_traceback_later(...)` and a
|
||||
resource-tracker fixture to rule out leaks.
|
||||
3. Captured a stack pinning the hang on
|
||||
`_interpreters.destroy(interp_id)` in the subint
|
||||
teardown finally block.
|
||||
4. Proposed dedicated-OS-thread fix. User greenlit.
|
||||
5. Implemented + verified on-worktree; user needed
|
||||
to be pointed at the *worktree*'s `./py313` venv
|
||||
because bare `pytest` was picking up the main
|
||||
repo's venv (running un-patched `_subint.py`) and
|
||||
still hanging.
|
||||
|
||||
Running theme over the whole exchange: this patch
|
||||
only closes the *destroy race*. The user and I also
|
||||
traced through the deeper cancellation story — SIGINT
|
||||
can't reach subints, legacy-mode shares the GIL,
|
||||
portal-cancel dies when the IPC channel is already
|
||||
broken — and agreed the next step is a bounded
|
||||
hard-kill in `subint_proc`'s teardown plus a
|
||||
dedicated cancellation test suite. Those land as
|
||||
separate commits.
|
||||
|
||||
## Response summary
|
||||
|
||||
Produced the `tractor/spawn/_subint.py` patch landed
|
||||
as commit `26fb8206` ("Fix subint destroy race via
|
||||
dedicated OS thread"). One file, +110/-84 LOC.
|
||||
|
||||
Mechanism: swap `trio.to_thread.run_sync(_interpreters
|
||||
.exec, ...)` for a plain `threading.Thread(target=...
|
||||
, daemon=False)`. The trio thread cache recycles
|
||||
workers — so the OS thread that ran `_interpreters
|
||||
.exec()` remained alive in the cache holding a
|
||||
stale subint tstate, blocking
|
||||
`_interpreters.destroy()` in the finally indefinitely.
|
||||
A dedicated one-shot thread exits naturally after
|
||||
the sync target returns, releasing tstate and
|
||||
unblocking destroy.
|
||||
|
||||
Coordination across the trio↔thread boundary:
|
||||
- `trio.lowlevel.current_trio_token()` captured at
|
||||
`subint_proc` entry
|
||||
- driver thread signals `subint_exited.set()` back
|
||||
to parent trio via `trio.from_thread.run_sync(...,
|
||||
trio_token=token)` (synchronous from the thread's
|
||||
POV; the call returns after trio has run `.set()`)
|
||||
- `trio.RunFinishedError` swallowed in that path for
|
||||
the process-teardown case where parent trio already
|
||||
exited
|
||||
- teardown `finally` off-loads the sync
|
||||
`driver_thread.join()` via `to_thread.run_sync` (a
|
||||
cache thread carries no subint tstate — safe)
|
||||
|
||||
## Files changed
|
||||
|
||||
See `git diff 26fb820~1..26fb820 --stat`:
|
||||
|
||||
```
|
||||
tractor/spawn/_subint.py | 194 +++++++++++++++++++------------
|
||||
1 file changed, 110 insertions(+), 84 deletions(-)
|
||||
```
|
||||
|
||||
Validation:
|
||||
- `test_parent_cancels[chk_ctx_result_before_exit=True-
|
||||
cancel_method=ctx-child_returns_early=False]`
|
||||
(the specific test that was hanging for the user)
|
||||
— passed in 1.06s.
|
||||
- Full `tests/test_context_stream_semantics.py` under
|
||||
subint — 61 passed in 100.35s (clean-cache re-run:
|
||||
100.82s).
|
||||
- Trio backend regression subset — 69 passed / 1
|
||||
skipped / 89.19s — no regressions from this change.
|
||||
|
||||
## Non-code output
|
||||
|
||||
Beyond the `_subint.py` patch, the raw log also
|
||||
records the cancellation-semantics research that
|
||||
spanned this conversation but did not ship as code
|
||||
in *this* commit. Preserving it inline under "Non-
|
||||
code output" because it directly informs the
|
||||
Phase B.3 hard-kill impl that will follow (and any
|
||||
upstream CPython bug reports we end up filing).
|
||||
|
||||
## Human edits
|
||||
|
||||
None — committed as generated. The commit message
|
||||
itself was also AI-drafted via `/commit-msg` and
|
||||
rewrapped via the project's `rewrap.py --width 67`
|
||||
tooling; user landed it without edits.
|
||||
|
|
@ -0,0 +1,220 @@
|
|||
---
|
||||
model: claude-opus-4-7
|
||||
service: claude
|
||||
timestamp: 2026-04-18T04:25:26Z
|
||||
git_ref: 26fb820
|
||||
diff_cmd: git diff 26fb820~1..26fb820
|
||||
---
|
||||
|
||||
Code generated in this turn was committed as `26fb8206`
|
||||
("Fix subint destroy race via dedicated OS thread").
|
||||
Per diff-ref mode, per-file code is captured via the
|
||||
pointer below; the rest is verbatim non-code output
|
||||
— test-session diagnostics, CPython-internals
|
||||
reasoning, and the design synthesis that's worth
|
||||
keeping alongside the code since it informs the
|
||||
still-pending Phase B.3 hard-kill work.
|
||||
|
||||
## Per-file generated content
|
||||
|
||||
### `tractor/spawn/_subint.py` (modified, +110/-84 LOC)
|
||||
|
||||
> `git diff 26fb820~1..26fb820 -- tractor/spawn/_subint.py`
|
||||
|
||||
Rewrites the subint driver-thread strategy:
|
||||
- replaces `trio.to_thread.run_sync(_interpreters.exec, ...)`
|
||||
with a plain `threading.Thread(target=_subint_target,
|
||||
daemon=False)` so the OS thread truly exits after
|
||||
`_interpreters.exec()` returns
|
||||
- captures a `trio.lowlevel.current_trio_token()` at
|
||||
`subint_proc` entry; the driver thread signals
|
||||
completion back via `trio.from_thread.run_sync(
|
||||
subint_exited.set, trio_token=...)`
|
||||
- swallows `trio.RunFinishedError` in the signal path
|
||||
for the case where the parent trio loop has already
|
||||
exited (process teardown)
|
||||
- in the teardown `finally` off-loads the sync
|
||||
`driver_thread.join()` call to `trio.to_thread.run_sync`
|
||||
(a cache thread w/ no subint tstate — so no cache
|
||||
conflict) to wait for the driver thread to fully
|
||||
exit before calling `_interpreters.destroy()`
|
||||
|
||||
## Non-code output (verbatim) — the CPython-internals research
|
||||
|
||||
### What went wrong before this commit
|
||||
|
||||
Under `--spawn-backend=subint` on py3.14, most single
|
||||
tests passed but longer runs hung intermittently. The
|
||||
position of the hang moved between runs (test #22 on
|
||||
one run, test #53 on another) suggesting a timing-
|
||||
dependent race rather than a deterministic bug.
|
||||
|
||||
`faulthandler.dump_traceback_later()` eventually
|
||||
caught a stack with the main thread blocked in
|
||||
`_interpreters.destroy(interp_id)` at `_subint.py:293`.
|
||||
Only 2 threads were alive:
|
||||
- main thread waiting in `_interpreters.destroy()`
|
||||
- one idle trio thread-cache worker in
|
||||
`trio._core._thread_cache._work`
|
||||
|
||||
No subint was still running (`_interpreters.list_all()`
|
||||
showed only the main interp). A resource-tracker
|
||||
pytest fixture confirmed threads/subints did NOT
|
||||
accumulate across tests — this was not a leak but a
|
||||
specific "destroy blocks on cached thread w/ stale
|
||||
tstate" race.
|
||||
|
||||
### Why the race exists
|
||||
|
||||
`trio.to_thread.run_sync` uses a thread *cache* to
|
||||
avoid OS-thread creation overhead. When the sync
|
||||
callable returns, the OS thread is NOT terminated —
|
||||
it's parked in `_thread_cache._work` waiting for the
|
||||
next job. CPython's subinterpreter implementation
|
||||
attaches a **tstate** (thread-state object) to each
|
||||
OS thread that ever entered a subint via
|
||||
`_interpreters.exec()`. That tstate is released
|
||||
lazily — either when the thread picks up a new job
|
||||
(which re-attaches a new tstate, evicting the old
|
||||
one) or when the thread truly exits.
|
||||
|
||||
`_interpreters.destroy(interp_id)` waits for *all*
|
||||
tstates associated w/ that subint to be released
|
||||
before it can proceed. If the cached worker is idle
|
||||
holding the stale tstate, destroy blocks indefinitely.
|
||||
Whether the race manifests depends on timing — if
|
||||
the cached thread happens to pick up another job
|
||||
quickly, destroy unblocks; if it sits idle, we hang.
|
||||
|
||||
### Why a dedicated `threading.Thread` fixes it
|
||||
|
||||
A plain `threading.Thread(target=_subint_target,
|
||||
daemon=False)` runs its target once and exits. When
|
||||
the target returns, OS-thread teardown (`_bootstrap_inner`
|
||||
→ `_bootstrap`) fires and CPython releases the
|
||||
tstate for that thread. `_interpreters.destroy()`
|
||||
then has no blocker.
|
||||
|
||||
### Diagnostic tactics that actually helped
|
||||
|
||||
1. `faulthandler.dump_traceback_later(n, repeat=False,
|
||||
file=open(path, 'w'))` for captured stack dumps on
|
||||
hang. Critically, pipe to a `file=` not stderr —
|
||||
pytest captures stderr weirdly and the dump is
|
||||
easy to miss.
|
||||
2. A resource-tracker autouse fixture printing
|
||||
per-test `threading.active_count()` +
|
||||
`len(_interpreters.list_all())` deltas → ruled out
|
||||
leak-accumulation theories quickly.
|
||||
3. Running the hanging test *solo* vs in-suite —
|
||||
when solo passes but in-suite hangs, you know
|
||||
it's a cross-test state-transfer bug rather than
|
||||
a test-internal bug.
|
||||
|
||||
### Design synthesis — SIGINT + subints + SC
|
||||
|
||||
The user and I walked through the cancellation
|
||||
semantics of PEP 684/734 subinterpreters in detail.
|
||||
Key findings we want to preserve:
|
||||
|
||||
**Signal delivery in subints (stdlib limitation).**
|
||||
CPython's signal machinery only delivers signals
|
||||
(SIGINT included) to the *main thread of the main
|
||||
interpreter*. Subints cannot install signal handlers
|
||||
that will ever fire. This is an intentional design
|
||||
choice in PEP 684 and not expected to change. For
|
||||
tractor's subint actors, this means:
|
||||
|
||||
- Ctrl-C never reaches a subint directly.
|
||||
- `trio.run()` running on a worker thread (as we do
|
||||
for subints) already skips SIGINT handler install
|
||||
because `signal.signal()` raises on non-main
|
||||
threads.
|
||||
- The only cancellation surface into a subint is
|
||||
our IPC `Portal.cancel_actor()`.
|
||||
|
||||
**Legacy-mode subints share the main GIL** (which
|
||||
our impl uses since `msgspec` lacks PEP 684 support
|
||||
per `jcrist/msgspec#563`). This means a stuck subint
|
||||
thread can starve the parent's trio loop during
|
||||
cancellation — the parent can't even *start* its
|
||||
teardown handling until the subint yields the GIL.
|
||||
|
||||
**Failure modes identified for Phase B.3 audit:**
|
||||
|
||||
1. Portal cancel lands cleanly → subint unwinds →
|
||||
thread exits → destroy succeeds. (Happy path.)
|
||||
2. IPC channel is already broken when we try to
|
||||
send cancel (e.g., `test_ipc_channel_break_*`)
|
||||
→ cancel raises `BrokenResourceError` → subint
|
||||
keeps running unaware → parent hangs waiting for
|
||||
`subint_exited`. This is what breaks
|
||||
`test_advanced_faults.py` under subint.
|
||||
3. Subint is stuck in non-checkpointing Python code
|
||||
→ portal-cancel msg queued but never processed.
|
||||
4. Subint is in a shielded cancel scope when cancel
|
||||
arrives → delay until shield exits.
|
||||
|
||||
**Current teardown has a shield-bug too:**
|
||||
`trio.CancelScope(shield=True)` wrapping the `finally`
|
||||
block absorbs Ctrl-C, so even when the user tries
|
||||
to break out they can't. This is the reason
|
||||
`test_ipc_channel_break_during_stream[break_parent-...
|
||||
no_msgstream_aclose]` locks up unkillable.
|
||||
|
||||
**B.3 hard-kill fix plan (next commit):**
|
||||
|
||||
1. Bound `driver_thread.join()` with
|
||||
`trio.move_on_after(HARD_KILL_TIMEOUT)`.
|
||||
2. If it times out, log a warning naming the
|
||||
`interp_id` and switch the driver thread to
|
||||
`daemon=True` mode (not actually possible after
|
||||
start — so instead create as daemon=True upfront
|
||||
and accept the tradeoff of proc-exit not waiting
|
||||
for a stuck subint).
|
||||
3. Best-effort `_interpreters.destroy()`; catch the
|
||||
`InterpreterError` if the subint is still running.
|
||||
4. Document that the leak is real and the only
|
||||
escape hatch we have without upstream cooperation.
|
||||
|
||||
**Test plan for Phase B.3:**
|
||||
|
||||
New `tests/test_subint_cancellation.py` covering:
|
||||
- SIGINT at spawn
|
||||
- SIGINT mid-portal-RPC
|
||||
- SIGINT during shielded section in subint
|
||||
- Dead-channel cancel (mirror of `test_ipc_channel_
|
||||
break_during_stream` minimized)
|
||||
- Non-checkpointing subint (tight `while True` in
|
||||
user code)
|
||||
- Per-test `pytest-timeout`-style bounds so the
|
||||
tests visibly fail instead of wedging the runner
|
||||
|
||||
### Sanity-check output (verbatim terminal excerpts)
|
||||
|
||||
Post-fix single-test validation:
|
||||
```
|
||||
1 passed, 1 warning in 1.06s
|
||||
```
|
||||
(same test that was hanging pre-fix:
|
||||
`test_parent_cancels[...cancel_method=ctx-...False]`)
|
||||
|
||||
Full `tests/test_context_stream_semantics.py`
|
||||
under subint:
|
||||
```
|
||||
61 passed, 1 warning in 100.35s (0:01:40)
|
||||
```
|
||||
and a clean-cache re-run:
|
||||
```
|
||||
61 passed, 1 warning in 100.82s (0:01:40)
|
||||
```
|
||||
|
||||
No regressions on trio backend (same subset):
|
||||
```
|
||||
69 passed, 1 skipped, 3 warnings in 89.19s
|
||||
```
|
||||
|
||||
### Commit msg
|
||||
|
||||
Also AI-drafted via `/commit-msg` + `rewrap.py
|
||||
--width 67`. See `git log -1 26fb820`.
|
||||
|
|
@ -0,0 +1,111 @@
|
|||
---
|
||||
model: claude-opus-4-7
|
||||
service: claude
|
||||
session: subint-phase-b-hang-classification
|
||||
timestamp: 2026-04-20T19:27:39Z
|
||||
git_ref: HEAD (pre-commit; on branch `subint_spawner_backend`)
|
||||
scope: docs
|
||||
substantive: true
|
||||
raw_file: 20260420T192739Z_5e8cd8b2_prompt_io.raw.md
|
||||
---
|
||||
|
||||
## Prompt
|
||||
|
||||
Collab between user (`goodboy`) and `claude` to
|
||||
disambiguate two distinct hang modes hit during
|
||||
Phase B subint-spawn-backend bringup (issue #379).
|
||||
The user ran the failing suites, captured `strace`
|
||||
evidence on hung pytest pids, and set the framing:
|
||||
|
||||
> "These cannot be the same bug. Different fix
|
||||
> paths. Write them up separately or we'll keep
|
||||
> conflating them."
|
||||
|
||||
Follow-on asks:
|
||||
- Cross-link each doc from its reproducer test so
|
||||
a future triager lands on the analysis without
|
||||
needing to grep `ai/conc-anal/`.
|
||||
- On `test_stale_entry_is_deleted`: arm
|
||||
`dump_on_hang(seconds=20, path=...)` so a
|
||||
regression captures a stack dump (pytest stderr
|
||||
capture otherwise eats `faulthandler` output).
|
||||
- Keep `test_stale_entry_is_deleted` un-`skip`ped
|
||||
so the dump file remains inspectable.
|
||||
|
||||
Defer the actual fix for the class-2 hang
|
||||
(Ctrl-C-able, tractor-side bug) to a later commit
|
||||
per the incremental-commits preference.
|
||||
|
||||
## Response summary
|
||||
|
||||
Wrote two new `ai/conc-anal/` classification docs
|
||||
and cross-linked them from the reproducer tests:
|
||||
|
||||
1. **`subint_sigint_starvation_issue.md`** —
|
||||
class 1: abandoned-legacy-subint thread +
|
||||
shared GIL starves main trio loop →
|
||||
signal-wakeup-fd pipe fills → SIGINT silently
|
||||
dropped (`write() = EAGAIN`). Pytest process
|
||||
un-Ctrl-C-able. Structurally a CPython limit;
|
||||
blocked on `msgspec` PEP 684 support
|
||||
(jcrist/msgspec#563). Reproducer:
|
||||
`test_stale_entry_is_deleted[subint]`.
|
||||
|
||||
2. **`subint_cancel_delivery_hang_issue.md`** —
|
||||
class 2: parent-side trio task parks on an
|
||||
orphaned IPC channel after subint teardown;
|
||||
no clean EOF delivered to waiting receiver.
|
||||
Ctrl-C-able (main trio loop iterating fine).
|
||||
OUR bug to fix. Candidate fix: explicit
|
||||
parent-side channel abort in `subint_proc`'s
|
||||
hard-kill teardown. Reproducer:
|
||||
`test_subint_non_checkpointing_child`.
|
||||
|
||||
Test-side cross-links:
|
||||
- `tests/discovery/test_registrar.py`:
|
||||
`test_stale_entry_is_deleted` → `trio.run(main)`
|
||||
wrapped in `dump_on_hang(seconds=20,
|
||||
path=<per-method-tmp>)`; long inline comment
|
||||
summarizes `strace` evidence + root-cause chain
|
||||
and points at both docs.
|
||||
- `tests/test_subint_cancellation.py`:
|
||||
`test_subint_non_checkpointing_child` docstring
|
||||
extended with "KNOWN ISSUE (Ctrl-C-able hang)"
|
||||
section pointing at the class-2 doc + noting
|
||||
the class-1 doc is NOT what this test hits.
|
||||
|
||||
## Files changed
|
||||
|
||||
- `ai/conc-anal/subint_sigint_starvation_issue.md`
|
||||
— new, 205 LOC
|
||||
- `ai/conc-anal/subint_cancel_delivery_hang_issue.md`
|
||||
— new, 161 LOC
|
||||
- `tests/discovery/test_registrar.py` — +52/-1
|
||||
(arm `dump_on_hang`, inline-comment cross-link)
|
||||
- `tests/test_subint_cancellation.py` — +26
|
||||
(docstring "KNOWN ISSUE" block)
|
||||
|
||||
## Human edits
|
||||
|
||||
Substantive collab — prose was jointly iterated:
|
||||
|
||||
- User framed the two-doc split, set the
|
||||
classification criteria (Ctrl-C-able vs not),
|
||||
and provided the `strace` evidence.
|
||||
- User decided to keep `test_stale_entry_is_deleted`
|
||||
un-`skip`ped (my initial suggestion was
|
||||
`pytestmark.skipif(spawn_backend=='subint')`).
|
||||
- User chose the candidate fix ordering for
|
||||
class 2 and marked "explicit parent-side channel
|
||||
abort" as the surgical preferred fix.
|
||||
- User picked the file naming convention
|
||||
(`subint_<hang-shape>_issue.md`) over my initial
|
||||
`hang_class_{1,2}.md`.
|
||||
- Assistant drafted the prose, aggregated prior-
|
||||
session root-cause findings from Phase B.2/B.3
|
||||
bringup, and wrote the test-side cross-linking
|
||||
comments.
|
||||
|
||||
No further mechanical edits expected before
|
||||
commit; user may still rewrap via
|
||||
`scripts/rewrap.py` if preferred.
|
||||
|
|
@ -0,0 +1,198 @@
|
|||
---
|
||||
model: claude-opus-4-7
|
||||
service: claude
|
||||
timestamp: 2026-04-20T19:27:39Z
|
||||
git_ref: HEAD (pre-commit; will land on branch `subint_spawner_backend`)
|
||||
diff_cmd: git diff HEAD~1..HEAD
|
||||
---
|
||||
|
||||
Collab between `goodboy` (user) and `claude` (this
|
||||
assistant) spanning multiple test-run iterations on
|
||||
branch `subint_spawner_backend`. The user ran the
|
||||
failing suites, captured `strace` evidence on the
|
||||
hung pytest pids, and set the direction ("these are
|
||||
two different hangs — write them up separately so
|
||||
we don't re-confuse ourselves later"). The assistant
|
||||
aggregated prior-session findings (Phase B.2/B.3
|
||||
bringup) into two classification docs + test-side
|
||||
cross-links. All prose was jointly iterated; the
|
||||
user had final say on framing and decided which
|
||||
candidate fix directions to list.
|
||||
|
||||
## Per-file generated content
|
||||
|
||||
### `ai/conc-anal/subint_sigint_starvation_issue.md` (new, 205 LOC)
|
||||
|
||||
> `git diff HEAD~1..HEAD -- ai/conc-anal/subint_sigint_starvation_issue.md`
|
||||
|
||||
Writes up the "abandoned-legacy-subint thread wedges
|
||||
the parent trio loop" class. Key sections:
|
||||
|
||||
- **Symptom** — `test_stale_entry_is_deleted[subint]`
|
||||
hangs indefinitely AND is un-Ctrl-C-able.
|
||||
- **Evidence** — annotated `strace` excerpt showing
|
||||
SIGINT delivered to pytest, C-level signal handler
|
||||
tries to write to the signal-wakeup-fd pipe, gets
|
||||
`write() = -1 EAGAIN (Resource temporarily
|
||||
unavailable)`. Pipe is full because main trio loop
|
||||
isn't iterating often enough to drain it.
|
||||
- **Root-cause chain** — our hard-kill abandons the
|
||||
`daemon=True` driver OS thread after
|
||||
`_HARD_KILL_TIMEOUT`; the subint *inside* that
|
||||
thread is still running `trio.run()`;
|
||||
`_interpreters.destroy()` cannot force-stop a
|
||||
running subint (raises `InterpreterError`); legacy
|
||||
subints share the main GIL → abandoned subint
|
||||
starves main trio loop → wakeup-fd fills → SIGINT
|
||||
silently dropped.
|
||||
- **Why it's structurally a CPython limit** — no
|
||||
public force-destroy primitive for a running
|
||||
subint; the only escape is per-interpreter GIL
|
||||
isolation, gated on msgspec PEP 684 adoption
|
||||
(jcrist/msgspec#563).
|
||||
- **Current escape hatch** — harness-side SIGINT
|
||||
loop in the `daemon` fixture teardown that kills
|
||||
the bg registrar subproc, eventually unblocking
|
||||
a parent-side recv enough for the main loop to
|
||||
drain the wakeup pipe.
|
||||
|
||||
### `ai/conc-anal/subint_cancel_delivery_hang_issue.md` (new, 161 LOC)
|
||||
|
||||
> `git diff HEAD~1..HEAD -- ai/conc-anal/subint_cancel_delivery_hang_issue.md`
|
||||
|
||||
Writes up the *sibling* hang class — same subint
|
||||
backend, distinct root cause:
|
||||
|
||||
- **TL;DR** — Ctrl-C-able, so NOT the SIGINT-
|
||||
starvation class; main trio loop iterates fine;
|
||||
ours to fix.
|
||||
- **Symptom** — `test_subint_non_checkpointing_child`
|
||||
hangs past the expected `_HARD_KILL_TIMEOUT`
|
||||
budget even after the subint is torn down.
|
||||
- **Diagnosis** — a parent-side trio task (likely
|
||||
a `chan.recv()` in `process_messages`) parks on
|
||||
an orphaned IPC channel; channel was torn down
|
||||
without emitting a clean EOF /
|
||||
`BrokenResourceError` to the waiting receiver.
|
||||
- **Candidate fix directions** — listed in rough
|
||||
order of preference:
|
||||
1. Explicit parent-side channel abort in
|
||||
`subint_proc`'s hard-kill teardown (surgical;
|
||||
most likely).
|
||||
2. Audit `process_messages` to add a timeout or
|
||||
cancel-scope protection that catches the
|
||||
orphaned-recv state.
|
||||
3. Wrap subint IPC channel construction in a
|
||||
sentinel that can force-close from the parent
|
||||
side regardless of subint liveness.
|
||||
|
||||
### `tests/discovery/test_registrar.py` (modified, +52/-1 LOC)
|
||||
|
||||
> `git diff HEAD~1..HEAD -- tests/discovery/test_registrar.py`
|
||||
|
||||
Wraps the `trio.run(main)` call at the bottom of
|
||||
`test_stale_entry_is_deleted` in
|
||||
`dump_on_hang(seconds=20, path=<per-method-tmp>)`.
|
||||
Adds a long inline comment that:
|
||||
- Enumerates variant-by-variant status
|
||||
(`[trio]`/`[mp_*]` = clean; `[subint]` = hangs
|
||||
+ un-Ctrl-C-able)
|
||||
- Summarizes the `strace` evidence and root-cause
|
||||
chain inline (so a future reader hitting this
|
||||
test doesn't need to cross-ref the doc to
|
||||
understand the hang shape)
|
||||
- Points at
|
||||
`ai/conc-anal/subint_sigint_starvation_issue.md`
|
||||
for full analysis
|
||||
- Cross-links to the *sibling*
|
||||
`subint_cancel_delivery_hang_issue.md` so
|
||||
readers can tell the two classes apart
|
||||
- Explains why it's kept un-`skip`ped: the dump
|
||||
file is useful if the hang ever returns after
|
||||
a refactor. pytest stderr capture would
|
||||
otherwise eat `faulthandler` output, hence the
|
||||
file path.
|
||||
|
||||
### `tests/test_subint_cancellation.py` (modified, +26 LOC)
|
||||
|
||||
> `git diff HEAD~1..HEAD -- tests/test_subint_cancellation.py`
|
||||
|
||||
Extends the docstring of
|
||||
`test_subint_non_checkpointing_child` with a
|
||||
"KNOWN ISSUE (Ctrl-C-able hang)" block:
|
||||
- Describes the current hang: parent-side orphaned
|
||||
IPC recv after hard-kill; distinct from the
|
||||
SIGINT-starvation sibling class.
|
||||
- Cites `strace` distinguishing signal: wakeup-fd
|
||||
`write() = 1` (not `EAGAIN`) — i.e. main loop
|
||||
iterating.
|
||||
- Points at
|
||||
`ai/conc-anal/subint_cancel_delivery_hang_issue.md`
|
||||
for full analysis + candidate fix directions.
|
||||
- Clarifies that the *other* sibling doc
|
||||
(SIGINT-starvation) is NOT what this test hits.
|
||||
|
||||
## Non-code output
|
||||
|
||||
### Classification reasoning (why two docs, not one)
|
||||
|
||||
The user and I converged on the two-doc split after
|
||||
running the suites and noticing two *qualitatively
|
||||
different* hang symptoms:
|
||||
|
||||
1. `test_stale_entry_is_deleted[subint]` — pytest
|
||||
process un-Ctrl-C-able. Ctrl-C at the terminal
|
||||
does nothing. Must kill-9 from another shell.
|
||||
2. `test_subint_non_checkpointing_child` — pytest
|
||||
process Ctrl-C-able. One Ctrl-C at the prompt
|
||||
unblocks cleanly and the test reports a hang
|
||||
via pytest-timeout.
|
||||
|
||||
From the user: "These cannot be the same bug.
|
||||
Different fix paths. Write them up separately or
|
||||
we'll keep conflating them."
|
||||
|
||||
`strace` on the `[subint]` hang gave the decisive
|
||||
signal for the first class:
|
||||
|
||||
```
|
||||
--- SIGINT {si_signo=SIGINT, si_code=SI_KERNEL} ---
|
||||
write(5, "\2", 1) = -1 EAGAIN (Resource temporarily unavailable)
|
||||
```
|
||||
|
||||
fd 5 is Python's signal-wakeup-fd pipe. `EAGAIN`
|
||||
on a `write()` of 1 byte to a pipe means the pipe
|
||||
buffer is full → reader side (main Python thread
|
||||
inside `trio.run()`) isn't consuming. That's the
|
||||
GIL-hostage signature.
|
||||
|
||||
The second class's `strace` showed `write(5, "\2",
|
||||
1) = 1` — clean drain — so the main trio loop was
|
||||
iterating and the hang had to be on the application
|
||||
side of things, not the kernel-↔-Python signal
|
||||
boundary.
|
||||
|
||||
### Why the candidate fix for class 2 is "explicit parent-side channel abort"
|
||||
|
||||
The second hang class has the trio loop alive. A
|
||||
parked `chan.recv()` that will never get bytes is
|
||||
fundamentally a tractor-side resource-lifetime bug
|
||||
— the IPC channel was torn down (subint destroyed)
|
||||
but no one explicitly raised
|
||||
`BrokenResourceError` at the parent-side receiver.
|
||||
The `subint_proc` hard-kill path is the natural
|
||||
place to add that notification, because it already
|
||||
knows the subint is unreachable at that point.
|
||||
|
||||
Alternative fix paths (blanket timeouts on
|
||||
`process_messages`, sentinel-wrapped channels) are
|
||||
less surgical and risk masking unrelated bugs —
|
||||
hence the preference ordering in the doc.
|
||||
|
||||
### Why we're not just patching the code now
|
||||
|
||||
The user explicitly deferred the fix to a later
|
||||
commit: "Document both classes now, land the fix
|
||||
for class 2 separately so the diff reviews clean."
|
||||
This matches the incremental-commits preference
|
||||
from memory.
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
# AI Prompt I/O Log — claude
|
||||
|
||||
This directory tracks prompt inputs and model
|
||||
outputs for AI-assisted development using
|
||||
`claude` (Claude Code).
|
||||
|
||||
## Policy
|
||||
|
||||
Prompt logging follows the
|
||||
[NLNet generative AI policy][nlnet-ai].
|
||||
All substantive AI contributions are logged
|
||||
with:
|
||||
- Model name and version
|
||||
- Timestamps
|
||||
- The prompts that produced the output
|
||||
- Unedited model output (`.raw.md` files)
|
||||
|
||||
[nlnet-ai]: https://nlnet.nl/foundation/policies/generativeAI/
|
||||
|
||||
## Usage
|
||||
|
||||
Entries are created by the `/prompt-io` skill
|
||||
or automatically via `/commit-msg` integration.
|
||||
|
||||
Human contributors remain accountable for all
|
||||
code decisions. AI-generated content is never
|
||||
presented as human-authored work.
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
ok now i want you to take a look at the most recent commit adding
|
||||
a `tpt_bind_addrs` to `open_root_actor()` and extend the existing
|
||||
tests/discovery/test_multiaddr* and friends to use this new param in
|
||||
at least one suite with parametrizations over,
|
||||
|
||||
- `registry_addrs == tpt_bind_addrs`, as in both inputs are the same.
|
||||
- `set(registry_addrs) >= set(tpt_bind_addrs)`, as in the registry
|
||||
addrs include the bind set.
|
||||
- `registry_addrs != tpt_bind_addrs`, where the reg set is disjoint from
|
||||
the bind set in all possible combos you can imagine.
|
||||
|
||||
All of the ^above cases should further be parametrized over,
|
||||
- the root being the registrar,
|
||||
- a non-registrar root using our bg `daemon` fixture.
|
||||
|
||||
once we have a fairly thorough test suite and have flushed out all
|
||||
bugs and edge cases we want to design a wrapping API which allows
|
||||
declaring full trees of actors' tpt endpoints using multiaddrs such
|
||||
that a `dict[str, list[str]]` of actor-name -> multiaddr can be used
|
||||
to configure a tree of actors-as-services given such an input
|
||||
"endpoints-table" can be matched with the number of appropriately
|
||||
named subactor spawns in a `tractor` user-app.
|
||||
|
||||
Here is a small example from piker,
|
||||
|
||||
- in piker's root conf.toml we define a `[network]` section which can
|
||||
define various actor-service-daemon names set to a maddr
|
||||
(multiaddress str).
|
||||
|
||||
- each actor whether part of the `pikerd` tree (as a sub) or spawned
|
||||
in other non-registrar rooted trees (such as `piker chart`) should
|
||||
be configurable in terms of its `tractor` tpt bind addresses via
|
||||
a simple service lookup table,
|
||||
|
||||
```toml
|
||||
[network]
|
||||
pikerd = [
|
||||
'/ip4/127.0.0.1/tcp/6116', # std localhost daemon-actor tree
|
||||
'/uds/run/user/1000/piker/pikerd@6116.sock', # same but serving UDS
|
||||
]
|
||||
chart = [
|
||||
'/ip4/127.0.0.1/tcp/3333', # std localhost daemon-actor tree
|
||||
'/uds/run/user/1000/piker/chart@3333.sock',
|
||||
]
|
||||
```
|
||||
|
||||
We should take whatever common API is needed to support this and
|
||||
distill it into a
|
||||
```python
|
||||
tractor.discovery.parse_endpoints(
|
||||
) -> dict[
|
||||
str,
|
||||
list[Address]
|
||||
|dict[str, list[Address]]
|
||||
# ^recursive case, see below
|
||||
]:
|
||||
```
|
||||
|
||||
style API which can,
|
||||
|
||||
- be re-used easily across dependent projects.
|
||||
- correctly raise tpt-backend support errors when a maddr specifying
|
||||
an unsupported proto is passed.
|
||||
- be used to handle "tunnelled" maddrs per
|
||||
https://github.com/multiformats/py-multiaddr/#tunneling such that
|
||||
for any such tunneled maddr-`str`-entry we deliver a data-structure
|
||||
which can easily be passed to nested `@acm`s which consecutively
|
||||
setup nested net bindspaces for binding the endpoint addrs using
|
||||
a combo of our `.ipc.*` machinery and, say for example something like
|
||||
https://github.com/svinota/pyroute2, more precisely say for
|
||||
managing tunnelled wireguard eps within network-namespaces,
|
||||
* https://docs.pyroute2.org/
|
||||
* https://docs.pyroute2.org/netns.html
|
||||
|
||||
remember to include use of all default `.claude/skills` throughout
|
||||
this work!
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
This is your first big boi, "from GH issue" design, plan and
|
||||
implement task.
|
||||
|
||||
We need to try and add sub-interpreter (aka subint) support per the
|
||||
issue,
|
||||
|
||||
https://github.com/goodboy/tractor/issues/379
|
||||
|
||||
Part of this work should include,
|
||||
|
||||
- modularizing and thus better organizing the `.spawn.*` subpkg by
|
||||
breaking up various backends currently in `spawn._spawn` into
|
||||
separate submods where it makes sense.
|
||||
|
||||
- add a new `._subint` backend which tries to keep as much of the
|
||||
inter-process-isolation machinery in use as possible but with plans
|
||||
to optimize for localhost only benefits as offered by python's
|
||||
subints where possible.
|
||||
|
||||
* utilizing localhost-only tpts like UDS, shm-buffers for
|
||||
performant IPC between subactors but also leveraging the benefits from
|
||||
the traditional OS subprocs mem/storage-domain isolation, linux
|
||||
namespaces where possible and as available/permitted by whatever
|
||||
is happening under the hood with how cpython implements subints.
|
||||
|
||||
* default configuration should encourage state isolation as with
|
||||
subprocs, but with explicit public escape hatches to enable rigorously
|
||||
managed shm channels for high performance apps.
|
||||
|
||||
- all tests should be (able to be) parameterized to use the new
|
||||
`subints` backend and enabled by flag in the harness using the
|
||||
existing `pytest --spawn-backend <spawn-backend>` support offered in
|
||||
the `open_root_actor()` and `.testing._pytest` harness override
|
||||
fixture.
|
||||
|
|
@ -420,20 +420,17 @@ Check out our experimental system for `guest`_-mode controlled
|
|||
|
||||
|
||||
async def aio_echo_server(
|
||||
to_trio: trio.MemorySendChannel,
|
||||
from_trio: asyncio.Queue,
|
||||
chan: tractor.to_asyncio.LinkedTaskChannel,
|
||||
) -> None:
|
||||
|
||||
# a first message must be sent **from** this ``asyncio``
|
||||
# task or the ``trio`` side will never unblock from
|
||||
# ``tractor.to_asyncio.open_channel_from():``
|
||||
to_trio.send_nowait('start')
|
||||
chan.started_nowait('start')
|
||||
|
||||
# XXX: this uses an ``from_trio: asyncio.Queue`` currently but we
|
||||
# should probably offer something better.
|
||||
while True:
|
||||
# echo the msg back
|
||||
to_trio.send_nowait(await from_trio.get())
|
||||
chan.send_nowait(await chan.get())
|
||||
await asyncio.sleep(0)
|
||||
|
||||
|
||||
|
|
@ -445,7 +442,7 @@ Check out our experimental system for `guest`_-mode controlled
|
|||
# message.
|
||||
async with tractor.to_asyncio.open_channel_from(
|
||||
aio_echo_server,
|
||||
) as (first, chan):
|
||||
) as (chan, first):
|
||||
|
||||
assert first == 'start'
|
||||
await ctx.started(first)
|
||||
|
|
@ -504,8 +501,10 @@ Yes, we spawn a python process, run ``asyncio``, start ``trio`` on the
|
|||
``asyncio`` loop, then send commands to the ``trio`` scheduled tasks to
|
||||
tell ``asyncio`` tasks what to do XD
|
||||
|
||||
We need help refining the `asyncio`-side channel API to be more
|
||||
`trio`-like. Feel free to sling your opinion in `#273`_!
|
||||
The ``asyncio``-side task receives a single
|
||||
``chan: LinkedTaskChannel`` handle providing a ``trio``-like
|
||||
API: ``.started_nowait()``, ``.send_nowait()``, ``.get()``
|
||||
and more. Feel free to sling your opinion in `#273`_!
|
||||
|
||||
|
||||
.. _#273: https://github.com/goodboy/tractor/issues/273
|
||||
|
|
@ -641,13 +640,15 @@ Help us push toward the future of distributed `Python`.
|
|||
- Typed capability-based (dialog) protocols ( see `#196
|
||||
<https://github.com/goodboy/tractor/issues/196>`_ with draft work
|
||||
started in `#311 <https://github.com/goodboy/tractor/pull/311>`_)
|
||||
- We **recently disabled CI-testing on windows** and need help getting
|
||||
it running again! (see `#327
|
||||
<https://github.com/goodboy/tractor/pull/327>`_). **We do have windows
|
||||
support** (and have for quite a while) but since no active hacker
|
||||
exists in the user-base to help test on that OS, for now we're not
|
||||
actively maintaining testing due to the added hassle and general
|
||||
latency..
|
||||
- **macOS is now officially supported** and tested in CI
|
||||
alongside Linux!
|
||||
- We **recently disabled CI-testing on windows** and need
|
||||
help getting it running again! (see `#327
|
||||
<https://github.com/goodboy/tractor/pull/327>`_). **We do
|
||||
have windows support** (and have for quite a while) but
|
||||
since no active hacker exists in the user-base to help
|
||||
test on that OS, for now we're not actively maintaining
|
||||
testing due to the added hassle and general latency..
|
||||
|
||||
|
||||
Feel like saying hi?
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ from tractor import (
|
|||
MsgStream,
|
||||
_testing,
|
||||
trionics,
|
||||
TransportClosed,
|
||||
)
|
||||
import trio
|
||||
import pytest
|
||||
|
|
@ -208,12 +209,16 @@ async def main(
|
|||
# TODO: is this needed or no?
|
||||
raise
|
||||
|
||||
except trio.ClosedResourceError:
|
||||
except (
|
||||
trio.ClosedResourceError,
|
||||
TransportClosed,
|
||||
) as _tpt_err:
|
||||
# NOTE: don't send if we already broke the
|
||||
# connection to avoid raising a closed-error
|
||||
# such that we drop through to the ctl-c
|
||||
# mashing by user.
|
||||
await trio.sleep(0.01)
|
||||
with trio.CancelScope(shield=True):
|
||||
await trio.sleep(0.01)
|
||||
|
||||
# timeout: int = 1
|
||||
# with trio.move_on_after(timeout) as cs:
|
||||
|
|
@ -247,6 +252,7 @@ async def main(
|
|||
await stream.send(i)
|
||||
pytest.fail('stream not closed?')
|
||||
except (
|
||||
TransportClosed,
|
||||
trio.ClosedResourceError,
|
||||
trio.EndOfChannel,
|
||||
) as send_err:
|
||||
|
|
|
|||
|
|
@ -18,15 +18,14 @@ async def aio_sleep_forever():
|
|||
|
||||
|
||||
async def bp_then_error(
|
||||
to_trio: trio.MemorySendChannel,
|
||||
from_trio: asyncio.Queue,
|
||||
chan: to_asyncio.LinkedTaskChannel,
|
||||
|
||||
raise_after_bp: bool = True,
|
||||
|
||||
) -> None:
|
||||
|
||||
# sync with `trio`-side (caller) task
|
||||
to_trio.send_nowait('start')
|
||||
chan.started_nowait('start')
|
||||
|
||||
# NOTE: what happens here inside the hook needs some refinement..
|
||||
# => seems like it's still `.debug._set_trace()` but
|
||||
|
|
@ -60,7 +59,7 @@ async def trio_ctx(
|
|||
to_asyncio.open_channel_from(
|
||||
bp_then_error,
|
||||
# raise_after_bp=not bp_before_started,
|
||||
) as (first, chan),
|
||||
) as (chan, first),
|
||||
|
||||
trio.open_nursery() as tn,
|
||||
):
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ async def sleep(
|
|||
|
||||
|
||||
async def open_ctx(
|
||||
n: tractor._supervise.ActorNursery
|
||||
n: tractor.runtime._supervise.ActorNursery
|
||||
):
|
||||
|
||||
# spawn both actors
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ Verify we can dump a `stackscope` tree on a hang.
|
|||
|
||||
'''
|
||||
import os
|
||||
import platform
|
||||
import signal
|
||||
|
||||
import trio
|
||||
|
|
@ -31,13 +32,26 @@ async def main(
|
|||
from_test: bool = False,
|
||||
) -> None:
|
||||
|
||||
if platform.system() != 'Darwin':
|
||||
tpt = 'uds'
|
||||
else:
|
||||
# XXX, precisely we can't use pytest's tmp-path generation
|
||||
# for tests.. apparently because:
|
||||
#
|
||||
# > The OSError: AF_UNIX path too long in macOS Python occurs
|
||||
# > because the path to the Unix domain socket exceeds the
|
||||
# > operating system's maximum path length limit (around 104
|
||||
#
|
||||
# WHICH IS just, wtf hilarious XD
|
||||
tpt = 'tcp'
|
||||
|
||||
async with (
|
||||
tractor.open_nursery(
|
||||
debug_mode=True,
|
||||
enable_stack_on_sig=True,
|
||||
# maybe_enable_greenback=False,
|
||||
loglevel='devx',
|
||||
enable_transports=['uds'],
|
||||
enable_transports=[tpt],
|
||||
) as an,
|
||||
):
|
||||
ptl: tractor.Portal = await an.start_actor(
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
import platform
|
||||
|
||||
import tractor
|
||||
import trio
|
||||
|
||||
|
|
@ -34,9 +36,22 @@ async def just_bp(
|
|||
|
||||
async def main():
|
||||
|
||||
if platform.system() != 'Darwin':
|
||||
tpt = 'uds'
|
||||
else:
|
||||
# XXX, precisely we can't use pytest's tmp-path generation
|
||||
# for tests.. apparently because:
|
||||
#
|
||||
# > The OSError: AF_UNIX path too long in macOS Python occurs
|
||||
# > because the path to the Unix domain socket exceeds the
|
||||
# > operating system's maximum path length limit (around 104
|
||||
#
|
||||
# WHICH IS just, wtf hilarious XD
|
||||
tpt = 'tcp'
|
||||
|
||||
async with tractor.open_nursery(
|
||||
debug_mode=True,
|
||||
enable_transports=['uds'],
|
||||
enable_transports=[tpt],
|
||||
loglevel='devx',
|
||||
) as n:
|
||||
p = await n.start_actor(
|
||||
|
|
|
|||
|
|
@ -90,7 +90,7 @@ async def main() -> list[int]:
|
|||
# yes, a nursery which spawns `trio`-"actors" B)
|
||||
an: ActorNursery
|
||||
async with tractor.open_nursery(
|
||||
loglevel='cancel',
|
||||
loglevel='error',
|
||||
# debug_mode=True,
|
||||
) as an:
|
||||
|
||||
|
|
@ -118,8 +118,10 @@ async def main() -> list[int]:
|
|||
cancelled: bool = await portal.cancel_actor()
|
||||
assert cancelled
|
||||
|
||||
print(f"STREAM TIME = {time.time() - start}")
|
||||
print(f"STREAM + SPAWN TIME = {time.time() - pre_start}")
|
||||
print(
|
||||
f"STREAM TIME = {time.time() - start}\n"
|
||||
f"STREAM + SPAWN TIME = {time.time() - pre_start}\n"
|
||||
)
|
||||
assert result_stream == list(range(seed))
|
||||
return result_stream
|
||||
|
||||
|
|
|
|||
|
|
@ -11,21 +11,17 @@ import tractor
|
|||
|
||||
|
||||
async def aio_echo_server(
|
||||
to_trio: trio.MemorySendChannel,
|
||||
from_trio: asyncio.Queue,
|
||||
|
||||
chan: tractor.to_asyncio.LinkedTaskChannel,
|
||||
) -> None:
|
||||
|
||||
# a first message must be sent **from** this ``asyncio``
|
||||
# task or the ``trio`` side will never unblock from
|
||||
# ``tractor.to_asyncio.open_channel_from():``
|
||||
to_trio.send_nowait('start')
|
||||
chan.started_nowait('start')
|
||||
|
||||
# XXX: this uses an ``from_trio: asyncio.Queue`` currently but we
|
||||
# should probably offer something better.
|
||||
while True:
|
||||
# echo the msg back
|
||||
to_trio.send_nowait(await from_trio.get())
|
||||
chan.send_nowait(await chan.get())
|
||||
await asyncio.sleep(0)
|
||||
|
||||
|
||||
|
|
@ -37,7 +33,7 @@ async def trio_to_aio_echo_server(
|
|||
# message.
|
||||
async with tractor.to_asyncio.open_channel_from(
|
||||
aio_echo_server,
|
||||
) as (first, chan):
|
||||
) as (chan, first):
|
||||
|
||||
assert first == 'start'
|
||||
await ctx.started(first)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,5 @@
|
|||
import os
|
||||
|
||||
|
||||
async def child_fn() -> str:
|
||||
return f"child OK pid={os.getpid()}"
|
||||
|
|
@ -0,0 +1,50 @@
|
|||
"""
|
||||
Integration test: spawning tractor actors from an MPI process.
|
||||
|
||||
When a parent is launched via ``mpirun``, Open MPI sets ``OMPI_*`` env
|
||||
vars that bind ``MPI_Init`` to the ``orted`` daemon. Tractor children
|
||||
inherit those env vars, so if ``inherit_parent_main=True`` (the default)
|
||||
the child re-executes ``__main__``, re-imports ``mpi4py``, and
|
||||
``MPI_Init_thread`` fails because the child was never spawned by
|
||||
``orted``::
|
||||
|
||||
getting local rank failed
|
||||
--> Returned value No permission (-17) instead of ORTE_SUCCESS
|
||||
|
||||
Passing ``inherit_parent_main=False`` and placing RPC functions in a
|
||||
separate importable module (``_child``) avoids the re-import entirely.
|
||||
|
||||
Usage::
|
||||
|
||||
mpirun --allow-run-as-root -np 1 python -m \
|
||||
examples.integration.mpi4py.inherit_parent_main
|
||||
"""
|
||||
|
||||
from mpi4py import MPI
|
||||
|
||||
import os
|
||||
import trio
|
||||
import tractor
|
||||
|
||||
from ._child import child_fn
|
||||
|
||||
|
||||
async def main() -> None:
|
||||
rank = MPI.COMM_WORLD.Get_rank()
|
||||
print(f"[parent] rank={rank} pid={os.getpid()}", flush=True)
|
||||
|
||||
async with tractor.open_nursery(start_method='trio') as an:
|
||||
portal = await an.start_actor(
|
||||
'mpi-child',
|
||||
enable_modules=[child_fn.__module__],
|
||||
# Without this the child replays __main__, which
|
||||
# re-imports mpi4py and crashes on MPI_Init.
|
||||
inherit_parent_main=False,
|
||||
)
|
||||
result = await portal.run(child_fn)
|
||||
print(f"[parent] got: {result}", flush=True)
|
||||
await portal.cancel_actor()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
trio.run(main)
|
||||
|
|
@ -10,7 +10,7 @@ async def main(service_name):
|
|||
await an.start_actor(service_name)
|
||||
|
||||
async with tractor.get_registry() as portal:
|
||||
print(f"Arbiter is listening on {portal.channel}")
|
||||
print(f"Registrar is listening on {portal.channel}")
|
||||
|
||||
async with tractor.wait_for_actor(service_name) as sockaddr:
|
||||
print(f"my_service is found at {sockaddr}")
|
||||
|
|
|
|||
|
|
@ -0,0 +1,27 @@
|
|||
{
|
||||
"nodes": {
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1769018530,
|
||||
"narHash": "sha256-MJ27Cy2NtBEV5tsK+YraYr2g851f3Fl1LpNHDzDX15c=",
|
||||
"owner": "nixos",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "88d3861acdd3d2f0e361767018218e51810df8a1",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "nixos",
|
||||
"ref": "nixos-unstable",
|
||||
"repo": "nixpkgs",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"root": {
|
||||
"inputs": {
|
||||
"nixpkgs": "nixpkgs"
|
||||
}
|
||||
}
|
||||
},
|
||||
"root": "root",
|
||||
"version": 7
|
||||
}
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
# An "impure" template thx to `pyproject.nix`,
|
||||
# https://pyproject-nix.github.io/pyproject.nix/templates.html#impure
|
||||
# https://github.com/pyproject-nix/pyproject.nix/blob/master/templates/impure/flake.nix
|
||||
{
|
||||
description = "An impure overlay (w dev-shell) using `uv`";
|
||||
|
||||
inputs = {
|
||||
nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable";
|
||||
};
|
||||
|
||||
outputs =
|
||||
{ nixpkgs, ... }:
|
||||
let
|
||||
inherit (nixpkgs) lib;
|
||||
forAllSystems = lib.genAttrs lib.systems.flakeExposed;
|
||||
in
|
||||
{
|
||||
devShells = forAllSystems (
|
||||
system:
|
||||
let
|
||||
pkgs = nixpkgs.legacyPackages.${system};
|
||||
|
||||
# XXX NOTE XXX, for now we overlay specific pkgs via
|
||||
# a major-version-pinned-`cpython`
|
||||
cpython = "python313";
|
||||
venv_dir = "py313";
|
||||
pypkgs = pkgs."${cpython}Packages";
|
||||
in
|
||||
{
|
||||
default = pkgs.mkShell {
|
||||
|
||||
packages = [
|
||||
# XXX, ensure sh completions activate!
|
||||
pkgs.bashInteractive
|
||||
pkgs.bash-completion
|
||||
|
||||
# XXX, on nix(os), use pkgs version to avoid
|
||||
# build/sys-sh-integration issues
|
||||
pkgs.ruff
|
||||
|
||||
pkgs.uv
|
||||
pkgs.${cpython}# ?TODO^ how to set from `cpython` above?
|
||||
];
|
||||
|
||||
shellHook = ''
|
||||
# unmask to debug **this** dev-shell-hook
|
||||
# set -e
|
||||
|
||||
# link-in c++ stdlib for various AOT-ext-pkgs (numpy, etc.)
|
||||
LD_LIBRARY_PATH="${pkgs.stdenv.cc.cc.lib}/lib:$LD_LIBRARY_PATH"
|
||||
|
||||
export LD_LIBRARY_PATH
|
||||
|
||||
# RUNTIME-SETTINGS
|
||||
# ------ uv ------
|
||||
# - always use the ./py313/ venv-subdir
|
||||
# - sync env with all extras
|
||||
export UV_PROJECT_ENVIRONMENT=${venv_dir}
|
||||
uv sync --dev --all-extras
|
||||
|
||||
# ------ TIPS ------
|
||||
# NOTE, to launch the py-venv installed `xonsh` (like @goodboy)
|
||||
# run the `nix develop` cmd with,
|
||||
# >> nix develop -c uv run xonsh
|
||||
'';
|
||||
};
|
||||
}
|
||||
);
|
||||
};
|
||||
}
|
||||
|
|
@ -9,7 +9,7 @@ name = "tractor"
|
|||
version = "0.1.0a6dev0"
|
||||
description = 'structured concurrent `trio`-"actors"'
|
||||
authors = [{ name = "Tyler Goodlet", email = "goodboy_foss@protonmail.com" }]
|
||||
requires-python = ">= 3.11"
|
||||
requires-python = ">=3.13, <3.15"
|
||||
readme = "docs/README.rst"
|
||||
license = "AGPL-3.0-or-later"
|
||||
keywords = [
|
||||
|
|
@ -24,11 +24,14 @@ keywords = [
|
|||
classifiers = [
|
||||
"Development Status :: 3 - Alpha",
|
||||
"Operating System :: POSIX :: Linux",
|
||||
"Operating System :: MacOS",
|
||||
"Framework :: Trio",
|
||||
"License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)",
|
||||
"Programming Language :: Python :: Implementation :: CPython",
|
||||
"Programming Language :: Python :: 3 :: Only",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Programming Language :: Python :: 3.13",
|
||||
"Programming Language :: Python :: 3.14",
|
||||
"Topic :: System :: Distributed Computing",
|
||||
]
|
||||
dependencies = [
|
||||
|
|
@ -41,49 +44,99 @@ dependencies = [
|
|||
"tricycle>=0.4.1,<0.5",
|
||||
"wrapt>=1.16.0,<2",
|
||||
"colorlog>=6.8.2,<7",
|
||||
|
||||
# built-in multi-actor `pdb` REPL
|
||||
"pdbp>=1.6,<2", # windows only (from `pdbp`)
|
||||
"pdbp>=1.8.2,<2", # windows only (from `pdbp`)
|
||||
|
||||
# typed IPC msging
|
||||
"msgspec>=0.19.0",
|
||||
"cffi>=1.17.1",
|
||||
"msgspec>=0.20.0",
|
||||
"bidict>=0.23.1",
|
||||
"multiaddr>=0.2.0",
|
||||
"platformdirs>=4.4.0",
|
||||
]
|
||||
|
||||
# ------ project ------
|
||||
|
||||
[dependency-groups]
|
||||
dev = [
|
||||
{include-group = 'devx'},
|
||||
{include-group = 'testing'},
|
||||
{include-group = 'repl'},
|
||||
]
|
||||
devx = [
|
||||
# `tractor.devx` tooling
|
||||
"stackscope>=0.2.2,<0.3",
|
||||
# ^ requires this?
|
||||
"typing-extensions>=4.14.1",
|
||||
# {include-group = 'sync_pause'}, # XXX, no 3.14 yet!
|
||||
]
|
||||
sync_pause = [
|
||||
"greenback>=1.2.1,<2", # TODO? 3.14 greenlet on nix?
|
||||
]
|
||||
testing = [
|
||||
# test suite
|
||||
# TODO: maybe some of these layout choices?
|
||||
# https://docs.pytest.org/en/8.0.x/explanation/goodpractices.html#choosing-a-test-layout-import-rules
|
||||
"pytest>=8.3.5",
|
||||
"pexpect>=4.9.0,<5",
|
||||
# `tractor.devx` tooling
|
||||
"greenback>=1.2.1,<2",
|
||||
"stackscope>=0.2.2,<0.3",
|
||||
# ^ requires this?
|
||||
"typing-extensions>=4.14.1",
|
||||
|
||||
# per-test wall-clock bound (used via
|
||||
# `@pytest.mark.timeout(..., method='thread')` on the
|
||||
# known-hanging `subint`-backend audit tests; see
|
||||
# `ai/conc-anal/subint_*_issue.md`).
|
||||
"pytest-timeout>=2.3",
|
||||
]
|
||||
repl = [
|
||||
"pyperclip>=1.9.0",
|
||||
"prompt-toolkit>=3.0.50",
|
||||
"xonsh>=0.19.2",
|
||||
"xonsh>=0.22.8",
|
||||
"psutil>=7.0.0",
|
||||
]
|
||||
lint = [
|
||||
"ruff>=0.9.6"
|
||||
]
|
||||
# XXX, used for linux-only hi perf eventfd+shm channels
|
||||
# now mostly moved over to `hotbaud`.
|
||||
eventfd = [
|
||||
"cffi>=1.17.1",
|
||||
]
|
||||
subints = [
|
||||
"msgspec>=0.21.0",
|
||||
]
|
||||
# TODO, add these with sane versions; were originally in
|
||||
# `requirements-docs.txt`..
|
||||
# docs = [
|
||||
# "sphinx>="
|
||||
# "sphinx_book_theme>="
|
||||
# ]
|
||||
|
||||
# ------ dependency-groups ------
|
||||
|
||||
# ------ dependency-groups ------
|
||||
[tool.uv.dependency-groups]
|
||||
# for subints, we require 3.14+ due to 2 issues,
|
||||
# - hanging behaviour for various multi-task teardown cases (see
|
||||
# "Availability" section in the `tractor.spawn._subints` doc string).
|
||||
# - `msgspec` support which is outstanding per PEP 684 upstream tracker:
|
||||
# https://github.com/jcrist/msgspec/issues/563
|
||||
#
|
||||
# https://docs.astral.sh/uv/concepts/projects/dependencies/#group-requires-python
|
||||
subints = {requires-python = ">=3.14"}
|
||||
eventfd = {requires-python = ">=3.13, <3.14"}
|
||||
sync_pause = {requires-python = ">=3.13, <3.14"}
|
||||
|
||||
[tool.uv.sources]
|
||||
# XXX NOTE, only for @goodboy's hacking on `pprint(sort_dicts=False)`
|
||||
# for the `pp` alias..
|
||||
# pdbp = { path = "../pdbp", editable = true }
|
||||
# ------ gh upstream ------
|
||||
# xonsh = { git = 'https://github.com/anki-code/xonsh.git', branch = 'prompt_next_suggestion' }
|
||||
# ^ https://github.com/xonsh/xonsh/pull/6048
|
||||
# xonsh = { git = 'https://github.com/xonsh/xonsh.git', branch = 'main' }
|
||||
xonsh = { path = "../xonsh", editable = true }
|
||||
|
||||
# [tool.uv.sources.pdbp]
|
||||
# XXX, in case we need to tmp patch again.
|
||||
# git = "https://github.com/goodboy/pdbp.git"
|
||||
# branch ="repair_stack_trace_frame_indexing"
|
||||
# path = "../pdbp"
|
||||
# editable = true
|
||||
|
||||
# ------ tool.uv.sources ------
|
||||
# TODO, distributed (multi-host) extensions
|
||||
|
|
@ -145,6 +198,8 @@ all_bullets = true
|
|||
|
||||
[tool.pytest.ini_options]
|
||||
minversion = '6.0'
|
||||
timeout = 200 # per-test hard limit
|
||||
# https://docs.pytest.org/en/stable/reference/reference.html#configuration-options
|
||||
testpaths = [
|
||||
'tests'
|
||||
]
|
||||
|
|
@ -155,10 +210,17 @@ addopts = [
|
|||
'--import-mode=importlib',
|
||||
# don't show frickin captured logs AGAIN in the report..
|
||||
'--show-capture=no',
|
||||
|
||||
# disable `xonsh` plugin
|
||||
# https://docs.pytest.org/en/stable/how-to/plugins.html#disabling-plugins-from-autoloading
|
||||
# https://docs.pytest.org/en/stable/how-to/plugins.html#deactivating-unregistering-a-plugin-by-name
|
||||
'-p no:xonsh'
|
||||
]
|
||||
log_cli = false
|
||||
# TODO: maybe some of these layout choices?
|
||||
# https://docs.pytest.org/en/8.0.x/explanation/goodpractices.html#choosing-a-test-layout-import-rules
|
||||
# pythonpath = "src"
|
||||
|
||||
# https://docs.pytest.org/en/stable/reference/reference.html#confval-console_output_style
|
||||
console_output_style = 'progress'
|
||||
# ------ tool.pytest ------
|
||||
|
|
|
|||
|
|
@ -1,8 +0,0 @@
|
|||
# vim: ft=ini
|
||||
# pytest.ini for tractor
|
||||
|
||||
[pytest]
|
||||
# don't show frickin captured logs AGAIN in the report..
|
||||
addopts = --show-capture='no'
|
||||
log_cli = false
|
||||
; minversion = 6.0
|
||||
|
|
@ -35,8 +35,8 @@ exclude = [
|
|||
line-length = 88
|
||||
indent-width = 4
|
||||
|
||||
# Assume Python 3.9
|
||||
target-version = "py311"
|
||||
# assume latest minor cpython
|
||||
target-version = "py313"
|
||||
|
||||
[lint]
|
||||
# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default.
|
||||
|
|
|
|||
|
|
@ -9,8 +9,11 @@ import os
|
|||
import signal
|
||||
import platform
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Literal
|
||||
|
||||
import pytest
|
||||
import tractor
|
||||
from tractor._testing import (
|
||||
examples_dir as examples_dir,
|
||||
tractor_test as tractor_test,
|
||||
|
|
@ -22,6 +25,8 @@ pytest_plugins: list[str] = [
|
|||
'tractor._testing.pytest',
|
||||
]
|
||||
|
||||
_ci_env: bool = os.environ.get('CI', False)
|
||||
_non_linux: bool = platform.system() != 'Linux'
|
||||
|
||||
# Sending signal.SIGINT on subprocess fails on windows. Use CTRL_* alternatives
|
||||
if platform.system() == 'Windows':
|
||||
|
|
@ -34,9 +39,8 @@ else:
|
|||
_INT_SIGNAL = signal.SIGINT
|
||||
_INT_RETURN_CODE = 1 if sys.version_info < (3, 8) else -signal.SIGINT.value
|
||||
_PROC_SPAWN_WAIT = (
|
||||
0.6
|
||||
if sys.version_info < (3, 7)
|
||||
else 0.4
|
||||
2 if _ci_env
|
||||
else 1
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -44,6 +48,80 @@ no_windows = pytest.mark.skipif(
|
|||
platform.system() == "Windows",
|
||||
reason="Test is unsupported on windows",
|
||||
)
|
||||
no_macos = pytest.mark.skipif(
|
||||
platform.system() == "Darwin",
|
||||
reason="Test is unsupported on MacOS",
|
||||
)
|
||||
|
||||
|
||||
def get_cpu_state(
|
||||
icpu: int = 0,
|
||||
setting: Literal[
|
||||
'scaling_governor',
|
||||
'*_pstate_max_freq',
|
||||
'scaling_max_freq',
|
||||
# 'scaling_cur_freq',
|
||||
] = '*_pstate_max_freq',
|
||||
) -> tuple[
|
||||
Path,
|
||||
str|int,
|
||||
]|None:
|
||||
'''
|
||||
Attempt to read the (first) CPU's setting according
|
||||
to the set `setting` from under the file-sys,
|
||||
|
||||
/sys/devices/system/cpu/cpu0/cpufreq/{setting}
|
||||
|
||||
Useful to determine latency headroom for various perf affected
|
||||
test suites.
|
||||
|
||||
'''
|
||||
try:
|
||||
# Read governor for core 0 (usually same for all)
|
||||
setting_path: Path = list(
|
||||
Path(f'/sys/devices/system/cpu/cpu{icpu}/cpufreq/')
|
||||
.glob(f'{setting}')
|
||||
)[0] # <- XXX must be single match!
|
||||
with open(
|
||||
setting_path,
|
||||
'r',
|
||||
) as f:
|
||||
return (
|
||||
setting_path,
|
||||
f.read().strip(),
|
||||
)
|
||||
except (FileNotFoundError, IndexError):
|
||||
return None
|
||||
|
||||
|
||||
def cpu_scaling_factor() -> float:
|
||||
'''
|
||||
Return a latency-headroom multiplier (>= 1.0) reflecting how
|
||||
much to inflate time-limits when CPU-freq scaling is active on
|
||||
linux.
|
||||
|
||||
When no scaling info is available (non-linux, missing sysfs),
|
||||
returns 1.0 (i.e. no headroom adjustment needed).
|
||||
|
||||
'''
|
||||
if _non_linux:
|
||||
return 1.
|
||||
|
||||
mx = get_cpu_state()
|
||||
cur = get_cpu_state(setting='scaling_max_freq')
|
||||
if mx is None or cur is None:
|
||||
return 1.
|
||||
|
||||
_mx_pth, max_freq = mx
|
||||
_cur_pth, cur_freq = cur
|
||||
cpu_scaled: float = int(cur_freq) / int(max_freq)
|
||||
|
||||
if cpu_scaled != 1.:
|
||||
return 1. / (
|
||||
cpu_scaled * 2 # <- bc likely "dual threaded"
|
||||
)
|
||||
|
||||
return 1.
|
||||
|
||||
|
||||
def pytest_addoption(
|
||||
|
|
@ -61,16 +139,54 @@ def pytest_addoption(
|
|||
|
||||
|
||||
@pytest.fixture(scope='session', autouse=True)
|
||||
def loglevel(request):
|
||||
def loglevel(
|
||||
request: pytest.FixtureRequest,
|
||||
) -> str:
|
||||
import tractor
|
||||
orig = tractor.log._default_loglevel
|
||||
level = tractor.log._default_loglevel = request.config.option.loglevel
|
||||
tractor.log.get_console_log(level)
|
||||
log = tractor.log.get_console_log(
|
||||
level=level,
|
||||
name='tractor', # <- enable root logger
|
||||
)
|
||||
log.info(
|
||||
f'Test-harness set runtime loglevel: {level!r}\n'
|
||||
)
|
||||
yield level
|
||||
tractor.log._default_loglevel = orig
|
||||
|
||||
|
||||
_ci_env: bool = os.environ.get('CI', False)
|
||||
@pytest.fixture(scope='function')
|
||||
def test_log(
|
||||
request: pytest.FixtureRequest,
|
||||
loglevel: str,
|
||||
) -> tractor.log.StackLevelAdapter:
|
||||
'''
|
||||
Deliver a per test-module-fn logger instance for reporting from
|
||||
within actual test bodies/fixtures.
|
||||
|
||||
For example this can be handy to report certain error cases from
|
||||
exception handlers using `test_log.exception()`.
|
||||
|
||||
'''
|
||||
modname: str = request.function.__module__
|
||||
log = tractor.log.get_logger(
|
||||
name=modname, # <- enable root logger
|
||||
# pkg_name='tests',
|
||||
)
|
||||
_log = tractor.log.get_console_log(
|
||||
level=loglevel,
|
||||
logger=log,
|
||||
name=modname,
|
||||
# pkg_name='tests',
|
||||
)
|
||||
_log.debug(
|
||||
f'In-test-logging requested\n'
|
||||
f'test_log.name: {log.name!r}\n'
|
||||
f'level: {loglevel!r}\n'
|
||||
|
||||
)
|
||||
yield _log
|
||||
|
||||
|
||||
@pytest.fixture(scope='session')
|
||||
|
|
@ -85,15 +201,39 @@ def ci_env() -> bool:
|
|||
def sig_prog(
|
||||
proc: subprocess.Popen,
|
||||
sig: int,
|
||||
canc_timeout: float = 0.1,
|
||||
canc_timeout: float = 0.2,
|
||||
tries: int = 3,
|
||||
) -> int:
|
||||
"Kill the actor-process with ``sig``."
|
||||
proc.send_signal(sig)
|
||||
time.sleep(canc_timeout)
|
||||
if not proc.poll():
|
||||
'''
|
||||
Kill the actor-process with `sig`.
|
||||
|
||||
Prefer to kill with the provided signal and
|
||||
failing a `canc_timeout`, send a `SIKILL`-like
|
||||
to ensure termination.
|
||||
|
||||
'''
|
||||
for i in range(tries):
|
||||
proc.send_signal(sig)
|
||||
if proc.poll() is None:
|
||||
print(
|
||||
f'WARNING, proc still alive after,\n'
|
||||
f'canc_timeout={canc_timeout!r}\n'
|
||||
f'sig={sig!r}\n'
|
||||
f'\n'
|
||||
f'{proc.args!r}\n'
|
||||
)
|
||||
time.sleep(canc_timeout)
|
||||
else:
|
||||
# TODO: why sometimes does SIGINT not work on teardown?
|
||||
# seems to happen only when trace logging enabled?
|
||||
proc.send_signal(_KILL_SIGNAL)
|
||||
if proc.poll() is None:
|
||||
print(
|
||||
f'XXX WARNING KILLING PROG WITH SIGINT XXX\n'
|
||||
f'canc_timeout={canc_timeout!r}\n'
|
||||
f'{proc.args!r}\n'
|
||||
)
|
||||
proc.send_signal(_KILL_SIGNAL)
|
||||
|
||||
ret: int = proc.wait()
|
||||
assert ret
|
||||
|
||||
|
|
@ -106,6 +246,8 @@ def daemon(
|
|||
testdir: pytest.Pytester,
|
||||
reg_addr: tuple[str, int],
|
||||
tpt_proto: str,
|
||||
ci_env: bool,
|
||||
test_log: tractor.log.StackLevelAdapter,
|
||||
|
||||
) -> subprocess.Popen:
|
||||
'''
|
||||
|
|
@ -121,10 +263,12 @@ def daemon(
|
|||
"import tractor; "
|
||||
"tractor.run_daemon([], "
|
||||
"registry_addrs={reg_addrs}, "
|
||||
"enable_transports={enable_tpts}, "
|
||||
"debug_mode={debug_mode}, "
|
||||
"loglevel={ll})"
|
||||
).format(
|
||||
reg_addrs=str([reg_addr]),
|
||||
enable_tpts=str([tpt_proto]),
|
||||
ll="'{}'".format(loglevel) if loglevel else None,
|
||||
debug_mode=debug_mode,
|
||||
)
|
||||
|
|
@ -143,13 +287,25 @@ def daemon(
|
|||
**kwargs,
|
||||
)
|
||||
|
||||
# TODO! we should poll for the registry socket-bind to take place
|
||||
# and only once that's done yield to the requester!
|
||||
# -[ ] TCP: use the `._root.open_root_actor()`::`ping_tpt_socket()`
|
||||
# closure!
|
||||
# -[ ] UDS: can we do something similar for 'pinging" the
|
||||
# file-socket?
|
||||
#
|
||||
global _PROC_SPAWN_WAIT
|
||||
# UDS sockets are **really** fast to bind()/listen()/connect()
|
||||
# so it's often required that we delay a bit more starting
|
||||
# the first actor-tree..
|
||||
if tpt_proto == 'uds':
|
||||
global _PROC_SPAWN_WAIT
|
||||
_PROC_SPAWN_WAIT = 0.6
|
||||
_PROC_SPAWN_WAIT += 1.6
|
||||
|
||||
if _non_linux and ci_env:
|
||||
_PROC_SPAWN_WAIT += 1
|
||||
|
||||
# XXX, allow time for the sub-py-proc to boot up.
|
||||
# !TODO, see ping-polling ideas above!
|
||||
time.sleep(_PROC_SPAWN_WAIT)
|
||||
|
||||
assert not proc.returncode
|
||||
|
|
@ -159,18 +315,30 @@ def daemon(
|
|||
# XXX! yeah.. just be reaaal careful with this bc sometimes it
|
||||
# can lock up on the `_io.BufferedReader` and hang..
|
||||
stderr: str = proc.stderr.read().decode()
|
||||
if stderr:
|
||||
stdout: str = proc.stdout.read().decode()
|
||||
if (
|
||||
stderr
|
||||
or
|
||||
stdout
|
||||
):
|
||||
print(
|
||||
f'Daemon actor tree produced STDERR:\n'
|
||||
f'Daemon actor tree produced output:\n'
|
||||
f'{proc.args}\n'
|
||||
f'\n'
|
||||
f'{stderr}\n'
|
||||
f'stderr: {stderr!r}\n'
|
||||
f'stdout: {stdout!r}\n'
|
||||
)
|
||||
if proc.returncode != -2:
|
||||
raise RuntimeError(
|
||||
'Daemon actor tree failed !?\n'
|
||||
f'{proc.args}\n'
|
||||
|
||||
if (rc := proc.returncode) != -2:
|
||||
msg: str = (
|
||||
f'Daemon actor tree was not cancelled !?\n'
|
||||
f'proc.args: {proc.args!r}\n'
|
||||
f'proc.returncode: {rc!r}\n'
|
||||
)
|
||||
if rc < 0:
|
||||
raise RuntimeError(msg)
|
||||
|
||||
test_log.error(msg)
|
||||
|
||||
|
||||
# @pytest.fixture(autouse=True)
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
'''
|
||||
from __future__ import annotations
|
||||
import platform
|
||||
import signal
|
||||
import time
|
||||
from typing import (
|
||||
Callable,
|
||||
|
|
@ -32,9 +34,23 @@ if TYPE_CHECKING:
|
|||
from pexpect import pty_spawn
|
||||
|
||||
|
||||
_non_linux: bool = platform.system() != 'Linux'
|
||||
|
||||
|
||||
def pytest_configure(config):
|
||||
# register custom marks to avoid warnings see,
|
||||
# https://docs.pytest.org/en/stable/how-to/writing_plugins.html#registering-custom-markers
|
||||
config.addinivalue_line(
|
||||
'markers',
|
||||
'ctlcs_bish: test will (likely) not behave under SIGINT..'
|
||||
)
|
||||
|
||||
# a fn that sub-instantiates a `pexpect.spawn()`
|
||||
# and returns it.
|
||||
type PexpectSpawner = Callable[[str], pty_spawn.spawn]
|
||||
type PexpectSpawner = Callable[
|
||||
[str],
|
||||
pty_spawn.spawn,
|
||||
]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
|
@ -64,26 +80,65 @@ def spawn(
|
|||
|
||||
'''
|
||||
import os
|
||||
# disable colored tbs
|
||||
os.environ['PYTHON_COLORS'] = '0'
|
||||
# disable all ANSI color output
|
||||
# os.environ['NO_COLOR'] = '1'
|
||||
|
||||
spawned: PexpectSpawner|None = None
|
||||
|
||||
def _spawn(
|
||||
cmd: str,
|
||||
expect_timeout: float = 4,
|
||||
**mkcmd_kwargs,
|
||||
) -> pty_spawn.spawn:
|
||||
nonlocal spawned
|
||||
unset_colors()
|
||||
return testdir.spawn(
|
||||
spawned = testdir.spawn(
|
||||
cmd=mk_cmd(
|
||||
cmd,
|
||||
**mkcmd_kwargs,
|
||||
),
|
||||
expect_timeout=3,
|
||||
expect_timeout=(timeout:=(
|
||||
expect_timeout + 6
|
||||
if _non_linux and _ci_env
|
||||
else expect_timeout
|
||||
)),
|
||||
# preexec_fn=unset_colors,
|
||||
# ^TODO? get `pytest` core to expose underlying
|
||||
# `pexpect.spawn()` stuff?
|
||||
)
|
||||
# sanity
|
||||
assert spawned.timeout == timeout
|
||||
return spawned
|
||||
|
||||
# such that test-dep can pass input script name.
|
||||
return _spawn # the `PexpectSpawner`, type alias.
|
||||
yield _spawn # the `PexpectSpawner`, type alias.
|
||||
|
||||
if (
|
||||
spawned
|
||||
and
|
||||
(ptyproc := spawned.ptyproc)
|
||||
):
|
||||
start: float = time.time()
|
||||
timeout: float = 5
|
||||
while (
|
||||
ptyproc.isalive()
|
||||
and
|
||||
(
|
||||
(_time_took := (time.time() - start))
|
||||
<
|
||||
timeout
|
||||
)
|
||||
):
|
||||
ptyproc.kill(signal.SIGINT)
|
||||
time.sleep(0.01)
|
||||
|
||||
if ptyproc.isalive():
|
||||
ptyproc.kill(signal.SIGKILL)
|
||||
|
||||
# TODO? ensure we've cleaned up any UDS-paths?
|
||||
# breakpoint()
|
||||
|
||||
|
||||
@pytest.fixture(
|
||||
|
|
@ -91,13 +146,12 @@ def spawn(
|
|||
ids='ctl-c={}'.format,
|
||||
)
|
||||
def ctlc(
|
||||
request,
|
||||
request: pytest.FixtureRequest,
|
||||
ci_env: bool,
|
||||
|
||||
) -> bool:
|
||||
|
||||
use_ctlc = request.param
|
||||
|
||||
use_ctlc: bool = request.param
|
||||
node = request.node
|
||||
markers = node.own_markers
|
||||
for mark in markers:
|
||||
|
|
@ -109,7 +163,13 @@ def ctlc(
|
|||
'https://github.com/goodboy/tractor/issues/320'
|
||||
)
|
||||
|
||||
if mark.name == 'ctlcs_bish':
|
||||
if (
|
||||
mark.name == 'ctlcs_bish'
|
||||
and
|
||||
use_ctlc
|
||||
and
|
||||
all(mark.args)
|
||||
):
|
||||
pytest.skip(
|
||||
f'Test {node} prolly uses something from the stdlib (namely `asyncio`..)\n'
|
||||
f'The test and/or underlying example script can *sometimes* run fine '
|
||||
|
|
@ -214,12 +274,13 @@ def assert_before(
|
|||
err_on_false=True,
|
||||
**kwargs
|
||||
)
|
||||
return str(child.before.decode())
|
||||
|
||||
|
||||
def do_ctlc(
|
||||
child,
|
||||
count: int = 3,
|
||||
delay: float = 0.1,
|
||||
delay: float|None = None,
|
||||
patt: str|None = None,
|
||||
|
||||
# expect repl UX to reprint the prompt after every
|
||||
|
|
@ -231,6 +292,7 @@ def do_ctlc(
|
|||
) -> str|None:
|
||||
|
||||
before: str|None = None
|
||||
delay = delay or 0.1
|
||||
|
||||
# make sure ctl-c sends don't do anything but repeat output
|
||||
for _ in range(count):
|
||||
|
|
@ -241,7 +303,10 @@ def do_ctlc(
|
|||
# if you run this test manually it works just fine..
|
||||
if expect_prompt:
|
||||
time.sleep(delay)
|
||||
child.expect(PROMPT)
|
||||
child.expect(
|
||||
PROMPT,
|
||||
timeout=(child.timeout * 2) if _ci_env else child.timeout,
|
||||
)
|
||||
before = str(child.before.decode())
|
||||
time.sleep(delay)
|
||||
|
||||
|
|
|
|||
|
|
@ -37,6 +37,9 @@ from .conftest import (
|
|||
in_prompt_msg,
|
||||
assert_before,
|
||||
)
|
||||
from ..conftest import (
|
||||
_ci_env,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ..conftest import PexpectSpawner
|
||||
|
|
@ -51,13 +54,14 @@ if TYPE_CHECKING:
|
|||
# - recurrent root errors
|
||||
|
||||
|
||||
_non_linux: bool = platform.system() != 'Linux'
|
||||
|
||||
if platform.system() == 'Windows':
|
||||
pytest.skip(
|
||||
'Debugger tests have no windows support (yet)',
|
||||
allow_module_level=True,
|
||||
)
|
||||
|
||||
|
||||
# TODO: was trying to this xfail style but some weird bug i see in CI
|
||||
# that's happening at collect time.. pretty soon gonna dump actions i'm
|
||||
# thinkin...
|
||||
|
|
@ -193,6 +197,11 @@ def test_root_actor_bp_forever(
|
|||
child.expect(EOF)
|
||||
|
||||
|
||||
# skip on non-Linux CI
|
||||
@pytest.mark.ctlcs_bish(
|
||||
_non_linux,
|
||||
_ci_env,
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
'do_next',
|
||||
(True, False),
|
||||
|
|
@ -258,6 +267,11 @@ def test_subactor_error(
|
|||
child.expect(EOF)
|
||||
|
||||
|
||||
# skip on non-Linux CI
|
||||
@pytest.mark.ctlcs_bish(
|
||||
_non_linux,
|
||||
_ci_env,
|
||||
)
|
||||
def test_subactor_breakpoint(
|
||||
spawn,
|
||||
ctlc: bool,
|
||||
|
|
@ -480,8 +494,24 @@ def test_multi_daemon_subactors(
|
|||
stream.
|
||||
|
||||
'''
|
||||
child = spawn('multi_daemon_subactors')
|
||||
non_linux = _non_linux
|
||||
if non_linux and ctlc:
|
||||
pytest.skip(
|
||||
'Ctl-c + MacOS is too unreliable/racy for this test..\n'
|
||||
)
|
||||
# !TODO, if someone with more patience then i wants to muck
|
||||
# with the timings on this please feel free to see all the
|
||||
# `non_linux` branching logic i added on my first attempt
|
||||
# below!
|
||||
#
|
||||
# my conclusion was that if i were to run the script
|
||||
# manually, and thus as slowly as a human would, the test
|
||||
# would and should pass as described in this test fn, however
|
||||
# after fighting with it for >= 1hr. i decided more then
|
||||
# likely the more extensive `linux` testing should cover most
|
||||
# regressions.
|
||||
|
||||
child = spawn('multi_daemon_subactors')
|
||||
child.expect(PROMPT)
|
||||
|
||||
# there can be a race for which subactor will acquire
|
||||
|
|
@ -511,8 +541,19 @@ def test_multi_daemon_subactors(
|
|||
else:
|
||||
raise ValueError('Neither log msg was found !?')
|
||||
|
||||
non_linux_delay: float = 0.3
|
||||
if ctlc:
|
||||
do_ctlc(child)
|
||||
do_ctlc(
|
||||
child,
|
||||
delay=(
|
||||
non_linux_delay
|
||||
if non_linux
|
||||
else None
|
||||
),
|
||||
)
|
||||
|
||||
if non_linux:
|
||||
time.sleep(1)
|
||||
|
||||
# NOTE: previously since we did not have clobber prevention
|
||||
# in the root actor this final resume could result in the debugger
|
||||
|
|
@ -543,33 +584,66 @@ def test_multi_daemon_subactors(
|
|||
# assert "in use by child ('bp_forever'," in before
|
||||
|
||||
if ctlc:
|
||||
do_ctlc(child)
|
||||
do_ctlc(
|
||||
child,
|
||||
delay=(
|
||||
non_linux_delay
|
||||
if non_linux
|
||||
else None
|
||||
),
|
||||
)
|
||||
|
||||
if non_linux:
|
||||
time.sleep(1)
|
||||
|
||||
# expect another breakpoint actor entry
|
||||
child.sendline('c')
|
||||
child.expect(PROMPT)
|
||||
|
||||
try:
|
||||
assert_before(
|
||||
before: str = assert_before(
|
||||
child,
|
||||
bp_forev_parts,
|
||||
)
|
||||
except AssertionError:
|
||||
assert_before(
|
||||
before: str = assert_before(
|
||||
child,
|
||||
name_error_parts,
|
||||
)
|
||||
|
||||
else:
|
||||
if ctlc:
|
||||
do_ctlc(child)
|
||||
before: str = do_ctlc(
|
||||
child,
|
||||
delay=(
|
||||
non_linux_delay
|
||||
if non_linux
|
||||
else None
|
||||
),
|
||||
)
|
||||
|
||||
if non_linux:
|
||||
time.sleep(1)
|
||||
|
||||
# should crash with the 2nd name error (simulates
|
||||
# a retry) and then the root eventually (boxed) errors
|
||||
# after 1 or more further bp actor entries.
|
||||
|
||||
child.sendline('c')
|
||||
child.expect(PROMPT)
|
||||
try:
|
||||
child.expect(
|
||||
PROMPT,
|
||||
timeout=3,
|
||||
)
|
||||
except EOF:
|
||||
before: str = child.before.decode()
|
||||
print(
|
||||
f'\n'
|
||||
f'??? NEVER RXED `pdb` PROMPT ???\n'
|
||||
f'\n'
|
||||
f'{before}\n'
|
||||
)
|
||||
raise
|
||||
|
||||
assert_before(
|
||||
child,
|
||||
name_error_parts,
|
||||
|
|
@ -689,7 +763,8 @@ def test_multi_subactors_root_errors(
|
|||
|
||||
@has_nested_actors
|
||||
def test_multi_nested_subactors_error_through_nurseries(
|
||||
spawn,
|
||||
ci_env: bool,
|
||||
spawn: PexpectSpawner,
|
||||
|
||||
# TODO: address debugger issue for nested tree:
|
||||
# https://github.com/goodboy/tractor/issues/320
|
||||
|
|
@ -710,9 +785,29 @@ def test_multi_nested_subactors_error_through_nurseries(
|
|||
|
||||
# timed_out_early: bool = False
|
||||
|
||||
for send_char in itertools.cycle(['c', 'q']):
|
||||
for (
|
||||
i,
|
||||
send_char,
|
||||
) in enumerate(itertools.cycle(['c', 'q'])):
|
||||
|
||||
timeout: float = -1
|
||||
if (
|
||||
_non_linux
|
||||
and
|
||||
ci_env
|
||||
):
|
||||
timeout: float = 6
|
||||
|
||||
# XXX linux but the first crash sequence
|
||||
# can take longer to arrive at a prompt.
|
||||
elif i == 0:
|
||||
timeout = 5
|
||||
|
||||
try:
|
||||
child.expect(PROMPT)
|
||||
child.expect(
|
||||
PROMPT,
|
||||
timeout=timeout,
|
||||
)
|
||||
child.sendline(send_char)
|
||||
time.sleep(0.01)
|
||||
|
||||
|
|
@ -889,6 +984,11 @@ def test_different_debug_mode_per_actor(
|
|||
)
|
||||
|
||||
|
||||
# skip on non-Linux CI
|
||||
@pytest.mark.ctlcs_bish(
|
||||
_non_linux,
|
||||
_ci_env,
|
||||
)
|
||||
def test_post_mortem_api(
|
||||
spawn,
|
||||
ctlc: bool,
|
||||
|
|
@ -1133,12 +1233,21 @@ def test_ctxep_pauses_n_maybe_ipc_breaks(
|
|||
# closed so verify we see error reporting as well as
|
||||
# a failed crash-REPL request msg and can CTL-c our way
|
||||
# out.
|
||||
|
||||
# ?TODO, match depending on `tpt_proto(s)`?
|
||||
# - [ ] how can we pass it into the script tho?
|
||||
tpt: str = 'UDS'
|
||||
if _non_linux:
|
||||
tpt: str = 'TCP'
|
||||
|
||||
assert_before(
|
||||
child,
|
||||
['peer IPC channel closed abruptly?',
|
||||
'another task closed this fd',
|
||||
'Debug lock request was CANCELLED?',
|
||||
"TransportClosed: 'MsgpackUDSStream' was already closed locally ?",]
|
||||
f"'Msgpack{tpt}Stream' was already closed locally?",
|
||||
f"TransportClosed: 'Msgpack{tpt}Stream' was already closed 'by peer'?",
|
||||
]
|
||||
|
||||
# XXX races on whether these show/hit?
|
||||
# 'Failed to REPl via `_pause()` You called `tractor.pause()` from an already cancelled scope!',
|
||||
|
|
|
|||
|
|
@ -31,6 +31,9 @@ from .conftest import (
|
|||
PROMPT,
|
||||
_pause_msg,
|
||||
)
|
||||
from ..conftest import (
|
||||
no_macos,
|
||||
)
|
||||
|
||||
import pytest
|
||||
from pexpect.exceptions import (
|
||||
|
|
@ -42,6 +45,7 @@ if TYPE_CHECKING:
|
|||
from ..conftest import PexpectSpawner
|
||||
|
||||
|
||||
@no_macos
|
||||
def test_shield_pause(
|
||||
spawn: PexpectSpawner,
|
||||
):
|
||||
|
|
@ -57,6 +61,7 @@ def test_shield_pause(
|
|||
expect(
|
||||
child,
|
||||
'Yo my child hanging..?',
|
||||
timeout=3,
|
||||
)
|
||||
assert_before(
|
||||
child,
|
||||
|
|
@ -121,7 +126,7 @@ def test_shield_pause(
|
|||
child.pid,
|
||||
signal.SIGINT,
|
||||
)
|
||||
from tractor._supervise import _shutdown_msg
|
||||
from tractor.runtime._supervise import _shutdown_msg
|
||||
expect(
|
||||
child,
|
||||
# 'Shutting down actor runtime',
|
||||
|
|
|
|||
|
|
@ -0,0 +1,376 @@
|
|||
'''
|
||||
Multiaddr construction, parsing, and round-trip tests for
|
||||
`tractor.discovery._multiaddr.mk_maddr()` and
|
||||
`tractor.discovery._multiaddr.parse_maddr()`.
|
||||
|
||||
'''
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
from multiaddr import Multiaddr
|
||||
|
||||
from tractor.ipc._tcp import TCPAddress
|
||||
from tractor.ipc._uds import UDSAddress
|
||||
from tractor.discovery._multiaddr import (
|
||||
mk_maddr,
|
||||
parse_maddr,
|
||||
parse_endpoints,
|
||||
_tpt_proto_to_maddr,
|
||||
_maddr_to_tpt_proto,
|
||||
)
|
||||
from tractor.discovery._addr import wrap_address
|
||||
|
||||
|
||||
def test_tpt_proto_to_maddr_mapping():
|
||||
'''
|
||||
`_tpt_proto_to_maddr` maps all supported `proto_key`
|
||||
values to their correct multiaddr protocol names.
|
||||
|
||||
'''
|
||||
assert _tpt_proto_to_maddr['tcp'] == 'tcp'
|
||||
assert _tpt_proto_to_maddr['uds'] == 'unix'
|
||||
assert len(_tpt_proto_to_maddr) == 2
|
||||
|
||||
|
||||
def test_mk_maddr_tcp_ipv4():
|
||||
'''
|
||||
`mk_maddr()` on a `TCPAddress` with an IPv4 host
|
||||
produces the correct `/ip4/<host>/tcp/<port>` multiaddr.
|
||||
|
||||
'''
|
||||
addr = TCPAddress('127.0.0.1', 1234)
|
||||
result: Multiaddr = mk_maddr(addr)
|
||||
|
||||
assert isinstance(result, Multiaddr)
|
||||
assert str(result) == '/ip4/127.0.0.1/tcp/1234'
|
||||
|
||||
protos = result.protocols()
|
||||
assert protos[0].name == 'ip4'
|
||||
assert protos[1].name == 'tcp'
|
||||
|
||||
assert result.value_for_protocol('ip4') == '127.0.0.1'
|
||||
assert result.value_for_protocol('tcp') == '1234'
|
||||
|
||||
|
||||
def test_mk_maddr_tcp_ipv6():
|
||||
'''
|
||||
`mk_maddr()` on a `TCPAddress` with an IPv6 host
|
||||
produces the correct `/ip6/<host>/tcp/<port>` multiaddr.
|
||||
|
||||
'''
|
||||
addr = TCPAddress('::1', 5678)
|
||||
result: Multiaddr = mk_maddr(addr)
|
||||
|
||||
assert str(result) == '/ip6/::1/tcp/5678'
|
||||
|
||||
protos = result.protocols()
|
||||
assert protos[0].name == 'ip6'
|
||||
assert protos[1].name == 'tcp'
|
||||
|
||||
|
||||
def test_mk_maddr_uds():
|
||||
'''
|
||||
`mk_maddr()` on a `UDSAddress` produces a `/unix/<path>`
|
||||
multiaddr containing the full socket path.
|
||||
|
||||
'''
|
||||
# NOTE, use an absolute `filedir` to match real runtime
|
||||
# UDS paths; `mk_maddr()` strips the leading `/` to avoid
|
||||
# the double-slash `/unix//run/..` that py-multiaddr
|
||||
# rejects as "empty protocol path".
|
||||
filedir = '/tmp/tractor_test'
|
||||
filename = 'test_sock.sock'
|
||||
addr = UDSAddress(
|
||||
filedir=filedir,
|
||||
filename=filename,
|
||||
)
|
||||
result: Multiaddr = mk_maddr(addr)
|
||||
|
||||
assert isinstance(result, Multiaddr)
|
||||
|
||||
result_str: str = str(result)
|
||||
assert result_str.startswith('/unix/')
|
||||
# verify the leading `/` was stripped to avoid double-slash
|
||||
assert '/unix/tmp/tractor_test/' in result_str
|
||||
|
||||
sockpath_rel: str = str(
|
||||
Path(filedir) / filename
|
||||
).lstrip('/')
|
||||
unix_val: str = result.value_for_protocol('unix')
|
||||
assert unix_val.endswith(sockpath_rel)
|
||||
|
||||
|
||||
def test_mk_maddr_unsupported_proto_key():
|
||||
'''
|
||||
`mk_maddr()` raises `ValueError` for an unsupported
|
||||
`proto_key`.
|
||||
|
||||
'''
|
||||
fake_addr = SimpleNamespace(proto_key='quic')
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match='Unsupported proto_key',
|
||||
):
|
||||
mk_maddr(fake_addr)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'addr',
|
||||
[
|
||||
pytest.param(
|
||||
TCPAddress('127.0.0.1', 9999),
|
||||
id='tcp-ipv4',
|
||||
),
|
||||
pytest.param(
|
||||
UDSAddress(
|
||||
filedir='/tmp/tractor_rt',
|
||||
filename='roundtrip.sock',
|
||||
),
|
||||
id='uds',
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_mk_maddr_roundtrip(addr):
|
||||
'''
|
||||
`mk_maddr()` output is valid multiaddr syntax that the
|
||||
library can re-parse back into an equivalent `Multiaddr`.
|
||||
|
||||
'''
|
||||
maddr: Multiaddr = mk_maddr(addr)
|
||||
reparsed = Multiaddr(str(maddr))
|
||||
|
||||
assert reparsed == maddr
|
||||
assert str(reparsed) == str(maddr)
|
||||
|
||||
|
||||
# ------ parse_maddr() tests ------
|
||||
|
||||
def test_maddr_to_tpt_proto_mapping():
|
||||
'''
|
||||
`_maddr_to_tpt_proto` is the exact inverse of
|
||||
`_tpt_proto_to_maddr`.
|
||||
|
||||
'''
|
||||
assert _maddr_to_tpt_proto == {
|
||||
'tcp': 'tcp',
|
||||
'unix': 'uds',
|
||||
}
|
||||
|
||||
|
||||
def test_parse_maddr_tcp_ipv4():
|
||||
'''
|
||||
`parse_maddr()` on an IPv4 TCP multiaddr string
|
||||
produce a `TCPAddress` with the correct host and port.
|
||||
|
||||
'''
|
||||
result = parse_maddr('/ip4/127.0.0.1/tcp/1234')
|
||||
|
||||
assert isinstance(result, TCPAddress)
|
||||
assert result.unwrap() == ('127.0.0.1', 1234)
|
||||
|
||||
|
||||
def test_parse_maddr_tcp_ipv6():
|
||||
'''
|
||||
`parse_maddr()` on an IPv6 TCP multiaddr string
|
||||
produce a `TCPAddress` with the correct host and port.
|
||||
|
||||
'''
|
||||
result = parse_maddr('/ip6/::1/tcp/5678')
|
||||
|
||||
assert isinstance(result, TCPAddress)
|
||||
assert result.unwrap() == ('::1', 5678)
|
||||
|
||||
|
||||
def test_parse_maddr_uds():
|
||||
'''
|
||||
`parse_maddr()` on a `/unix/...` multiaddr string
|
||||
produce a `UDSAddress` with the correct dir and filename,
|
||||
preserving absolute path semantics.
|
||||
|
||||
'''
|
||||
result = parse_maddr('/unix/tmp/tractor_test/test.sock')
|
||||
|
||||
assert isinstance(result, UDSAddress)
|
||||
filedir, filename = result.unwrap()
|
||||
assert filename == 'test.sock'
|
||||
assert str(filedir) == '/tmp/tractor_test'
|
||||
|
||||
|
||||
def test_parse_maddr_unsupported():
|
||||
'''
|
||||
`parse_maddr()` raise `ValueError` for an unsupported
|
||||
protocol combination like UDP.
|
||||
|
||||
'''
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match='Unsupported multiaddr protocol combo',
|
||||
):
|
||||
parse_maddr('/ip4/127.0.0.1/udp/1234')
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'addr',
|
||||
[
|
||||
pytest.param(
|
||||
TCPAddress('127.0.0.1', 9999),
|
||||
id='tcp-ipv4',
|
||||
),
|
||||
pytest.param(
|
||||
UDSAddress(
|
||||
filedir='/tmp/tractor_rt',
|
||||
filename='roundtrip.sock',
|
||||
),
|
||||
id='uds',
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_parse_maddr_roundtrip(addr):
|
||||
'''
|
||||
Full round-trip: `addr -> mk_maddr -> str -> parse_maddr`
|
||||
produce an `Address` whose `.unwrap()` matches the original.
|
||||
|
||||
'''
|
||||
maddr: Multiaddr = mk_maddr(addr)
|
||||
maddr_str: str = str(maddr)
|
||||
parsed = parse_maddr(maddr_str)
|
||||
|
||||
assert type(parsed) is type(addr)
|
||||
assert parsed.unwrap() == addr.unwrap()
|
||||
|
||||
|
||||
def test_wrap_address_maddr_str():
|
||||
'''
|
||||
`wrap_address()` accept a multiaddr-format string and
|
||||
return the correct `Address` type.
|
||||
|
||||
'''
|
||||
result = wrap_address('/ip4/127.0.0.1/tcp/9999')
|
||||
|
||||
assert isinstance(result, TCPAddress)
|
||||
assert result.unwrap() == ('127.0.0.1', 9999)
|
||||
|
||||
|
||||
# ------ parse_endpoints() tests ------
|
||||
|
||||
def test_parse_endpoints_tcp_only():
|
||||
'''
|
||||
`parse_endpoints()` with a single TCP maddr per actor
|
||||
produce the correct `TCPAddress` instances.
|
||||
|
||||
'''
|
||||
table = {
|
||||
'registry': ['/ip4/127.0.0.1/tcp/1616'],
|
||||
'data_feed': ['/ip4/0.0.0.0/tcp/5555'],
|
||||
}
|
||||
result = parse_endpoints(table)
|
||||
|
||||
assert set(result.keys()) == {'registry', 'data_feed'}
|
||||
|
||||
reg_addr = result['registry'][0]
|
||||
assert isinstance(reg_addr, TCPAddress)
|
||||
assert reg_addr.unwrap() == ('127.0.0.1', 1616)
|
||||
|
||||
feed_addr = result['data_feed'][0]
|
||||
assert isinstance(feed_addr, TCPAddress)
|
||||
assert feed_addr.unwrap() == ('0.0.0.0', 5555)
|
||||
|
||||
|
||||
def test_parse_endpoints_mixed_tpts():
|
||||
'''
|
||||
`parse_endpoints()` with both TCP and UDS maddrs for
|
||||
the same actor produce the correct mixed `Address` list.
|
||||
|
||||
'''
|
||||
table = {
|
||||
'broker': [
|
||||
'/ip4/127.0.0.1/tcp/4040',
|
||||
'/unix/tmp/tractor/broker.sock',
|
||||
],
|
||||
}
|
||||
result = parse_endpoints(table)
|
||||
addrs = result['broker']
|
||||
|
||||
assert len(addrs) == 2
|
||||
assert isinstance(addrs[0], TCPAddress)
|
||||
assert addrs[0].unwrap() == ('127.0.0.1', 4040)
|
||||
|
||||
assert isinstance(addrs[1], UDSAddress)
|
||||
filedir, filename = addrs[1].unwrap()
|
||||
assert filename == 'broker.sock'
|
||||
assert str(filedir) == '/tmp/tractor'
|
||||
|
||||
|
||||
def test_parse_endpoints_unwrapped_tuples():
|
||||
'''
|
||||
`parse_endpoints()` accept raw `(host, port)` tuples
|
||||
and wrap them as `TCPAddress`.
|
||||
|
||||
'''
|
||||
table = {
|
||||
'ems': [('127.0.0.1', 6666)],
|
||||
}
|
||||
result = parse_endpoints(table)
|
||||
|
||||
addr = result['ems'][0]
|
||||
assert isinstance(addr, TCPAddress)
|
||||
assert addr.unwrap() == ('127.0.0.1', 6666)
|
||||
|
||||
|
||||
def test_parse_endpoints_mixed_str_and_tuple():
|
||||
'''
|
||||
`parse_endpoints()` accept a mix of maddr strings and
|
||||
raw tuples in the same actor entry list.
|
||||
|
||||
'''
|
||||
table = {
|
||||
'quoter': [
|
||||
'/ip4/127.0.0.1/tcp/7777',
|
||||
('127.0.0.1', 8888),
|
||||
],
|
||||
}
|
||||
result = parse_endpoints(table)
|
||||
addrs = result['quoter']
|
||||
|
||||
assert len(addrs) == 2
|
||||
assert isinstance(addrs[0], TCPAddress)
|
||||
assert addrs[0].unwrap() == ('127.0.0.1', 7777)
|
||||
|
||||
assert isinstance(addrs[1], TCPAddress)
|
||||
assert addrs[1].unwrap() == ('127.0.0.1', 8888)
|
||||
|
||||
|
||||
def test_parse_endpoints_unsupported_proto():
|
||||
'''
|
||||
`parse_endpoints()` raise `ValueError` when a maddr
|
||||
string uses an unsupported protocol like `/udp/`.
|
||||
|
||||
'''
|
||||
table = {
|
||||
'bad_actor': ['/ip4/127.0.0.1/udp/9999'],
|
||||
}
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match='Unsupported multiaddr protocol combo',
|
||||
):
|
||||
parse_endpoints(table)
|
||||
|
||||
|
||||
def test_parse_endpoints_empty_table():
|
||||
'''
|
||||
`parse_endpoints()` on an empty table return an empty
|
||||
dict.
|
||||
|
||||
'''
|
||||
assert parse_endpoints({}) == {}
|
||||
|
||||
|
||||
def test_parse_endpoints_empty_actor_list():
|
||||
'''
|
||||
`parse_endpoints()` with an actor mapped to an empty
|
||||
list preserve the key with an empty list value.
|
||||
|
||||
'''
|
||||
result = parse_endpoints({'x': []})
|
||||
assert result == {'x': []}
|
||||
|
|
@ -1,25 +1,33 @@
|
|||
"""
|
||||
Actor "discovery" testing
|
||||
"""
|
||||
'''
|
||||
Discovery subsystem via a "registrar" actor scenarios.
|
||||
|
||||
'''
|
||||
import os
|
||||
import signal
|
||||
import platform
|
||||
from functools import partial
|
||||
import itertools
|
||||
import time
|
||||
from typing import Callable
|
||||
|
||||
import psutil
|
||||
import pytest
|
||||
import subprocess
|
||||
import tractor
|
||||
from tractor.devx import dump_on_hang
|
||||
from tractor.trionics import collapse_eg
|
||||
from tractor._testing import tractor_test
|
||||
from tractor.discovery._addr import wrap_address
|
||||
from tractor.discovery._multiaddr import mk_maddr
|
||||
import trio
|
||||
|
||||
|
||||
@tractor_test
|
||||
async def test_reg_then_unreg(reg_addr):
|
||||
async def test_reg_then_unreg(
|
||||
reg_addr: tuple,
|
||||
):
|
||||
actor = tractor.current_actor()
|
||||
assert actor.is_arbiter
|
||||
assert actor.is_registrar
|
||||
assert len(actor._registry) == 1 # only self is registered
|
||||
|
||||
async with tractor.open_nursery(
|
||||
|
|
@ -27,10 +35,10 @@ async def test_reg_then_unreg(reg_addr):
|
|||
) as n:
|
||||
|
||||
portal = await n.start_actor('actor', enable_modules=[__name__])
|
||||
uid = portal.channel.uid
|
||||
uid = portal.channel.aid.uid
|
||||
|
||||
async with tractor.get_registry(reg_addr) as aportal:
|
||||
# this local actor should be the arbiter
|
||||
# this local actor should be the registrar
|
||||
assert actor is aportal.actor
|
||||
|
||||
async with tractor.wait_for_actor('actor'):
|
||||
|
|
@ -48,6 +56,49 @@ async def test_reg_then_unreg(reg_addr):
|
|||
assert not sockaddrs
|
||||
|
||||
|
||||
@tractor_test
|
||||
async def test_reg_then_unreg_maddr(
|
||||
reg_addr: tuple,
|
||||
):
|
||||
'''
|
||||
Same as `test_reg_then_unreg` but pass the registry
|
||||
address as a multiaddr string to verify `wrap_address()`
|
||||
multiaddr parsing end-to-end through the runtime.
|
||||
|
||||
'''
|
||||
# tuple -> Address -> multiaddr string
|
||||
addr_obj = wrap_address(reg_addr)
|
||||
maddr_str: str = str(mk_maddr(addr_obj))
|
||||
|
||||
actor = tractor.current_actor()
|
||||
assert actor.is_registrar
|
||||
|
||||
async with tractor.open_nursery(
|
||||
registry_addrs=[maddr_str],
|
||||
) as n:
|
||||
|
||||
portal = await n.start_actor(
|
||||
'actor_maddr',
|
||||
enable_modules=[__name__],
|
||||
)
|
||||
uid = portal.channel.aid.uid
|
||||
|
||||
async with tractor.get_registry(maddr_str) as aportal:
|
||||
assert actor is aportal.actor
|
||||
|
||||
async with tractor.wait_for_actor('actor_maddr'):
|
||||
assert uid in aportal.actor._registry
|
||||
sockaddrs = actor._registry[uid]
|
||||
assert sockaddrs
|
||||
|
||||
await n.cancel()
|
||||
|
||||
await trio.sleep(0.1)
|
||||
assert uid not in aportal.actor._registry
|
||||
sockaddrs = actor._registry.get(uid)
|
||||
assert not sockaddrs
|
||||
|
||||
|
||||
the_line = 'Hi my name is {}'
|
||||
|
||||
|
||||
|
|
@ -81,12 +132,20 @@ async def say_hello_use_wait(
|
|||
return result
|
||||
|
||||
|
||||
@pytest.mark.timeout(
|
||||
3,
|
||||
method='thread',
|
||||
)
|
||||
@tractor_test
|
||||
@pytest.mark.parametrize('func', [say_hello, say_hello_use_wait])
|
||||
@pytest.mark.parametrize(
|
||||
'func',
|
||||
[say_hello,
|
||||
say_hello_use_wait]
|
||||
)
|
||||
async def test_trynamic_trio(
|
||||
func,
|
||||
start_method,
|
||||
reg_addr,
|
||||
func: Callable,
|
||||
start_method: str,
|
||||
reg_addr: tuple,
|
||||
):
|
||||
'''
|
||||
Root actor acting as the "director" and running one-shot-task-actors
|
||||
|
|
@ -119,7 +178,10 @@ async def stream_forever():
|
|||
await trio.sleep(0.01)
|
||||
|
||||
|
||||
async def cancel(use_signal, delay=0):
|
||||
async def cancel(
|
||||
use_signal: bool,
|
||||
delay: float = 0,
|
||||
):
|
||||
# hold on there sally
|
||||
await trio.sleep(delay)
|
||||
|
||||
|
|
@ -132,15 +194,17 @@ async def cancel(use_signal, delay=0):
|
|||
raise KeyboardInterrupt
|
||||
|
||||
|
||||
async def stream_from(portal):
|
||||
async def stream_from(portal: tractor.Portal):
|
||||
async with portal.open_stream_from(stream_forever) as stream:
|
||||
async for value in stream:
|
||||
print(value)
|
||||
|
||||
|
||||
async def unpack_reg(actor_or_portal):
|
||||
async def unpack_reg(
|
||||
actor_or_portal: tractor.Portal|tractor.Actor,
|
||||
):
|
||||
'''
|
||||
Get and unpack a "registry" RPC request from the "arbiter" registry
|
||||
Get and unpack a "registry" RPC request from the registrar
|
||||
system.
|
||||
|
||||
'''
|
||||
|
|
@ -149,7 +213,10 @@ async def unpack_reg(actor_or_portal):
|
|||
else:
|
||||
msg = await actor_or_portal.run_from_ns('self', 'get_registry')
|
||||
|
||||
return {tuple(key.split('.')): val for key, val in msg.items()}
|
||||
return {
|
||||
tuple(key.split('.')): val
|
||||
for key, val in msg.items()
|
||||
}
|
||||
|
||||
|
||||
async def spawn_and_check_registry(
|
||||
|
|
@ -173,24 +240,26 @@ async def spawn_and_check_registry(
|
|||
registry_addrs=[reg_addr],
|
||||
debug_mode=debug_mode,
|
||||
):
|
||||
async with tractor.get_registry(reg_addr) as portal:
|
||||
async with tractor.get_registry(
|
||||
addr=reg_addr,
|
||||
) as portal:
|
||||
# runtime needs to be up to call this
|
||||
actor = tractor.current_actor()
|
||||
|
||||
if remote_arbiter:
|
||||
assert not actor.is_arbiter
|
||||
assert not actor.is_registrar
|
||||
|
||||
if actor.is_arbiter:
|
||||
extra = 1 # arbiter is local root actor
|
||||
if actor.is_registrar:
|
||||
extra = 1 # registrar is local root actor
|
||||
get_reg = partial(unpack_reg, actor)
|
||||
|
||||
else:
|
||||
get_reg = partial(unpack_reg, portal)
|
||||
extra = 2 # local root actor + remote arbiter
|
||||
extra = 2 # local root actor + remote registrar
|
||||
|
||||
# ensure current actor is registered
|
||||
registry: dict = await get_reg()
|
||||
assert actor.uid in registry
|
||||
assert actor.aid.uid in registry
|
||||
|
||||
try:
|
||||
async with tractor.open_nursery() as an:
|
||||
|
|
@ -238,22 +307,35 @@ async def spawn_and_check_registry(
|
|||
|
||||
# all subactors should have de-registered
|
||||
registry = await get_reg()
|
||||
assert len(registry) == extra
|
||||
assert actor.uid in registry
|
||||
start: float = time.time()
|
||||
while (
|
||||
not (len(registry) == extra)
|
||||
and
|
||||
(time.time() - start) < 5
|
||||
):
|
||||
print(
|
||||
f'Waiting for remaining subs to dereg..\n'
|
||||
f'{registry!r}\n'
|
||||
)
|
||||
await trio.sleep(0.3)
|
||||
else:
|
||||
assert len(registry) == extra
|
||||
|
||||
assert actor.aid.uid in registry
|
||||
|
||||
|
||||
@pytest.mark.parametrize('use_signal', [False, True])
|
||||
@pytest.mark.parametrize('with_streaming', [False, True])
|
||||
def test_subactors_unregister_on_cancel(
|
||||
debug_mode: bool,
|
||||
start_method,
|
||||
use_signal,
|
||||
reg_addr,
|
||||
with_streaming,
|
||||
start_method: str,
|
||||
use_signal: bool,
|
||||
reg_addr: tuple,
|
||||
with_streaming: bool,
|
||||
):
|
||||
'''
|
||||
Verify that cancelling a nursery results in all subactors
|
||||
deregistering themselves with the arbiter.
|
||||
deregistering themselves with the registrar.
|
||||
|
||||
'''
|
||||
with pytest.raises(KeyboardInterrupt):
|
||||
|
|
@ -274,15 +356,17 @@ def test_subactors_unregister_on_cancel(
|
|||
def test_subactors_unregister_on_cancel_remote_daemon(
|
||||
daemon: subprocess.Popen,
|
||||
debug_mode: bool,
|
||||
start_method,
|
||||
use_signal,
|
||||
reg_addr,
|
||||
with_streaming,
|
||||
start_method: str,
|
||||
use_signal: bool,
|
||||
reg_addr: tuple,
|
||||
with_streaming: bool,
|
||||
):
|
||||
"""Verify that cancelling a nursery results in all subactors
|
||||
deregistering themselves with a **remote** (not in the local process
|
||||
tree) arbiter.
|
||||
"""
|
||||
'''
|
||||
Verify that cancelling a nursery results in all subactors
|
||||
deregistering themselves with a **remote** (not in the local
|
||||
process tree) registrar.
|
||||
|
||||
'''
|
||||
with pytest.raises(KeyboardInterrupt):
|
||||
trio.run(
|
||||
partial(
|
||||
|
|
@ -325,20 +409,24 @@ async def close_chans_before_nursery(
|
|||
try:
|
||||
get_reg = partial(unpack_reg, aportal)
|
||||
|
||||
async with tractor.open_nursery() as tn:
|
||||
portal1 = await tn.start_actor(
|
||||
name='consumer1', enable_modules=[__name__])
|
||||
portal2 = await tn.start_actor(
|
||||
'consumer2', enable_modules=[__name__])
|
||||
async with tractor.open_nursery() as an:
|
||||
portal1 = await an.start_actor(
|
||||
name='consumer1',
|
||||
enable_modules=[__name__],
|
||||
)
|
||||
portal2 = await an.start_actor(
|
||||
'consumer2',
|
||||
enable_modules=[__name__],
|
||||
)
|
||||
|
||||
# TODO: compact this back as was in last commit once
|
||||
# 3.9+, see https://github.com/goodboy/tractor/issues/207
|
||||
async with portal1.open_stream_from(
|
||||
stream_forever
|
||||
) as agen1:
|
||||
async with portal2.open_stream_from(
|
||||
async with (
|
||||
portal1.open_stream_from(
|
||||
stream_forever
|
||||
) as agen2:
|
||||
) as agen1,
|
||||
portal2.open_stream_from(
|
||||
stream_forever
|
||||
) as agen2,
|
||||
):
|
||||
async with (
|
||||
collapse_eg(),
|
||||
trio.open_nursery() as tn,
|
||||
|
|
@ -349,7 +437,7 @@ async def close_chans_before_nursery(
|
|||
await streamer(agen2)
|
||||
finally:
|
||||
# Kill the root nursery thus resulting in
|
||||
# normal arbiter channel ops to fail during
|
||||
# normal registrar channel ops to fail during
|
||||
# teardown. It doesn't seem like this is
|
||||
# reliably triggered by an external SIGINT.
|
||||
# tractor.current_actor()._root_nursery.cancel_scope.cancel()
|
||||
|
|
@ -361,27 +449,30 @@ async def close_chans_before_nursery(
|
|||
# also kill off channels cuz why not
|
||||
await agen1.aclose()
|
||||
await agen2.aclose()
|
||||
|
||||
finally:
|
||||
with trio.CancelScope(shield=True):
|
||||
await trio.sleep(1)
|
||||
|
||||
# all subactors should have de-registered
|
||||
registry = await get_reg()
|
||||
assert portal1.channel.uid not in registry
|
||||
assert portal2.channel.uid not in registry
|
||||
assert portal1.channel.aid.uid not in registry
|
||||
assert portal2.channel.aid.uid not in registry
|
||||
assert len(registry) == entries_at_end
|
||||
|
||||
|
||||
@pytest.mark.parametrize('use_signal', [False, True])
|
||||
def test_close_channel_explicit(
|
||||
start_method,
|
||||
use_signal,
|
||||
reg_addr,
|
||||
start_method: str,
|
||||
use_signal: bool,
|
||||
reg_addr: tuple,
|
||||
):
|
||||
"""Verify that closing a stream explicitly and killing the actor's
|
||||
'''
|
||||
Verify that closing a stream explicitly and killing the actor's
|
||||
"root nursery" **before** the containing nursery tears down also
|
||||
results in subactor(s) deregistering from the arbiter.
|
||||
"""
|
||||
results in subactor(s) deregistering from the registrar.
|
||||
|
||||
'''
|
||||
with pytest.raises(KeyboardInterrupt):
|
||||
trio.run(
|
||||
partial(
|
||||
|
|
@ -394,16 +485,18 @@ def test_close_channel_explicit(
|
|||
|
||||
|
||||
@pytest.mark.parametrize('use_signal', [False, True])
|
||||
def test_close_channel_explicit_remote_arbiter(
|
||||
def test_close_channel_explicit_remote_registrar(
|
||||
daemon: subprocess.Popen,
|
||||
start_method,
|
||||
use_signal,
|
||||
reg_addr,
|
||||
start_method: str,
|
||||
use_signal: bool,
|
||||
reg_addr: tuple,
|
||||
):
|
||||
"""Verify that closing a stream explicitly and killing the actor's
|
||||
'''
|
||||
Verify that closing a stream explicitly and killing the actor's
|
||||
"root nursery" **before** the containing nursery tears down also
|
||||
results in subactor(s) deregistering from the arbiter.
|
||||
"""
|
||||
results in subactor(s) deregistering from the registrar.
|
||||
|
||||
'''
|
||||
with pytest.raises(KeyboardInterrupt):
|
||||
trio.run(
|
||||
partial(
|
||||
|
|
@ -413,3 +506,138 @@ def test_close_channel_explicit_remote_arbiter(
|
|||
remote_arbiter=True,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@tractor.context
|
||||
async def kill_transport(
|
||||
ctx: tractor.Context,
|
||||
) -> None:
|
||||
|
||||
await ctx.started()
|
||||
actor: tractor.Actor = tractor.current_actor()
|
||||
actor.ipc_server.cancel()
|
||||
await trio.sleep_forever()
|
||||
|
||||
|
||||
|
||||
# Wall-clock bound via `pytest-timeout` (`method='thread'`).
|
||||
# Under `--spawn-backend=subint` this test can wedge in an
|
||||
# un-Ctrl-C-able state (abandoned-subint + shared-GIL
|
||||
# starvation → signal-wakeup-fd pipe fills → SIGINT silently
|
||||
# dropped; see `ai/conc-anal/subint_sigint_starvation_issue.md`).
|
||||
# `method='thread'` is specifically required because `signal`-
|
||||
# method SIGALRM suffers the same GIL-starvation path and
|
||||
# wouldn't fire the Python-level handler.
|
||||
# At timeout the plugin hard-kills the pytest process — that's
|
||||
# the intended behavior here; the alternative is an unattended
|
||||
# suite run that never returns.
|
||||
@pytest.mark.timeout(
|
||||
3, # NOTE should be a 2.1s happy path.
|
||||
method='thread',
|
||||
)
|
||||
@pytest.mark.skipon_spawn_backend(
|
||||
'subint',
|
||||
reason=(
|
||||
'XXX SUBINT HANGING TEST XXX\n'
|
||||
'See oustanding issue(s)\n'
|
||||
# TODO, put issue link!
|
||||
)
|
||||
)
|
||||
# @pytest.mark.parametrize('use_signal', [False, True])
|
||||
#
|
||||
def test_stale_entry_is_deleted(
|
||||
debug_mode: bool,
|
||||
daemon: subprocess.Popen,
|
||||
start_method: str,
|
||||
reg_addr: tuple,
|
||||
):
|
||||
'''
|
||||
Ensure that when a stale entry is detected in the registrar's
|
||||
table that the `find_actor()` API takes care of deleting the
|
||||
stale entry and not delivering a bad portal.
|
||||
|
||||
'''
|
||||
async def main():
|
||||
|
||||
name: str = 'transport_fails_actor'
|
||||
_reg_ptl: tractor.Portal
|
||||
an: tractor.ActorNursery
|
||||
async with (
|
||||
tractor.open_nursery(
|
||||
debug_mode=debug_mode,
|
||||
registry_addrs=[reg_addr],
|
||||
) as an,
|
||||
tractor.get_registry(reg_addr) as _reg_ptl,
|
||||
):
|
||||
ptl: tractor.Portal = await an.start_actor(
|
||||
name,
|
||||
enable_modules=[__name__],
|
||||
)
|
||||
async with ptl.open_context(
|
||||
kill_transport,
|
||||
) as (first, ctx):
|
||||
async with tractor.find_actor(
|
||||
name,
|
||||
registry_addrs=[reg_addr],
|
||||
) as maybe_portal:
|
||||
# because the transitive
|
||||
# `._api.maybe_open_portal()` call should
|
||||
# fail and implicitly call `.delete_addr()`
|
||||
assert maybe_portal is None
|
||||
registry: dict = await unpack_reg(_reg_ptl)
|
||||
assert ptl.chan.aid.uid not in registry
|
||||
|
||||
# should fail since we knocked out the IPC tpt XD
|
||||
await ptl.cancel_actor()
|
||||
await an.cancel()
|
||||
|
||||
# TODO, remove once the `[subint]` variant no longer hangs.
|
||||
#
|
||||
# Status (as of Phase B hard-kill landing):
|
||||
#
|
||||
# - `[trio]`/`[mp_*]` variants: completes normally; `dump_on_hang`
|
||||
# is a no-op safety net here.
|
||||
#
|
||||
# - `[subint]` variant: hangs indefinitely AND is un-Ctrl-C-able.
|
||||
# `strace -p <pytest_pid>` while in the hang reveals a silently-
|
||||
# dropped SIGINT — the C signal handler tries to write the
|
||||
# signum byte to Python's signal-wakeup fd and gets `EAGAIN`,
|
||||
# meaning the pipe is full (nobody's draining it).
|
||||
#
|
||||
# Root-cause chain: our hard-kill in `spawn._subint` abandoned
|
||||
# the driver OS-thread (which is `daemon=True`) after the soft-
|
||||
# kill timeout, but the *sub-interpreter* inside that thread is
|
||||
# still running `trio.run()` — `_interpreters.destroy()` can't
|
||||
# force-stop a running subint (raises `InterpreterError`), and
|
||||
# legacy-config subints share the main GIL. The abandoned subint
|
||||
# starves the parent's trio event loop from iterating often
|
||||
# enough to drain its wakeup pipe → SIGINT silently drops.
|
||||
#
|
||||
# This is structurally a CPython-level limitation: there's no
|
||||
# public force-destroy primitive for a running subint. We
|
||||
# escape on the harness side via a SIGINT-loop in the `daemon`
|
||||
# fixture teardown (killing the bg registrar subproc closes its
|
||||
# end of the IPC, which eventually unblocks a recv in main trio,
|
||||
# which lets the loop drain the wakeup pipe). Long-term fix path:
|
||||
# msgspec PEP 684 support (jcrist/msgspec#563) → isolated-mode
|
||||
# subints with per-interp GIL.
|
||||
#
|
||||
# Full analysis:
|
||||
# `ai/conc-anal/subint_sigint_starvation_issue.md`
|
||||
#
|
||||
# See also the *sibling* hang class documented in
|
||||
# `ai/conc-anal/subint_cancel_delivery_hang_issue.md` — same
|
||||
# subint backend, different root cause (Ctrl-C-able hang, main
|
||||
# trio loop iterating fine; ours to fix, not CPython's).
|
||||
# Reproduced by `tests/test_subint_cancellation.py
|
||||
# ::test_subint_non_checkpointing_child`.
|
||||
#
|
||||
# Kept here (and not behind a `pytestmark.skip`) so we can still
|
||||
# inspect the dump file if the hang ever returns after a refactor.
|
||||
# `pytest`'s stderr capture eats `faulthandler` output otherwise,
|
||||
# so we route `dump_on_hang` to a file.
|
||||
with dump_on_hang(
|
||||
seconds=20,
|
||||
path=f'/tmp/test_stale_entry_is_deleted_{start_method}.dump',
|
||||
):
|
||||
trio.run(main)
|
||||
|
|
@ -0,0 +1,345 @@
|
|||
'''
|
||||
`open_root_actor(tpt_bind_addrs=...)` test suite.
|
||||
|
||||
Verify all three runtime code paths for explicit IPC-server
|
||||
bind-address selection in `_root.py`:
|
||||
|
||||
1. Non-registrar, no explicit bind -> random addrs from registry proto
|
||||
2. Registrar, no explicit bind -> binds to registry_addrs
|
||||
3. Explicit bind given -> wraps via `wrap_address()` and uses them
|
||||
|
||||
'''
|
||||
import pytest
|
||||
import trio
|
||||
import tractor
|
||||
from tractor.discovery._addr import (
|
||||
wrap_address,
|
||||
)
|
||||
from tractor.discovery._multiaddr import mk_maddr
|
||||
from tractor._testing.addr import get_rando_addr
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# helpers
|
||||
# ------------------------------------------------------------------
|
||||
def _bound_bindspaces(
|
||||
actor: tractor.Actor,
|
||||
) -> set[str]:
|
||||
'''
|
||||
Collect the set of bindspace strings from the actor's
|
||||
currently bound IPC-server accept addresses.
|
||||
|
||||
'''
|
||||
return {
|
||||
wrap_address(a).bindspace
|
||||
for a in actor.accept_addrs
|
||||
}
|
||||
|
||||
|
||||
def _bound_wrapped(
|
||||
actor: tractor.Actor,
|
||||
) -> list:
|
||||
'''
|
||||
Return the actor's accept addrs as wrapped `Address` objects.
|
||||
|
||||
'''
|
||||
return [
|
||||
wrap_address(a)
|
||||
for a in actor.accept_addrs
|
||||
]
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 1) Registrar + explicit tpt_bind_addrs
|
||||
# ------------------------------------------------------------------
|
||||
@pytest.mark.parametrize(
|
||||
'addr_combo',
|
||||
[
|
||||
'bind-eq-reg',
|
||||
'bind-subset-reg',
|
||||
'bind-disjoint-reg',
|
||||
],
|
||||
ids=lambda v: v,
|
||||
)
|
||||
def test_registrar_root_tpt_bind_addrs(
|
||||
reg_addr: tuple,
|
||||
tpt_proto: str,
|
||||
debug_mode: bool,
|
||||
addr_combo: str,
|
||||
):
|
||||
'''
|
||||
Registrar root-actor with explicit `tpt_bind_addrs`:
|
||||
bound set must include all registry + all bind addr bindspaces
|
||||
(merge behavior).
|
||||
|
||||
'''
|
||||
reg_wrapped = wrap_address(reg_addr)
|
||||
|
||||
if addr_combo == 'bind-eq-reg':
|
||||
bind_addrs = [reg_addr]
|
||||
# extra secondary reg addr for subset test
|
||||
extra_reg = []
|
||||
|
||||
elif addr_combo == 'bind-subset-reg':
|
||||
second_reg = get_rando_addr(tpt_proto)
|
||||
bind_addrs = [reg_addr]
|
||||
extra_reg = [second_reg]
|
||||
|
||||
elif addr_combo == 'bind-disjoint-reg':
|
||||
# port=0 on same host -> completely different addr
|
||||
rando = wrap_address(reg_addr).get_random(
|
||||
bindspace=reg_wrapped.bindspace,
|
||||
)
|
||||
bind_addrs = [rando.unwrap()]
|
||||
extra_reg = []
|
||||
|
||||
all_reg = [reg_addr] + extra_reg
|
||||
|
||||
async def _main():
|
||||
async with tractor.open_root_actor(
|
||||
registry_addrs=all_reg,
|
||||
tpt_bind_addrs=bind_addrs,
|
||||
debug_mode=debug_mode,
|
||||
):
|
||||
actor = tractor.current_actor()
|
||||
assert actor.is_registrar
|
||||
|
||||
bound = actor.accept_addrs
|
||||
bound_bs = _bound_bindspaces(actor)
|
||||
|
||||
# all registry bindspaces must appear in bound set
|
||||
for ra in all_reg:
|
||||
assert wrap_address(ra).bindspace in bound_bs
|
||||
|
||||
# all bind-addr bindspaces must appear
|
||||
for ba in bind_addrs:
|
||||
assert wrap_address(ba).bindspace in bound_bs
|
||||
|
||||
# registry addr must appear verbatim in bound
|
||||
# (after wrapping both sides for comparison)
|
||||
bound_w = _bound_wrapped(actor)
|
||||
assert reg_wrapped in bound_w
|
||||
|
||||
if addr_combo == 'bind-disjoint-reg':
|
||||
assert len(bound) >= 2
|
||||
|
||||
trio.run(_main)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'addr_combo',
|
||||
[
|
||||
'bind-same-bindspace',
|
||||
'bind-disjoint',
|
||||
],
|
||||
ids=lambda v: v,
|
||||
)
|
||||
def test_non_registrar_root_tpt_bind_addrs(
|
||||
daemon,
|
||||
reg_addr: tuple,
|
||||
tpt_proto: str,
|
||||
debug_mode: bool,
|
||||
addr_combo: str,
|
||||
):
|
||||
'''
|
||||
Non-registrar root with explicit `tpt_bind_addrs`:
|
||||
bound set must exactly match the requested bind addrs
|
||||
(no merge with registry).
|
||||
|
||||
'''
|
||||
reg_wrapped = wrap_address(reg_addr)
|
||||
|
||||
if addr_combo == 'bind-same-bindspace':
|
||||
# same bindspace as reg but port=0 so we get a random port
|
||||
rando = reg_wrapped.get_random(
|
||||
bindspace=reg_wrapped.bindspace,
|
||||
)
|
||||
bind_addrs = [rando.unwrap()]
|
||||
|
||||
elif addr_combo == 'bind-disjoint':
|
||||
rando = reg_wrapped.get_random(
|
||||
bindspace=reg_wrapped.bindspace,
|
||||
)
|
||||
bind_addrs = [rando.unwrap()]
|
||||
|
||||
async def _main():
|
||||
async with tractor.open_root_actor(
|
||||
registry_addrs=[reg_addr],
|
||||
tpt_bind_addrs=bind_addrs,
|
||||
debug_mode=debug_mode,
|
||||
):
|
||||
actor = tractor.current_actor()
|
||||
assert not actor.is_registrar
|
||||
|
||||
bound = actor.accept_addrs
|
||||
assert len(bound) == len(bind_addrs)
|
||||
|
||||
# bindspaces must match
|
||||
bound_bs = _bound_bindspaces(actor)
|
||||
for ba in bind_addrs:
|
||||
assert wrap_address(ba).bindspace in bound_bs
|
||||
|
||||
# TCP port=0 should resolve to a real port
|
||||
for uw_addr in bound:
|
||||
w = wrap_address(uw_addr)
|
||||
if w.proto_key == 'tcp':
|
||||
_host, port = uw_addr
|
||||
assert port > 0
|
||||
|
||||
trio.run(_main)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 3) Non-registrar, default random bind (baseline)
|
||||
# ------------------------------------------------------------------
|
||||
def test_non_registrar_default_random_bind(
|
||||
daemon,
|
||||
reg_addr: tuple,
|
||||
debug_mode: bool,
|
||||
):
|
||||
'''
|
||||
Baseline: no `tpt_bind_addrs`, daemon running.
|
||||
Bound bindspace matches registry bindspace,
|
||||
but bound addr differs from reg_addr (random).
|
||||
|
||||
'''
|
||||
reg_wrapped = wrap_address(reg_addr)
|
||||
|
||||
async def _main():
|
||||
async with tractor.open_root_actor(
|
||||
registry_addrs=[reg_addr],
|
||||
debug_mode=debug_mode,
|
||||
):
|
||||
actor = tractor.current_actor()
|
||||
assert not actor.is_registrar
|
||||
|
||||
bound_bs = _bound_bindspaces(actor)
|
||||
assert reg_wrapped.bindspace in bound_bs
|
||||
|
||||
# bound addr should differ from the registry addr
|
||||
# (the runtime picks a random port/path)
|
||||
bound_w = _bound_wrapped(actor)
|
||||
assert reg_wrapped not in bound_w
|
||||
|
||||
trio.run(_main)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 4) Multiaddr string input
|
||||
# ------------------------------------------------------------------
|
||||
def test_tpt_bind_addrs_as_maddr_str(
|
||||
reg_addr: tuple,
|
||||
debug_mode: bool,
|
||||
):
|
||||
'''
|
||||
Pass multiaddr strings as `tpt_bind_addrs`.
|
||||
Runtime should parse and bind successfully.
|
||||
|
||||
'''
|
||||
reg_wrapped = wrap_address(reg_addr)
|
||||
# build a port-0 / random maddr string for binding
|
||||
rando = reg_wrapped.get_random(
|
||||
bindspace=reg_wrapped.bindspace,
|
||||
)
|
||||
maddr_str: str = str(mk_maddr(rando))
|
||||
|
||||
async def _main():
|
||||
async with tractor.open_root_actor(
|
||||
registry_addrs=[reg_addr],
|
||||
tpt_bind_addrs=[maddr_str],
|
||||
debug_mode=debug_mode,
|
||||
):
|
||||
actor = tractor.current_actor()
|
||||
assert actor.is_registrar
|
||||
|
||||
for uw_addr in actor.accept_addrs:
|
||||
w = wrap_address(uw_addr)
|
||||
if w.proto_key == 'tcp':
|
||||
_host, port = uw_addr
|
||||
assert port > 0
|
||||
|
||||
trio.run(_main)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 5) Registrar merge produces union of binds
|
||||
# ------------------------------------------------------------------
|
||||
def test_registrar_merge_binds_union(
|
||||
tpt_proto: str,
|
||||
debug_mode: bool,
|
||||
):
|
||||
'''
|
||||
Registrar + disjoint bind addr: bound set must include
|
||||
both registry and explicit bind addresses.
|
||||
|
||||
'''
|
||||
reg_addr = get_rando_addr(tpt_proto)
|
||||
reg_wrapped = wrap_address(reg_addr)
|
||||
|
||||
rando = reg_wrapped.get_random(
|
||||
bindspace=reg_wrapped.bindspace,
|
||||
)
|
||||
bind_addrs = [rando.unwrap()]
|
||||
|
||||
# NOTE: for UDS, `get_random()` produces the same
|
||||
# filename for the same pid+actor-state, so the
|
||||
# "disjoint" premise only holds when the addrs
|
||||
# actually differ (always true for TCP, may
|
||||
# collide for UDS).
|
||||
expect_disjoint: bool = (
|
||||
tuple(reg_addr) != rando.unwrap()
|
||||
)
|
||||
|
||||
async def _main():
|
||||
async with tractor.open_root_actor(
|
||||
registry_addrs=[reg_addr],
|
||||
tpt_bind_addrs=bind_addrs,
|
||||
debug_mode=debug_mode,
|
||||
):
|
||||
actor = tractor.current_actor()
|
||||
assert actor.is_registrar
|
||||
|
||||
bound = actor.accept_addrs
|
||||
bound_w = _bound_wrapped(actor)
|
||||
|
||||
if expect_disjoint:
|
||||
# must have at least 2 (registry + bind)
|
||||
assert len(bound) >= 2
|
||||
|
||||
# registry addr must appear in bound set
|
||||
assert reg_wrapped in bound_w
|
||||
|
||||
trio.run(_main)
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# 6) open_nursery forwards tpt_bind_addrs
|
||||
# ------------------------------------------------------------------
|
||||
def test_open_nursery_forwards_tpt_bind_addrs(
|
||||
reg_addr: tuple,
|
||||
debug_mode: bool,
|
||||
):
|
||||
'''
|
||||
`open_nursery(tpt_bind_addrs=...)` forwards through
|
||||
`**kwargs` to `open_root_actor()`.
|
||||
|
||||
'''
|
||||
reg_wrapped = wrap_address(reg_addr)
|
||||
rando = reg_wrapped.get_random(
|
||||
bindspace=reg_wrapped.bindspace,
|
||||
)
|
||||
bind_addrs = [rando.unwrap()]
|
||||
|
||||
async def _main():
|
||||
async with tractor.open_nursery(
|
||||
registry_addrs=[reg_addr],
|
||||
tpt_bind_addrs=bind_addrs,
|
||||
debug_mode=debug_mode,
|
||||
):
|
||||
actor = tractor.current_actor()
|
||||
bound_bs = _bound_bindspaces(actor)
|
||||
|
||||
for ba in bind_addrs:
|
||||
assert wrap_address(ba).bindspace in bound_bs
|
||||
|
||||
trio.run(_main)
|
||||
|
|
@ -8,17 +8,16 @@ from pathlib import Path
|
|||
import pytest
|
||||
import trio
|
||||
import tractor
|
||||
from tractor import (
|
||||
Actor,
|
||||
_state,
|
||||
_addr,
|
||||
)
|
||||
from tractor import Actor
|
||||
from tractor.runtime import _state
|
||||
from tractor.discovery import _addr
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def bindspace_dir_str() -> str:
|
||||
|
||||
rt_dir: Path = tractor._state.get_rt_dir()
|
||||
from tractor.runtime._state import get_rt_dir
|
||||
rt_dir: Path = get_rt_dir()
|
||||
bs_dir: Path = rt_dir / 'doggy'
|
||||
bs_dir_str: str = str(bs_dir)
|
||||
assert not bs_dir.is_dir()
|
||||
|
|
|
|||
|
|
@ -13,9 +13,9 @@ from tractor import (
|
|||
Portal,
|
||||
ipc,
|
||||
msg,
|
||||
_state,
|
||||
_addr,
|
||||
)
|
||||
from tractor.runtime import _state
|
||||
from tractor.discovery import _addr
|
||||
|
||||
@tractor.context
|
||||
async def chk_tpts(
|
||||
|
|
@ -62,6 +62,13 @@ def test_root_passes_tpt_to_sub(
|
|||
reg_addr: tuple,
|
||||
debug_mode: bool,
|
||||
):
|
||||
# XXX NOTE, the `reg_addr` addr won't be the same type as the
|
||||
# `tpt_proto_key` would deliver here unless you pass `--tpt-proto
|
||||
# <tpt_proto_key>` on the CLI.
|
||||
#
|
||||
# if tpt_proto_key == 'uds':
|
||||
# breakpoint()
|
||||
|
||||
async def main():
|
||||
async with tractor.open_nursery(
|
||||
enable_transports=[tpt_proto_key],
|
||||
|
|
|
|||
|
|
@ -0,0 +1,4 @@
|
|||
'''
|
||||
`tractor.msg.*` sub-sys test suite.
|
||||
|
||||
'''
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
'''
|
||||
`tractor.msg.*` test sub-pkg conf.
|
||||
|
||||
'''
|
||||
|
|
@ -61,7 +61,7 @@ async def maybe_expect_raises(
|
|||
Async wrapper for ensuring errors propagate from the inner scope.
|
||||
|
||||
'''
|
||||
if tractor._state.debug_mode():
|
||||
if tractor.debug_mode():
|
||||
timeout += 999
|
||||
|
||||
with trio.fail_after(timeout):
|
||||
|
|
@ -0,0 +1,240 @@
|
|||
'''
|
||||
Unit tests for `tractor.msg.pretty_struct`
|
||||
private-field filtering in `pformat()`.
|
||||
|
||||
'''
|
||||
import pytest
|
||||
|
||||
from tractor.msg.pretty_struct import (
|
||||
Struct,
|
||||
pformat,
|
||||
iter_struct_ppfmt_lines,
|
||||
)
|
||||
from tractor.msg._codec import (
|
||||
MsgDec,
|
||||
mk_dec,
|
||||
)
|
||||
|
||||
|
||||
# ------ test struct definitions ------ #
|
||||
|
||||
class PublicOnly(Struct):
|
||||
'''
|
||||
All-public fields for baseline testing.
|
||||
|
||||
'''
|
||||
name: str = 'alice'
|
||||
age: int = 30
|
||||
|
||||
|
||||
class PrivateOnly(Struct):
|
||||
'''
|
||||
Only underscore-prefixed (private) fields.
|
||||
|
||||
'''
|
||||
_secret: str = 'hidden'
|
||||
_internal: int = 99
|
||||
|
||||
|
||||
class MixedFields(Struct):
|
||||
'''
|
||||
Mix of public and private fields.
|
||||
|
||||
'''
|
||||
name: str = 'bob'
|
||||
_hidden: int = 42
|
||||
value: float = 3.14
|
||||
_meta: str = 'internal'
|
||||
|
||||
|
||||
class Inner(
|
||||
Struct,
|
||||
frozen=True,
|
||||
):
|
||||
'''
|
||||
Frozen inner struct with a private field,
|
||||
for nesting tests.
|
||||
|
||||
'''
|
||||
x: int = 1
|
||||
_secret: str = 'nope'
|
||||
|
||||
|
||||
class Outer(Struct):
|
||||
'''
|
||||
Outer struct nesting an `Inner`.
|
||||
|
||||
'''
|
||||
label: str = 'outer'
|
||||
inner: Inner = Inner()
|
||||
|
||||
|
||||
class EmptyStruct(Struct):
|
||||
'''
|
||||
Struct with zero fields.
|
||||
|
||||
'''
|
||||
pass
|
||||
|
||||
|
||||
# ------ tests ------ #
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'struct_and_expected',
|
||||
[
|
||||
(
|
||||
PublicOnly(),
|
||||
{
|
||||
'shown': ['name', 'age'],
|
||||
'hidden': [],
|
||||
},
|
||||
),
|
||||
(
|
||||
MixedFields(),
|
||||
{
|
||||
'shown': ['name', 'value'],
|
||||
'hidden': ['_hidden', '_meta'],
|
||||
},
|
||||
),
|
||||
(
|
||||
PrivateOnly(),
|
||||
{
|
||||
'shown': [],
|
||||
'hidden': ['_secret', '_internal'],
|
||||
},
|
||||
),
|
||||
],
|
||||
ids=[
|
||||
'all-public',
|
||||
'mixed-pub-priv',
|
||||
'all-private',
|
||||
],
|
||||
)
|
||||
def test_field_visibility_in_pformat(
|
||||
struct_and_expected: tuple[
|
||||
Struct,
|
||||
dict[str, list[str]],
|
||||
],
|
||||
):
|
||||
'''
|
||||
Verify `pformat()` shows public fields
|
||||
and hides `_`-prefixed private fields.
|
||||
|
||||
'''
|
||||
(
|
||||
struct,
|
||||
expected,
|
||||
) = struct_and_expected
|
||||
output: str = pformat(struct)
|
||||
|
||||
for field_name in expected['shown']:
|
||||
assert field_name in output, (
|
||||
f'{field_name!r} should appear in:\n'
|
||||
f'{output}'
|
||||
)
|
||||
|
||||
for field_name in expected['hidden']:
|
||||
assert field_name not in output, (
|
||||
f'{field_name!r} should NOT appear in:\n'
|
||||
f'{output}'
|
||||
)
|
||||
|
||||
|
||||
def test_iter_ppfmt_lines_skips_private():
|
||||
'''
|
||||
Directly verify `iter_struct_ppfmt_lines()`
|
||||
never yields tuples with `_`-prefixed field
|
||||
names.
|
||||
|
||||
'''
|
||||
struct = MixedFields()
|
||||
lines: list[tuple[str, str]] = list(
|
||||
iter_struct_ppfmt_lines(
|
||||
struct,
|
||||
field_indent=2,
|
||||
)
|
||||
)
|
||||
# should have lines for public fields only
|
||||
assert len(lines) == 2
|
||||
|
||||
for _prefix, line_content in lines:
|
||||
field_name: str = (
|
||||
line_content.split(':')[0].strip()
|
||||
)
|
||||
assert not field_name.startswith('_'), (
|
||||
f'private field leaked: {field_name!r}'
|
||||
)
|
||||
|
||||
|
||||
def test_nested_struct_filters_inner_private():
|
||||
'''
|
||||
Verify that nested struct's private fields
|
||||
are also filtered out during recursion.
|
||||
|
||||
'''
|
||||
outer = Outer()
|
||||
output: str = pformat(outer)
|
||||
|
||||
# outer's public field
|
||||
assert 'label' in output
|
||||
|
||||
# inner's public field (recursed into)
|
||||
assert 'x' in output
|
||||
|
||||
# inner's private field must be hidden
|
||||
assert '_secret' not in output
|
||||
|
||||
|
||||
def test_empty_struct_pformat():
|
||||
'''
|
||||
An empty struct should produce a valid
|
||||
`pformat()` result with no field lines.
|
||||
|
||||
'''
|
||||
output: str = pformat(EmptyStruct())
|
||||
assert 'EmptyStruct(' in output
|
||||
assert output.rstrip().endswith(')')
|
||||
|
||||
# no field lines => only struct header+footer
|
||||
lines: list[tuple[str, str]] = list(
|
||||
iter_struct_ppfmt_lines(
|
||||
EmptyStruct(),
|
||||
field_indent=2,
|
||||
)
|
||||
)
|
||||
assert lines == []
|
||||
|
||||
|
||||
def test_real_msgdec_pformat_hides_private():
|
||||
'''
|
||||
Verify `pformat()` on a real `MsgDec`
|
||||
hides the `_dec` internal field.
|
||||
|
||||
NOTE: `MsgDec.__repr__` is custom and does
|
||||
NOT call `pformat()`, so we call it directly.
|
||||
|
||||
'''
|
||||
dec: MsgDec = mk_dec(spec=int)
|
||||
output: str = pformat(dec)
|
||||
|
||||
# the private `_dec` field should be filtered
|
||||
assert '_dec' not in output
|
||||
|
||||
# but the struct type name should be present
|
||||
assert 'MsgDec(' in output
|
||||
|
||||
|
||||
def test_pformat_repr_integration():
|
||||
'''
|
||||
Verify that `Struct.__repr__()` (which calls
|
||||
`pformat()`) also hides private fields for
|
||||
custom structs that do NOT override `__repr__`.
|
||||
|
||||
'''
|
||||
mixed = MixedFields()
|
||||
output: str = repr(mixed)
|
||||
|
||||
assert 'name' in output
|
||||
assert 'value' in output
|
||||
assert '_hidden' not in output
|
||||
assert '_meta' not in output
|
||||
|
|
@ -1,7 +1,12 @@
|
|||
"""
|
||||
Bidirectional streaming.
|
||||
'''
|
||||
Audit the simplest inter-actor bidirectional (streaming)
|
||||
msg patterns.
|
||||
|
||||
"""
|
||||
'''
|
||||
from __future__ import annotations
|
||||
from typing import (
|
||||
Callable,
|
||||
)
|
||||
import pytest
|
||||
import trio
|
||||
import tractor
|
||||
|
|
@ -9,10 +14,8 @@ import tractor
|
|||
|
||||
@tractor.context
|
||||
async def simple_rpc(
|
||||
|
||||
ctx: tractor.Context,
|
||||
data: int,
|
||||
|
||||
) -> None:
|
||||
'''
|
||||
Test a small ping-pong server.
|
||||
|
|
@ -39,15 +42,13 @@ async def simple_rpc(
|
|||
|
||||
@tractor.context
|
||||
async def simple_rpc_with_forloop(
|
||||
|
||||
ctx: tractor.Context,
|
||||
data: int,
|
||||
|
||||
) -> None:
|
||||
"""Same as previous test but using ``async for`` syntax/api.
|
||||
|
||||
"""
|
||||
'''
|
||||
Same as previous test but using `async for` syntax/api.
|
||||
|
||||
'''
|
||||
# signal to parent that we're up
|
||||
await ctx.started(data + 1)
|
||||
|
||||
|
|
@ -68,62 +69,78 @@ async def simple_rpc_with_forloop(
|
|||
|
||||
@pytest.mark.parametrize(
|
||||
'use_async_for',
|
||||
[True, False],
|
||||
[
|
||||
True,
|
||||
False,
|
||||
],
|
||||
ids='use_async_for={}'.format,
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
'server_func',
|
||||
[simple_rpc, simple_rpc_with_forloop],
|
||||
[
|
||||
simple_rpc,
|
||||
simple_rpc_with_forloop,
|
||||
],
|
||||
ids='server_func={}'.format,
|
||||
)
|
||||
def test_simple_rpc(server_func, use_async_for):
|
||||
def test_simple_rpc(
|
||||
server_func: Callable,
|
||||
use_async_for: bool,
|
||||
loglevel: str,
|
||||
debug_mode: bool,
|
||||
):
|
||||
'''
|
||||
The simplest request response pattern.
|
||||
|
||||
'''
|
||||
async def main():
|
||||
async with tractor.open_nursery() as n:
|
||||
with trio.fail_after(6):
|
||||
async with tractor.open_nursery(
|
||||
loglevel=loglevel,
|
||||
debug_mode=debug_mode,
|
||||
) as an:
|
||||
portal: tractor.Portal = await an.start_actor(
|
||||
'rpc_server',
|
||||
enable_modules=[__name__],
|
||||
)
|
||||
|
||||
portal = await n.start_actor(
|
||||
'rpc_server',
|
||||
enable_modules=[__name__],
|
||||
)
|
||||
async with portal.open_context(
|
||||
server_func, # taken from pytest parameterization
|
||||
data=10,
|
||||
) as (ctx, sent):
|
||||
|
||||
async with portal.open_context(
|
||||
server_func, # taken from pytest parameterization
|
||||
data=10,
|
||||
) as (ctx, sent):
|
||||
assert sent == 11
|
||||
|
||||
assert sent == 11
|
||||
async with ctx.open_stream() as stream:
|
||||
|
||||
async with ctx.open_stream() as stream:
|
||||
if use_async_for:
|
||||
|
||||
if use_async_for:
|
||||
|
||||
count = 0
|
||||
# receive msgs using async for style
|
||||
print('ping')
|
||||
await stream.send('ping')
|
||||
|
||||
async for msg in stream:
|
||||
assert msg == 'pong'
|
||||
count = 0
|
||||
# receive msgs using async for style
|
||||
print('ping')
|
||||
await stream.send('ping')
|
||||
count += 1
|
||||
|
||||
if count >= 9:
|
||||
break
|
||||
async for msg in stream:
|
||||
assert msg == 'pong'
|
||||
print('ping')
|
||||
await stream.send('ping')
|
||||
count += 1
|
||||
|
||||
else:
|
||||
# classic send/receive style
|
||||
for _ in range(10):
|
||||
if count >= 9:
|
||||
break
|
||||
|
||||
print('ping')
|
||||
await stream.send('ping')
|
||||
assert await stream.receive() == 'pong'
|
||||
else:
|
||||
# classic send/receive style
|
||||
for _ in range(10):
|
||||
|
||||
# stream should terminate here
|
||||
print('ping')
|
||||
await stream.send('ping')
|
||||
assert await stream.receive() == 'pong'
|
||||
|
||||
# final context result(s) should be consumed here in __aexit__()
|
||||
# stream should terminate here
|
||||
|
||||
await portal.cancel_actor()
|
||||
# final context result(s) should be consumed here in __aexit__()
|
||||
|
||||
await portal.cancel_actor()
|
||||
|
||||
trio.run(main)
|
||||
|
|
|
|||
|
|
@ -98,7 +98,8 @@ def test_ipc_channel_break_during_stream(
|
|||
expect_final_exc = TransportClosed
|
||||
|
||||
mod: ModuleType = import_path(
|
||||
examples_dir() / 'advanced_faults'
|
||||
examples_dir()
|
||||
/ 'advanced_faults'
|
||||
/ 'ipc_failure_during_stream.py',
|
||||
root=examples_dir(),
|
||||
consider_namespace_packages=False,
|
||||
|
|
@ -113,8 +114,9 @@ def test_ipc_channel_break_during_stream(
|
|||
if (
|
||||
# only expect EoC if trans is broken on the child side,
|
||||
ipc_break['break_child_ipc_after'] is not False
|
||||
and
|
||||
# AND we tell the child to call `MsgStream.aclose()`.
|
||||
and pre_aclose_msgstream
|
||||
pre_aclose_msgstream
|
||||
):
|
||||
# expect_final_exc = trio.EndOfChannel
|
||||
# ^XXX NOPE! XXX^ since now `.open_stream()` absorbs this
|
||||
|
|
@ -144,9 +146,6 @@ def test_ipc_channel_break_during_stream(
|
|||
# a user sending ctl-c by raising a KBI.
|
||||
if pre_aclose_msgstream:
|
||||
expect_final_exc = KeyboardInterrupt
|
||||
if tpt_proto == 'uds':
|
||||
expect_final_exc = TransportClosed
|
||||
expect_final_cause = trio.BrokenResourceError
|
||||
|
||||
# XXX OLD XXX
|
||||
# if child calls `MsgStream.aclose()` then expect EoC.
|
||||
|
|
@ -160,16 +159,13 @@ def test_ipc_channel_break_during_stream(
|
|||
ipc_break['break_child_ipc_after'] is not False
|
||||
and (
|
||||
ipc_break['break_parent_ipc_after']
|
||||
> ipc_break['break_child_ipc_after']
|
||||
>
|
||||
ipc_break['break_child_ipc_after']
|
||||
)
|
||||
):
|
||||
if pre_aclose_msgstream:
|
||||
expect_final_exc = KeyboardInterrupt
|
||||
|
||||
if tpt_proto == 'uds':
|
||||
expect_final_exc = TransportClosed
|
||||
expect_final_cause = trio.BrokenResourceError
|
||||
|
||||
# NOTE when the parent IPC side dies (even if the child does as well
|
||||
# but the child fails BEFORE the parent) we always expect the
|
||||
# IPC layer to raise a closed-resource, NEVER do we expect
|
||||
|
|
@ -248,8 +244,15 @@ def test_ipc_channel_break_during_stream(
|
|||
# get raw instance from pytest wrapper
|
||||
value = excinfo.value
|
||||
if isinstance(value, ExceptionGroup):
|
||||
excs = value.exceptions
|
||||
assert len(excs) == 1
|
||||
excs: tuple[Exception] = value.exceptions
|
||||
assert (
|
||||
len(excs) <= 2
|
||||
and
|
||||
all(
|
||||
isinstance(exc, TransportClosed)
|
||||
for exc in excs
|
||||
)
|
||||
)
|
||||
final_exc = excs[0]
|
||||
assert isinstance(final_exc, expect_final_exc)
|
||||
|
||||
|
|
|
|||
|
|
@ -17,8 +17,18 @@ from tractor._testing import (
|
|||
from .conftest import no_windows
|
||||
|
||||
|
||||
def is_win():
|
||||
return platform.system() == 'Windows'
|
||||
_non_linux: bool = platform.system() != 'Linux'
|
||||
_friggin_windows: bool = platform.system() == 'Windows'
|
||||
|
||||
|
||||
pytestmark = pytest.mark.skipon_spawn_backend(
|
||||
'subint',
|
||||
reason=(
|
||||
'XXX SUBINT HANGING TEST XXX\n'
|
||||
'See oustanding issue(s)\n'
|
||||
# TODO, put issue link!
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
async def assert_err(delay=0):
|
||||
|
|
@ -110,8 +120,17 @@ def test_remote_error(reg_addr, args_err):
|
|||
assert exc.boxed_type == errtype
|
||||
|
||||
|
||||
# @pytest.mark.skipon_spawn_backend(
|
||||
# 'subint',
|
||||
# reason=(
|
||||
# 'XXX SUBINT HANGING TEST XXX\n'
|
||||
# 'See oustanding issue(s)\n'
|
||||
# # TODO, put issue link!
|
||||
# )
|
||||
# )
|
||||
def test_multierror(
|
||||
reg_addr: tuple[str, int],
|
||||
start_method: str,
|
||||
):
|
||||
'''
|
||||
Verify we raise a ``BaseExceptionGroup`` out of a nursery where
|
||||
|
|
@ -141,15 +160,28 @@ def test_multierror(
|
|||
trio.run(main)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('delay', (0, 0.5))
|
||||
@pytest.mark.parametrize(
|
||||
'num_subactors', range(25, 26),
|
||||
'delay',
|
||||
(0, 0.5),
|
||||
ids='delays={}'.format,
|
||||
)
|
||||
def test_multierror_fast_nursery(reg_addr, start_method, num_subactors, delay):
|
||||
"""Verify we raise a ``BaseExceptionGroup`` out of a nursery where
|
||||
@pytest.mark.parametrize(
|
||||
'num_subactors',
|
||||
range(25, 26),
|
||||
ids= 'num_subs={}'.format,
|
||||
)
|
||||
def test_multierror_fast_nursery(
|
||||
reg_addr: tuple,
|
||||
start_method: str,
|
||||
num_subactors: int,
|
||||
delay: float,
|
||||
):
|
||||
'''
|
||||
Verify we raise a ``BaseExceptionGroup`` out of a nursery where
|
||||
more then one actor errors and also with a delay before failure
|
||||
to test failure during an ongoing spawning.
|
||||
"""
|
||||
|
||||
'''
|
||||
async def main():
|
||||
async with tractor.open_nursery(
|
||||
registry_addrs=[reg_addr],
|
||||
|
|
@ -189,8 +221,15 @@ async def do_nothing():
|
|||
pass
|
||||
|
||||
|
||||
@pytest.mark.parametrize('mechanism', ['nursery_cancel', KeyboardInterrupt])
|
||||
def test_cancel_single_subactor(reg_addr, mechanism):
|
||||
@pytest.mark.parametrize(
|
||||
'mechanism', [
|
||||
'nursery_cancel',
|
||||
KeyboardInterrupt,
|
||||
])
|
||||
def test_cancel_single_subactor(
|
||||
reg_addr: tuple,
|
||||
mechanism: str|KeyboardInterrupt,
|
||||
):
|
||||
'''
|
||||
Ensure a ``ActorNursery.start_actor()`` spawned subactor
|
||||
cancels when the nursery is cancelled.
|
||||
|
|
@ -232,9 +271,12 @@ async def stream_forever():
|
|||
await trio.sleep(0.01)
|
||||
|
||||
|
||||
@tractor_test
|
||||
async def test_cancel_infinite_streamer(start_method):
|
||||
|
||||
@tractor_test(
|
||||
timeout=6,
|
||||
)
|
||||
async def test_cancel_infinite_streamer(
|
||||
start_method: str
|
||||
):
|
||||
# stream for at most 1 seconds
|
||||
with (
|
||||
trio.fail_after(4),
|
||||
|
|
@ -257,6 +299,14 @@ async def test_cancel_infinite_streamer(start_method):
|
|||
assert n.cancelled
|
||||
|
||||
|
||||
# @pytest.mark.skipon_spawn_backend(
|
||||
# 'subint',
|
||||
# reason=(
|
||||
# 'XXX SUBINT HANGING TEST XXX\n'
|
||||
# 'See oustanding issue(s)\n'
|
||||
# # TODO, put issue link!
|
||||
# )
|
||||
# )
|
||||
@pytest.mark.parametrize(
|
||||
'num_actors_and_errs',
|
||||
[
|
||||
|
|
@ -286,7 +336,9 @@ async def test_cancel_infinite_streamer(start_method):
|
|||
'no_daemon_actors_fail_all_run_in_actors_sleep_then_fail',
|
||||
],
|
||||
)
|
||||
@tractor_test
|
||||
@tractor_test(
|
||||
timeout=10,
|
||||
)
|
||||
async def test_some_cancels_all(
|
||||
num_actors_and_errs: tuple,
|
||||
start_method: str,
|
||||
|
|
@ -370,7 +422,10 @@ async def test_some_cancels_all(
|
|||
pytest.fail("Should have gotten a remote assertion error?")
|
||||
|
||||
|
||||
async def spawn_and_error(breadth, depth) -> None:
|
||||
async def spawn_and_error(
|
||||
breadth: int,
|
||||
depth: int,
|
||||
) -> None:
|
||||
name = tractor.current_actor().name
|
||||
async with tractor.open_nursery() as nursery:
|
||||
for i in range(breadth):
|
||||
|
|
@ -396,7 +451,10 @@ async def spawn_and_error(breadth, depth) -> None:
|
|||
|
||||
|
||||
@tractor_test
|
||||
async def test_nested_multierrors(loglevel, start_method):
|
||||
async def test_nested_multierrors(
|
||||
loglevel: str,
|
||||
start_method: str,
|
||||
):
|
||||
'''
|
||||
Test that failed actor sets are wrapped in `BaseExceptionGroup`s. This
|
||||
test goes only 2 nurseries deep but we should eventually have tests
|
||||
|
|
@ -431,7 +489,7 @@ async def test_nested_multierrors(loglevel, start_method):
|
|||
for subexc in err.exceptions:
|
||||
|
||||
# verify first level actor errors are wrapped as remote
|
||||
if is_win():
|
||||
if _friggin_windows:
|
||||
|
||||
# windows is often too slow and cancellation seems
|
||||
# to happen before an actor is spawned
|
||||
|
|
@ -464,7 +522,7 @@ async def test_nested_multierrors(loglevel, start_method):
|
|||
# XXX not sure what's up with this..
|
||||
# on windows sometimes spawning is just too slow and
|
||||
# we get back the (sent) cancel signal instead
|
||||
if is_win():
|
||||
if _friggin_windows:
|
||||
if isinstance(subexc, tractor.RemoteActorError):
|
||||
assert subexc.boxed_type in (
|
||||
BaseExceptionGroup,
|
||||
|
|
@ -483,20 +541,21 @@ async def test_nested_multierrors(loglevel, start_method):
|
|||
|
||||
@no_windows
|
||||
def test_cancel_via_SIGINT(
|
||||
loglevel,
|
||||
start_method,
|
||||
spawn_backend,
|
||||
loglevel: str,
|
||||
start_method: str,
|
||||
):
|
||||
"""Ensure that a control-C (SIGINT) signal cancels both the parent and
|
||||
'''
|
||||
Ensure that a control-C (SIGINT) signal cancels both the parent and
|
||||
child processes in trionic fashion
|
||||
"""
|
||||
pid = os.getpid()
|
||||
|
||||
'''
|
||||
pid: int = os.getpid()
|
||||
|
||||
async def main():
|
||||
with trio.fail_after(2):
|
||||
async with tractor.open_nursery() as tn:
|
||||
await tn.start_actor('sucka')
|
||||
if 'mp' in spawn_backend:
|
||||
if 'mp' in start_method:
|
||||
time.sleep(0.1)
|
||||
os.kill(pid, signal.SIGINT)
|
||||
await trio.sleep_forever()
|
||||
|
|
@ -507,19 +566,31 @@ def test_cancel_via_SIGINT(
|
|||
|
||||
@no_windows
|
||||
def test_cancel_via_SIGINT_other_task(
|
||||
loglevel,
|
||||
start_method,
|
||||
spawn_backend,
|
||||
loglevel: str,
|
||||
start_method: str,
|
||||
spawn_backend: str,
|
||||
):
|
||||
"""Ensure that a control-C (SIGINT) signal cancels both the parent
|
||||
and child processes in trionic fashion even a subprocess is started
|
||||
from a seperate ``trio`` child task.
|
||||
"""
|
||||
pid = os.getpid()
|
||||
timeout: float = 2
|
||||
if is_win(): # smh
|
||||
'''
|
||||
Ensure that a control-C (SIGINT) signal cancels both the parent
|
||||
and child processes in trionic fashion even a subprocess is
|
||||
started from a seperate ``trio`` child task.
|
||||
|
||||
'''
|
||||
from .conftest import cpu_scaling_factor
|
||||
|
||||
pid: int = os.getpid()
|
||||
timeout: float = (
|
||||
4 if _non_linux
|
||||
else 2
|
||||
)
|
||||
if _friggin_windows: # smh
|
||||
timeout += 1
|
||||
|
||||
# add latency headroom for CPU freq scaling (auto-cpufreq et al.)
|
||||
headroom: float = cpu_scaling_factor()
|
||||
if headroom != 1.:
|
||||
timeout *= headroom
|
||||
|
||||
async def spawn_and_sleep_forever(
|
||||
task_status=trio.TASK_STATUS_IGNORED
|
||||
):
|
||||
|
|
@ -568,6 +639,14 @@ async def spawn_sub_with_sync_blocking_task():
|
|||
print('exiting first subactor layer..\n')
|
||||
|
||||
|
||||
# @pytest.mark.skipon_spawn_backend(
|
||||
# 'subint',
|
||||
# reason=(
|
||||
# 'XXX SUBINT HANGING TEST XXX\n'
|
||||
# 'See oustanding issue(s)\n'
|
||||
# # TODO, put issue link!
|
||||
# )
|
||||
# )
|
||||
@pytest.mark.parametrize(
|
||||
'man_cancel_outer',
|
||||
[
|
||||
|
|
@ -644,7 +723,11 @@ def test_cancel_while_childs_child_in_sync_sleep(
|
|||
#
|
||||
# delay = 1 # no AssertionError in eg, TooSlowError raised.
|
||||
# delay = 2 # is AssertionError in eg AND no TooSlowError !?
|
||||
delay = 4 # is AssertionError in eg AND no _cs cancellation.
|
||||
# is AssertionError in eg AND no _cs cancellation.
|
||||
delay = (
|
||||
6 if _non_linux
|
||||
else 4
|
||||
)
|
||||
|
||||
with trio.fail_after(delay) as _cs:
|
||||
# with trio.CancelScope() as cs:
|
||||
|
|
@ -678,7 +761,7 @@ def test_cancel_while_childs_child_in_sync_sleep(
|
|||
|
||||
|
||||
def test_fast_graceful_cancel_when_spawn_task_in_soft_proc_wait_for_daemon(
|
||||
start_method,
|
||||
start_method: str,
|
||||
):
|
||||
'''
|
||||
This is a very subtle test which demonstrates how cancellation
|
||||
|
|
@ -696,7 +779,7 @@ def test_fast_graceful_cancel_when_spawn_task_in_soft_proc_wait_for_daemon(
|
|||
kbi_delay = 0.5
|
||||
timeout: float = 2.9
|
||||
|
||||
if is_win(): # smh
|
||||
if _friggin_windows: # smh
|
||||
timeout += 1
|
||||
|
||||
async def main():
|
||||
|
|
|
|||
|
|
@ -18,16 +18,15 @@ from tractor import RemoteActorError
|
|||
|
||||
|
||||
async def aio_streamer(
|
||||
from_trio: asyncio.Queue,
|
||||
to_trio: trio.abc.SendChannel,
|
||||
chan: tractor.to_asyncio.LinkedTaskChannel,
|
||||
) -> trio.abc.ReceiveChannel:
|
||||
|
||||
# required first msg to sync caller
|
||||
to_trio.send_nowait(None)
|
||||
chan.started_nowait(None)
|
||||
|
||||
from itertools import cycle
|
||||
for i in cycle(range(10)):
|
||||
to_trio.send_nowait(i)
|
||||
chan.send_nowait(i)
|
||||
await asyncio.sleep(0.01)
|
||||
|
||||
|
||||
|
|
@ -69,7 +68,7 @@ async def wrapper_mngr(
|
|||
else:
|
||||
async with tractor.to_asyncio.open_channel_from(
|
||||
aio_streamer,
|
||||
) as (first, from_aio):
|
||||
) as (from_aio, first):
|
||||
assert not first
|
||||
|
||||
# cache it so next task uses broadcast receiver
|
||||
|
|
|
|||
|
|
@ -10,7 +10,19 @@ from tractor._testing import tractor_test
|
|||
MESSAGE = 'tractoring at full speed'
|
||||
|
||||
|
||||
def test_empty_mngrs_input_raises() -> None:
|
||||
def test_empty_mngrs_input_raises(
|
||||
tpt_proto: str,
|
||||
) -> None:
|
||||
# TODO, the `open_actor_cluster()` teardown hangs
|
||||
# intermittently on UDS when `gather_contexts(mngrs=())`
|
||||
# raises `ValueError` mid-setup; likely a race in the
|
||||
# actor-nursery cleanup vs UDS socket shutdown. Needs
|
||||
# a deeper look at `._clustering`/`._supervise` teardown
|
||||
# paths with the UDS transport.
|
||||
if tpt_proto == 'uds':
|
||||
pytest.skip(
|
||||
'actor-cluster teardown hangs intermittently on UDS'
|
||||
)
|
||||
|
||||
async def main():
|
||||
with trio.fail_after(3):
|
||||
|
|
@ -56,25 +68,39 @@ async def worker(
|
|||
print(msg)
|
||||
assert msg == MESSAGE
|
||||
|
||||
# TODO: does this ever cause a hang
|
||||
# ?TODO, does this ever cause a hang?
|
||||
# assert 0
|
||||
|
||||
|
||||
# ?TODO, but needs a fn-scoped tpt_proto fixture..
|
||||
# @pytest.mark.no_tpt('uds')
|
||||
@tractor_test
|
||||
async def test_streaming_to_actor_cluster() -> None:
|
||||
async def test_streaming_to_actor_cluster(
|
||||
tpt_proto: str,
|
||||
):
|
||||
'''
|
||||
Open an actor "cluster" using the (experimental) `._clustering`
|
||||
API and conduct standard inter-task-ctx streaming.
|
||||
|
||||
async with (
|
||||
open_actor_cluster(modules=[__name__]) as portals,
|
||||
'''
|
||||
if tpt_proto == 'uds':
|
||||
pytest.skip(
|
||||
f'Test currently fails with tpt-proto={tpt_proto!r}\n'
|
||||
)
|
||||
|
||||
gather_contexts(
|
||||
mngrs=[p.open_context(worker) for p in portals.values()],
|
||||
) as contexts,
|
||||
with trio.fail_after(6):
|
||||
async with (
|
||||
open_actor_cluster(modules=[__name__]) as portals,
|
||||
|
||||
gather_contexts(
|
||||
mngrs=[ctx[0].open_stream() for ctx in contexts],
|
||||
) as streams,
|
||||
gather_contexts(
|
||||
mngrs=[p.open_context(worker) for p in portals.values()],
|
||||
) as contexts,
|
||||
|
||||
):
|
||||
with trio.move_on_after(1):
|
||||
for stream in itertools.cycle(streams):
|
||||
await stream.send(MESSAGE)
|
||||
gather_contexts(
|
||||
mngrs=[ctx[0].open_stream() for ctx in contexts],
|
||||
) as streams,
|
||||
|
||||
):
|
||||
with trio.move_on_after(1):
|
||||
for stream in itertools.cycle(streams):
|
||||
await stream.send(MESSAGE)
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ from itertools import count
|
|||
import math
|
||||
import platform
|
||||
from pprint import pformat
|
||||
import sys
|
||||
from typing import (
|
||||
Callable,
|
||||
)
|
||||
|
|
@ -25,7 +26,7 @@ from tractor._exceptions import (
|
|||
StreamOverrun,
|
||||
ContextCancelled,
|
||||
)
|
||||
from tractor._state import current_ipc_ctx
|
||||
from tractor.runtime._state import current_ipc_ctx
|
||||
|
||||
from tractor._testing import (
|
||||
tractor_test,
|
||||
|
|
@ -938,9 +939,14 @@ def test_one_end_stream_not_opened(
|
|||
|
||||
'''
|
||||
overrunner, buf_size_increase, entrypoint = overrun_by
|
||||
from tractor._runtime import Actor
|
||||
from tractor.runtime._runtime import Actor
|
||||
buf_size = buf_size_increase + Actor.msg_buffer_size
|
||||
|
||||
timeout: float = (
|
||||
1 if sys.platform == 'linux'
|
||||
else 3
|
||||
)
|
||||
|
||||
async def main():
|
||||
async with tractor.open_nursery(
|
||||
debug_mode=debug_mode,
|
||||
|
|
@ -950,7 +956,7 @@ def test_one_end_stream_not_opened(
|
|||
enable_modules=[__name__],
|
||||
)
|
||||
|
||||
with trio.fail_after(1):
|
||||
with trio.fail_after(timeout):
|
||||
async with portal.open_context(
|
||||
entrypoint,
|
||||
) as (ctx, sent):
|
||||
|
|
|
|||
|
|
@ -9,12 +9,17 @@ import sys
|
|||
import subprocess
|
||||
import platform
|
||||
import shutil
|
||||
from typing import Callable
|
||||
|
||||
import pytest
|
||||
import tractor
|
||||
from tractor._testing import (
|
||||
examples_dir,
|
||||
)
|
||||
|
||||
_non_linux: bool = platform.system() != 'Linux'
|
||||
_friggin_macos: bool = platform.system() == 'Darwin'
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def run_example_in_subproc(
|
||||
|
|
@ -89,8 +94,10 @@ def run_example_in_subproc(
|
|||
for f in p[2]
|
||||
|
||||
if (
|
||||
'__' not in f
|
||||
and f[0] != '_'
|
||||
'__' not in f # ignore any pkg-mods
|
||||
# ignore any `__pycache__` subdir
|
||||
and '__pycache__' not in str(p[0])
|
||||
and f[0] != '_' # ignore any WIP "examplel mods"
|
||||
and 'debugging' not in p[0]
|
||||
and 'integration' not in p[0]
|
||||
and 'advanced_faults' not in p[0]
|
||||
|
|
@ -101,8 +108,10 @@ def run_example_in_subproc(
|
|||
ids=lambda t: t[1],
|
||||
)
|
||||
def test_example(
|
||||
run_example_in_subproc,
|
||||
example_script,
|
||||
run_example_in_subproc: Callable,
|
||||
example_script: str,
|
||||
test_log: tractor.log.StackLevelAdapter,
|
||||
ci_env: bool,
|
||||
):
|
||||
'''
|
||||
Load and run scripts from this repo's ``examples/`` dir as a user
|
||||
|
|
@ -116,9 +125,39 @@ def test_example(
|
|||
'''
|
||||
ex_file: str = os.path.join(*example_script)
|
||||
|
||||
if 'rpc_bidir_streaming' in ex_file and sys.version_info < (3, 9):
|
||||
if (
|
||||
'rpc_bidir_streaming' in ex_file
|
||||
and
|
||||
sys.version_info < (3, 9)
|
||||
):
|
||||
pytest.skip("2-way streaming example requires py3.9 async with syntax")
|
||||
|
||||
if (
|
||||
'full_fledged_streaming_service' in ex_file
|
||||
and
|
||||
_friggin_macos
|
||||
and
|
||||
ci_env
|
||||
):
|
||||
pytest.skip(
|
||||
'Streaming example is too flaky in CI\n'
|
||||
'AND their competitor runs this CI service..\n'
|
||||
'This test does run just fine "in person" however..'
|
||||
)
|
||||
|
||||
from .conftest import cpu_scaling_factor
|
||||
|
||||
timeout: float = (
|
||||
60
|
||||
if ci_env and _non_linux
|
||||
else 16
|
||||
)
|
||||
|
||||
# add latency headroom for CPU freq scaling (auto-cpufreq et al.)
|
||||
headroom: float = cpu_scaling_factor()
|
||||
if headroom != 1.:
|
||||
timeout *= headroom
|
||||
|
||||
with open(ex_file, 'r') as ex:
|
||||
code = ex.read()
|
||||
|
||||
|
|
@ -126,9 +165,12 @@ def test_example(
|
|||
err = None
|
||||
try:
|
||||
if not proc.poll():
|
||||
_, err = proc.communicate(timeout=15)
|
||||
_, err = proc.communicate(timeout=timeout)
|
||||
|
||||
except subprocess.TimeoutExpired as e:
|
||||
test_log.exception(
|
||||
f'Example failed to finish within {timeout}s ??\n'
|
||||
)
|
||||
proc.kill()
|
||||
err = e.stderr
|
||||
|
||||
|
|
|
|||
|
|
@ -26,8 +26,8 @@ from tractor import (
|
|||
to_asyncio,
|
||||
RemoteActorError,
|
||||
ContextCancelled,
|
||||
_state,
|
||||
)
|
||||
from tractor.runtime import _state
|
||||
from tractor.trionics import BroadcastReceiver
|
||||
from tractor._testing import expect_ctxc
|
||||
|
||||
|
|
@ -47,12 +47,11 @@ async def sleep_and_err(
|
|||
|
||||
# just signature placeholders for compat with
|
||||
# ``to_asyncio.open_channel_from()``
|
||||
to_trio: trio.MemorySendChannel|None = None,
|
||||
from_trio: asyncio.Queue|None = None,
|
||||
chan: to_asyncio.LinkedTaskChannel|None = None,
|
||||
|
||||
):
|
||||
if to_trio:
|
||||
to_trio.send_nowait('start')
|
||||
if chan:
|
||||
chan.started_nowait('start')
|
||||
|
||||
await asyncio.sleep(sleep_for)
|
||||
assert 0
|
||||
|
|
@ -238,7 +237,7 @@ async def trio_ctx(
|
|||
trio.open_nursery() as tn,
|
||||
tractor.to_asyncio.open_channel_from(
|
||||
sleep_and_err,
|
||||
) as (first, chan),
|
||||
) as (chan, first),
|
||||
):
|
||||
|
||||
assert first == 'start'
|
||||
|
|
@ -399,7 +398,7 @@ async def no_to_trio_in_args():
|
|||
|
||||
async def push_from_aio_task(
|
||||
sequence: Iterable,
|
||||
to_trio: trio.abc.SendChannel,
|
||||
chan: to_asyncio.LinkedTaskChannel,
|
||||
expect_cancel: False,
|
||||
fail_early: bool,
|
||||
exit_early: bool,
|
||||
|
|
@ -407,15 +406,12 @@ async def push_from_aio_task(
|
|||
) -> None:
|
||||
|
||||
try:
|
||||
# print('trying breakpoint')
|
||||
# breakpoint()
|
||||
|
||||
# sync caller ctx manager
|
||||
to_trio.send_nowait(True)
|
||||
chan.started_nowait(True)
|
||||
|
||||
for i in sequence:
|
||||
print(f'asyncio sending {i}')
|
||||
to_trio.send_nowait(i)
|
||||
chan.send_nowait(i)
|
||||
await asyncio.sleep(0.001)
|
||||
|
||||
if (
|
||||
|
|
@ -478,7 +474,7 @@ async def stream_from_aio(
|
|||
trio_exit_early
|
||||
))
|
||||
|
||||
) as (first, chan):
|
||||
) as (chan, first):
|
||||
|
||||
assert first is True
|
||||
|
||||
|
|
@ -732,15 +728,21 @@ def test_aio_errors_and_channel_propagates_and_closes(
|
|||
|
||||
|
||||
async def aio_echo_server(
|
||||
to_trio: trio.MemorySendChannel,
|
||||
from_trio: asyncio.Queue,
|
||||
chan: to_asyncio.LinkedTaskChannel,
|
||||
) -> None:
|
||||
'''
|
||||
An IPC-msg "echo server" with msgs received and relayed by
|
||||
a parent `trio.Task` into a child `asyncio.Task`
|
||||
and then repeated back to that local parent (`trio.Task`)
|
||||
and sent again back to the original calling remote actor.
|
||||
|
||||
to_trio.send_nowait('start')
|
||||
'''
|
||||
# same semantics as `trio.TaskStatus.started()`
|
||||
chan.started_nowait('start')
|
||||
|
||||
while True:
|
||||
try:
|
||||
msg = await from_trio.get()
|
||||
msg = await chan.get()
|
||||
except to_asyncio.TrioTaskExited:
|
||||
print(
|
||||
'breaking aio echo loop due to `trio` exit!'
|
||||
|
|
@ -748,7 +750,7 @@ async def aio_echo_server(
|
|||
break
|
||||
|
||||
# echo the msg back
|
||||
to_trio.send_nowait(msg)
|
||||
chan.send_nowait(msg)
|
||||
|
||||
# if we get the terminate sentinel
|
||||
# break the echo loop
|
||||
|
|
@ -765,7 +767,10 @@ async def trio_to_aio_echo_server(
|
|||
):
|
||||
async with to_asyncio.open_channel_from(
|
||||
aio_echo_server,
|
||||
) as (first, chan):
|
||||
) as (
|
||||
chan,
|
||||
first, # value from `chan.started_nowait()` above
|
||||
):
|
||||
assert first == 'start'
|
||||
|
||||
await ctx.started(first)
|
||||
|
|
@ -776,7 +781,8 @@ async def trio_to_aio_echo_server(
|
|||
await chan.send(msg)
|
||||
|
||||
out = await chan.receive()
|
||||
# echo back to parent actor-task
|
||||
|
||||
# echo back to parent-actor's remote parent-ctx-task!
|
||||
await stream.send(out)
|
||||
|
||||
if out is None:
|
||||
|
|
@ -1090,24 +1096,21 @@ def test_sigint_closes_lifetime_stack(
|
|||
|
||||
|
||||
# ?TODO asyncio.Task fn-deco?
|
||||
# -[ ] do sig checkingat import time like @context?
|
||||
# -[ ] maybe name it @aio_task ??
|
||||
# -[ ] chan: to_asyncio.InterloopChannel ??
|
||||
# -[ ] do fn-sig checking at import time like @context?
|
||||
# |_[ ] maybe name it @a(sync)io_task ??
|
||||
# @asyncio_task <- not bad ??
|
||||
async def raise_before_started(
|
||||
# from_trio: asyncio.Queue,
|
||||
# to_trio: trio.abc.SendChannel,
|
||||
chan: to_asyncio.LinkedTaskChannel,
|
||||
|
||||
) -> None:
|
||||
'''
|
||||
`asyncio.Task` entry point which RTEs before calling
|
||||
`to_trio.send_nowait()`.
|
||||
`chan.started_nowait()`.
|
||||
|
||||
'''
|
||||
await asyncio.sleep(0.2)
|
||||
raise RuntimeError('Some shite went wrong before `.send_nowait()`!!')
|
||||
|
||||
# to_trio.send_nowait('Uhh we shouldve RTE-d ^^ ??')
|
||||
chan.started_nowait('Uhh we shouldve RTE-d ^^ ??')
|
||||
await asyncio.sleep(float('inf'))
|
||||
|
||||
|
|
|
|||
|
|
@ -11,18 +11,30 @@ import trio
|
|||
import tractor
|
||||
from tractor import ( # typing
|
||||
Actor,
|
||||
current_actor,
|
||||
open_nursery,
|
||||
Portal,
|
||||
Context,
|
||||
ContextCancelled,
|
||||
MsgStream,
|
||||
Portal,
|
||||
RemoteActorError,
|
||||
current_actor,
|
||||
open_nursery,
|
||||
)
|
||||
from tractor._testing import (
|
||||
# tractor_test,
|
||||
expect_ctxc,
|
||||
)
|
||||
|
||||
from .conftest import cpu_scaling_factor
|
||||
|
||||
pytestmark = pytest.mark.skipon_spawn_backend(
|
||||
'subint',
|
||||
reason=(
|
||||
'XXX SUBINT GIL-CONTENTION HANGING TEST XXX\n'
|
||||
'See oustanding issue(s)\n'
|
||||
# TODO, put issue link!
|
||||
)
|
||||
)
|
||||
|
||||
# XXX TODO cases:
|
||||
# - [x] WE cancelled the peer and thus should not see any raised
|
||||
# `ContextCancelled` as it should be reaped silently?
|
||||
|
|
@ -200,7 +212,7 @@ async def stream_from_peer(
|
|||
) -> None:
|
||||
|
||||
# sanity
|
||||
assert tractor._state.debug_mode() == debug_mode
|
||||
assert tractor.debug_mode() == debug_mode
|
||||
|
||||
peer: Portal
|
||||
try:
|
||||
|
|
@ -580,7 +592,7 @@ def test_peer_canceller(
|
|||
assert (
|
||||
re.canceller
|
||||
==
|
||||
root.uid
|
||||
root.aid.uid
|
||||
)
|
||||
|
||||
else: # the other 2 ctxs
|
||||
|
|
@ -589,7 +601,7 @@ def test_peer_canceller(
|
|||
and (
|
||||
re.canceller
|
||||
==
|
||||
canceller.channel.uid
|
||||
canceller.channel.aid.uid
|
||||
)
|
||||
)
|
||||
|
||||
|
|
@ -744,7 +756,7 @@ def test_peer_canceller(
|
|||
# -> each context should have received
|
||||
# a silently absorbed context cancellation
|
||||
# in its remote nursery scope.
|
||||
# assert ctx.chan.uid == ctx.canceller
|
||||
# assert ctx.chan.aid.uid == ctx.canceller
|
||||
|
||||
# NOTE: when an inter-peer cancellation
|
||||
# occurred, we DO NOT expect this
|
||||
|
|
@ -796,12 +808,12 @@ async def basic_echo_server(
|
|||
|
||||
) -> None:
|
||||
'''
|
||||
Just the simplest `MsgStream` echo server which resays what
|
||||
you told it but with its uid in front ;)
|
||||
Just the simplest `MsgStream` echo server which resays what you
|
||||
told it but with its uid in front ;)
|
||||
|
||||
'''
|
||||
actor: Actor = tractor.current_actor()
|
||||
uid: tuple = actor.uid
|
||||
uid: tuple = actor.aid.uid
|
||||
await ctx.started(uid)
|
||||
async with ctx.open_stream() as ipc:
|
||||
async for msg in ipc:
|
||||
|
|
@ -840,7 +852,7 @@ async def serve_subactors(
|
|||
async with open_nursery() as an:
|
||||
|
||||
# sanity
|
||||
assert tractor._state.debug_mode() == debug_mode
|
||||
assert tractor.debug_mode() == debug_mode
|
||||
|
||||
await ctx.started(peer_name)
|
||||
async with ctx.open_stream() as ipc:
|
||||
|
|
@ -856,7 +868,7 @@ async def serve_subactors(
|
|||
f'|_{peer}\n'
|
||||
)
|
||||
await ipc.send((
|
||||
peer.chan.uid,
|
||||
peer.chan.aid.uid,
|
||||
peer.chan.raddr.unwrap(),
|
||||
))
|
||||
|
||||
|
|
@ -879,7 +891,7 @@ async def client_req_subactor(
|
|||
) -> None:
|
||||
# sanity
|
||||
if debug_mode:
|
||||
assert tractor._state.debug_mode()
|
||||
assert tractor.debug_mode()
|
||||
|
||||
# TODO: other cases to do with sub lifetimes:
|
||||
# -[ ] test that we can have the server spawn a sub
|
||||
|
|
@ -966,9 +978,14 @@ async def tell_little_bro(
|
|||
|
||||
caller: str = '',
|
||||
err_after: float|None = None,
|
||||
rng_seed: int = 50,
|
||||
rng_seed: int = 100,
|
||||
# NOTE, ensure ^ is large enough (on fast hw anyway)
|
||||
# to ensure the peer cancel req arrives before the
|
||||
# echoing dialog does itself Bp
|
||||
):
|
||||
# contact target actor, do a stream dialog.
|
||||
lb: Portal
|
||||
echo_ipc: MsgStream
|
||||
async with (
|
||||
tractor.wait_for_actor(
|
||||
name=actor_name
|
||||
|
|
@ -983,17 +1000,17 @@ async def tell_little_bro(
|
|||
else None
|
||||
),
|
||||
) as (sub_ctx, first),
|
||||
|
||||
sub_ctx.open_stream() as echo_ipc,
|
||||
):
|
||||
actor: Actor = current_actor()
|
||||
uid: tuple = actor.uid
|
||||
uid: tuple = actor.aid.uid
|
||||
for i in range(rng_seed):
|
||||
msg: tuple = (
|
||||
uid,
|
||||
i,
|
||||
)
|
||||
await echo_ipc.send(msg)
|
||||
await trio.sleep(0.001)
|
||||
resp = await echo_ipc.receive()
|
||||
print(
|
||||
f'{caller} => {actor_name}: {msg}\n'
|
||||
|
|
@ -1006,6 +1023,9 @@ async def tell_little_bro(
|
|||
assert sub_uid != uid
|
||||
assert _i == i
|
||||
|
||||
# XXX, usually should never get here!
|
||||
# await tractor.pause()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'raise_client_error',
|
||||
|
|
@ -1020,6 +1040,10 @@ def test_peer_spawns_and_cancels_service_subactor(
|
|||
raise_client_error: str,
|
||||
reg_addr: tuple[str, int],
|
||||
raise_sub_spawn_error_after: float|None,
|
||||
loglevel: str,
|
||||
test_log: tractor.log.StackLevelAdapter,
|
||||
# ^XXX, set to 'warning' to see masked-exc warnings
|
||||
# that may transpire during actor-nursery teardown.
|
||||
):
|
||||
# NOTE: this tests for the modden `mod wks open piker` bug
|
||||
# discovered as part of implementing workspace ctx
|
||||
|
|
@ -1049,6 +1073,7 @@ def test_peer_spawns_and_cancels_service_subactor(
|
|||
# NOTE: to halt the peer tasks on ctxc, uncomment this.
|
||||
debug_mode=debug_mode,
|
||||
registry_addrs=[reg_addr],
|
||||
loglevel=loglevel,
|
||||
) as an:
|
||||
server: Portal = await an.start_actor(
|
||||
(server_name := 'spawn_server'),
|
||||
|
|
@ -1084,7 +1109,7 @@ def test_peer_spawns_and_cancels_service_subactor(
|
|||
) as (client_ctx, client_says),
|
||||
):
|
||||
root: Actor = current_actor()
|
||||
spawner_uid: tuple = spawn_ctx.chan.uid
|
||||
spawner_uid: tuple = spawn_ctx.chan.aid.uid
|
||||
print(
|
||||
f'Server says: {first}\n'
|
||||
f'Client says: {client_says}\n'
|
||||
|
|
@ -1103,7 +1128,7 @@ def test_peer_spawns_and_cancels_service_subactor(
|
|||
print(
|
||||
'Sub-spawn came online\n'
|
||||
f'portal: {sub}\n'
|
||||
f'.uid: {sub.actor.uid}\n'
|
||||
f'.uid: {sub.actor.aid.uid}\n'
|
||||
f'chan.raddr: {sub.chan.raddr}\n'
|
||||
)
|
||||
|
||||
|
|
@ -1137,7 +1162,7 @@ def test_peer_spawns_and_cancels_service_subactor(
|
|||
|
||||
assert isinstance(res, ContextCancelled)
|
||||
assert client_ctx.cancel_acked
|
||||
assert res.canceller == root.uid
|
||||
assert res.canceller == root.aid.uid
|
||||
assert not raise_sub_spawn_error_after
|
||||
|
||||
# cancelling the spawner sub should
|
||||
|
|
@ -1171,8 +1196,8 @@ def test_peer_spawns_and_cancels_service_subactor(
|
|||
# little_bro: a `RuntimeError`.
|
||||
#
|
||||
check_inner_rte(rae)
|
||||
assert rae.relay_uid == client.chan.uid
|
||||
assert rae.src_uid == sub.chan.uid
|
||||
assert rae.relay_uid == client.chan.aid.uid
|
||||
assert rae.src_uid == sub.chan.aid.uid
|
||||
|
||||
assert not client_ctx.cancel_acked
|
||||
assert (
|
||||
|
|
@ -1201,12 +1226,12 @@ def test_peer_spawns_and_cancels_service_subactor(
|
|||
except ContextCancelled as ctxc:
|
||||
_ctxc = ctxc
|
||||
print(
|
||||
f'{root.uid} caught ctxc from ctx with {client_ctx.chan.uid}\n'
|
||||
f'{root.aid.uid} caught ctxc from ctx with {client_ctx.chan.aid.uid}\n'
|
||||
f'{repr(ctxc)}\n'
|
||||
)
|
||||
|
||||
if not raise_sub_spawn_error_after:
|
||||
assert ctxc.canceller == root.uid
|
||||
assert ctxc.canceller == root.aid.uid
|
||||
else:
|
||||
assert ctxc.canceller == spawner_uid
|
||||
|
||||
|
|
@ -1237,9 +1262,20 @@ def test_peer_spawns_and_cancels_service_subactor(
|
|||
|
||||
# assert spawn_ctx.cancelled_caught
|
||||
|
||||
|
||||
async def _main():
|
||||
headroom: float = cpu_scaling_factor()
|
||||
this_fast_on_linux: float = 3
|
||||
this_fast = this_fast_on_linux * headroom
|
||||
if headroom != 1.:
|
||||
test_log.warning(
|
||||
f'Adding latency headroom on linux bc CPU scaling,\n'
|
||||
f'headroom: {headroom}\n'
|
||||
f'this_fast_on_linux: {this_fast_on_linux} -> {this_fast}\n'
|
||||
)
|
||||
with trio.fail_after(
|
||||
3 if not debug_mode
|
||||
this_fast
|
||||
if not debug_mode
|
||||
else 999
|
||||
):
|
||||
await main()
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
"""
|
||||
Streaming via async gen api
|
||||
Streaming via the, now legacy, "async-gen API".
|
||||
|
||||
"""
|
||||
import time
|
||||
from functools import partial
|
||||
import platform
|
||||
from typing import Callable
|
||||
|
||||
import trio
|
||||
import tractor
|
||||
|
|
@ -19,7 +21,11 @@ def test_must_define_ctx():
|
|||
async def no_ctx():
|
||||
pass
|
||||
|
||||
assert "no_ctx must be `ctx: tractor.Context" in str(err.value)
|
||||
assert (
|
||||
"no_ctx must be `ctx: tractor.Context"
|
||||
in
|
||||
str(err.value)
|
||||
)
|
||||
|
||||
@tractor.stream
|
||||
async def has_ctx(ctx):
|
||||
|
|
@ -69,14 +75,14 @@ async def stream_from_single_subactor(
|
|||
async with tractor.open_nursery(
|
||||
registry_addrs=[reg_addr],
|
||||
start_method=start_method,
|
||||
) as nursery:
|
||||
) as an:
|
||||
|
||||
async with tractor.find_actor('streamerd') as portals:
|
||||
|
||||
if not portals:
|
||||
|
||||
# no brokerd actor found
|
||||
portal = await nursery.start_actor(
|
||||
portal = await an.start_actor(
|
||||
'streamerd',
|
||||
enable_modules=[__name__],
|
||||
)
|
||||
|
|
@ -116,11 +122,22 @@ async def stream_from_single_subactor(
|
|||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'stream_func', [async_gen_stream, context_stream]
|
||||
'stream_func',
|
||||
[
|
||||
async_gen_stream,
|
||||
context_stream,
|
||||
],
|
||||
ids='stream_func={}'.format
|
||||
)
|
||||
def test_stream_from_single_subactor(reg_addr, start_method, stream_func):
|
||||
"""Verify streaming from a spawned async generator.
|
||||
"""
|
||||
def test_stream_from_single_subactor(
|
||||
reg_addr: tuple,
|
||||
start_method: str,
|
||||
stream_func: Callable,
|
||||
):
|
||||
'''
|
||||
Verify streaming from a spawned async generator.
|
||||
|
||||
'''
|
||||
trio.run(
|
||||
partial(
|
||||
stream_from_single_subactor,
|
||||
|
|
@ -132,10 +149,9 @@ def test_stream_from_single_subactor(reg_addr, start_method, stream_func):
|
|||
|
||||
|
||||
# this is the first 2 actors, streamer_1 and streamer_2
|
||||
async def stream_data(seed):
|
||||
async def stream_data(seed: int):
|
||||
|
||||
for i in range(seed):
|
||||
|
||||
yield i
|
||||
|
||||
# trigger scheduler to simulate practical usage
|
||||
|
|
@ -143,15 +159,17 @@ async def stream_data(seed):
|
|||
|
||||
|
||||
# this is the third actor; the aggregator
|
||||
async def aggregate(seed):
|
||||
"""Ensure that the two streams we receive match but only stream
|
||||
async def aggregate(seed: int):
|
||||
'''
|
||||
Ensure that the two streams we receive match but only stream
|
||||
a single set of values to the parent.
|
||||
"""
|
||||
async with tractor.open_nursery() as nursery:
|
||||
|
||||
'''
|
||||
async with tractor.open_nursery() as an:
|
||||
portals = []
|
||||
for i in range(1, 3):
|
||||
# fork point
|
||||
portal = await nursery.start_actor(
|
||||
portal = await an.start_actor(
|
||||
name=f'streamer_{i}',
|
||||
enable_modules=[__name__],
|
||||
)
|
||||
|
|
@ -164,20 +182,28 @@ async def aggregate(seed):
|
|||
async with send_chan:
|
||||
|
||||
async with portal.open_stream_from(
|
||||
stream_data, seed=seed,
|
||||
stream_data,
|
||||
seed=seed,
|
||||
) as stream:
|
||||
|
||||
async for value in stream:
|
||||
# leverage trio's built-in backpressure
|
||||
await send_chan.send(value)
|
||||
|
||||
print(f"FINISHED ITERATING {portal.channel.uid}")
|
||||
print(
|
||||
f'FINISHED ITERATING!\n'
|
||||
f'peer: {portal.channel.aid.uid}'
|
||||
)
|
||||
|
||||
# spawn 2 trio tasks to collect streams and push to a local queue
|
||||
async with trio.open_nursery() as n:
|
||||
async with trio.open_nursery() as tn:
|
||||
|
||||
for portal in portals:
|
||||
n.start_soon(push_to_chan, portal, send_chan.clone())
|
||||
tn.start_soon(
|
||||
push_to_chan,
|
||||
portal,
|
||||
send_chan.clone(),
|
||||
)
|
||||
|
||||
# close this local task's reference to send side
|
||||
await send_chan.aclose()
|
||||
|
|
@ -194,20 +220,21 @@ async def aggregate(seed):
|
|||
|
||||
print("FINISHED ITERATING in aggregator")
|
||||
|
||||
await nursery.cancel()
|
||||
await an.cancel()
|
||||
print("WAITING on `ActorNursery` to finish")
|
||||
print("AGGREGATOR COMPLETE!")
|
||||
|
||||
|
||||
# this is the main actor and *arbiter*
|
||||
async def a_quadruple_example():
|
||||
# a nursery which spawns "actors"
|
||||
async with tractor.open_nursery() as nursery:
|
||||
async def a_quadruple_example() -> list[int]:
|
||||
'''
|
||||
Open the root-actor which is also a "registrar".
|
||||
|
||||
'''
|
||||
async with tractor.open_nursery() as an:
|
||||
seed = int(1e3)
|
||||
pre_start = time.time()
|
||||
|
||||
portal = await nursery.start_actor(
|
||||
portal = await an.start_actor(
|
||||
name='aggregator',
|
||||
enable_modules=[__name__],
|
||||
)
|
||||
|
|
@ -228,8 +255,14 @@ async def a_quadruple_example():
|
|||
return result_stream
|
||||
|
||||
|
||||
async def cancel_after(wait, reg_addr):
|
||||
async with tractor.open_root_actor(registry_addrs=[reg_addr]):
|
||||
async def cancel_after(
|
||||
wait: float,
|
||||
reg_addr: tuple,
|
||||
) -> list[int]:
|
||||
|
||||
async with tractor.open_root_actor(
|
||||
registry_addrs=[reg_addr],
|
||||
):
|
||||
with trio.move_on_after(wait):
|
||||
return await a_quadruple_example()
|
||||
|
||||
|
|
@ -240,6 +273,10 @@ def time_quad_ex(
|
|||
ci_env: bool,
|
||||
spawn_backend: str,
|
||||
):
|
||||
non_linux: bool = (_sys := platform.system()) != 'Linux'
|
||||
if ci_env and non_linux:
|
||||
pytest.skip(f'Test is too flaky on {_sys!r} in CI')
|
||||
|
||||
if spawn_backend == 'mp':
|
||||
'''
|
||||
no idea but the mp *nix runs are flaking out here often...
|
||||
|
|
@ -247,32 +284,59 @@ def time_quad_ex(
|
|||
'''
|
||||
pytest.skip("Test is too flaky on mp in CI")
|
||||
|
||||
timeout = 7 if platform.system() in ('Windows', 'Darwin') else 4
|
||||
timeout = 7 if non_linux else 4
|
||||
start = time.time()
|
||||
results = trio.run(cancel_after, timeout, reg_addr)
|
||||
diff = time.time() - start
|
||||
results: list[int] = trio.run(
|
||||
cancel_after,
|
||||
timeout,
|
||||
reg_addr,
|
||||
)
|
||||
diff: float = time.time() - start
|
||||
assert results
|
||||
return results, diff
|
||||
|
||||
|
||||
def test_a_quadruple_example(
|
||||
time_quad_ex: tuple,
|
||||
time_quad_ex: tuple[list[int], float],
|
||||
ci_env: bool,
|
||||
spawn_backend: str,
|
||||
test_log: tractor.log.StackLevelAdapter,
|
||||
):
|
||||
'''
|
||||
This also serves as a kind of "we'd like to be this fast test".
|
||||
This also serves as a "we'd like to be this fast" smoke test
|
||||
given past empirical eval of this suite.
|
||||
|
||||
'''
|
||||
non_linux: bool = (_sys := platform.system()) != 'Linux'
|
||||
|
||||
this_fast_on_linux: float = 3
|
||||
this_fast = (
|
||||
6 if non_linux
|
||||
else this_fast_on_linux
|
||||
)
|
||||
# ^ XXX NOTE,
|
||||
# i've noticed that tweaking the CPU governor setting
|
||||
# to not "always" enable "turbo" mode can result in latency
|
||||
# which causes this limit to be too little. Not sure if it'd
|
||||
# be worth it to adjust the linux value based on reading the
|
||||
# CPU conf from the sys?
|
||||
#
|
||||
# For ex, see the `auto-cpufreq` docs on such settings,
|
||||
# https://github.com/AdnanHodzic/auto-cpufreq?tab=readme-ov-file#example-config-file-contents
|
||||
#
|
||||
# HENCE this below latency-headroom compensation logic..
|
||||
from .conftest import cpu_scaling_factor
|
||||
headroom: float = cpu_scaling_factor()
|
||||
if headroom != 1.:
|
||||
this_fast = this_fast_on_linux * headroom
|
||||
test_log.warning(
|
||||
f'Adding latency headroom on linux bc CPU scaling,\n'
|
||||
f'headroom: {headroom}\n'
|
||||
f'this_fast_on_linux: {this_fast_on_linux} -> {this_fast}\n'
|
||||
)
|
||||
|
||||
results, diff = time_quad_ex
|
||||
assert results
|
||||
this_fast = (
|
||||
6 if platform.system() in (
|
||||
'Windows',
|
||||
'Darwin',
|
||||
)
|
||||
else 3
|
||||
)
|
||||
assert diff < this_fast
|
||||
|
||||
|
||||
|
|
@ -281,43 +345,58 @@ def test_a_quadruple_example(
|
|||
list(map(lambda i: i/10, range(3, 9)))
|
||||
)
|
||||
def test_not_fast_enough_quad(
|
||||
reg_addr, time_quad_ex, cancel_delay, ci_env, spawn_backend
|
||||
reg_addr: tuple,
|
||||
time_quad_ex: tuple[list[int], float],
|
||||
cancel_delay: float,
|
||||
ci_env: bool,
|
||||
spawn_backend: str,
|
||||
):
|
||||
"""Verify we can cancel midway through the quad example and all actors
|
||||
cancel gracefully.
|
||||
"""
|
||||
'''
|
||||
Verify we can cancel midway through the quad example and all
|
||||
actors cancel gracefully.
|
||||
|
||||
'''
|
||||
results, diff = time_quad_ex
|
||||
delay = max(diff - cancel_delay, 0)
|
||||
results = trio.run(cancel_after, delay, reg_addr)
|
||||
system = platform.system()
|
||||
if system in ('Windows', 'Darwin') and results is not None:
|
||||
results = trio.run(
|
||||
cancel_after,
|
||||
delay,
|
||||
reg_addr,
|
||||
)
|
||||
system: str = platform.system()
|
||||
if (
|
||||
system in ('Windows', 'Darwin')
|
||||
and
|
||||
results is not None
|
||||
):
|
||||
# In CI envoirments it seems later runs are quicker then the first
|
||||
# so just ignore these
|
||||
print(f"Woa there {system} caught your breath eh?")
|
||||
print(f'Woa there {system} caught your breath eh?')
|
||||
else:
|
||||
# should be cancelled mid-streaming
|
||||
assert results is None
|
||||
|
||||
|
||||
@tractor_test
|
||||
@tractor_test(timeout=20)
|
||||
async def test_respawn_consumer_task(
|
||||
reg_addr,
|
||||
spawn_backend,
|
||||
loglevel,
|
||||
reg_addr: tuple,
|
||||
spawn_backend: str,
|
||||
loglevel: str,
|
||||
):
|
||||
"""Verify that ``._portal.ReceiveStream.shield()``
|
||||
'''
|
||||
Verify that ``._portal.ReceiveStream.shield()``
|
||||
sucessfully protects the underlying IPC channel from being closed
|
||||
when cancelling and respawning a consumer task.
|
||||
|
||||
This also serves to verify that all values from the stream can be
|
||||
received despite the respawns.
|
||||
|
||||
"""
|
||||
'''
|
||||
stream = None
|
||||
|
||||
async with tractor.open_nursery() as n:
|
||||
async with tractor.open_nursery() as an:
|
||||
|
||||
portal = await n.start_actor(
|
||||
portal = await an.start_actor(
|
||||
name='streamer',
|
||||
enable_modules=[__name__]
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
"""
|
||||
Arbiter and "local" actor api
|
||||
Registrar and "local" actor api
|
||||
"""
|
||||
import time
|
||||
|
||||
|
|
@ -12,11 +12,11 @@ from tractor._testing import tractor_test
|
|||
|
||||
@pytest.mark.trio
|
||||
async def test_no_runtime():
|
||||
"""An arbitter must be established before any nurseries
|
||||
"""A registrar must be established before any nurseries
|
||||
can be created.
|
||||
|
||||
(In other words ``tractor.open_root_actor()`` must be engaged at
|
||||
some point?)
|
||||
(In other words ``tractor.open_root_actor()`` must be
|
||||
engaged at some point?)
|
||||
"""
|
||||
with pytest.raises(RuntimeError) :
|
||||
async with tractor.find_actor('doggy'):
|
||||
|
|
@ -25,9 +25,9 @@ async def test_no_runtime():
|
|||
|
||||
@tractor_test
|
||||
async def test_self_is_registered(reg_addr):
|
||||
"Verify waiting on the arbiter to register itself using the standard api."
|
||||
"Verify waiting on the registrar to register itself using the standard api."
|
||||
actor = tractor.current_actor()
|
||||
assert actor.is_arbiter
|
||||
assert actor.is_registrar
|
||||
with trio.fail_after(0.2):
|
||||
async with tractor.wait_for_actor('root') as portal:
|
||||
assert portal.channel.uid[0] == 'root'
|
||||
|
|
@ -35,11 +35,11 @@ async def test_self_is_registered(reg_addr):
|
|||
|
||||
@tractor_test
|
||||
async def test_self_is_registered_localportal(reg_addr):
|
||||
"Verify waiting on the arbiter to register itself using a local portal."
|
||||
"Verify waiting on the registrar to register itself using a local portal."
|
||||
actor = tractor.current_actor()
|
||||
assert actor.is_arbiter
|
||||
assert actor.is_registrar
|
||||
async with tractor.get_registry(reg_addr) as portal:
|
||||
assert isinstance(portal, tractor._portal.LocalPortal)
|
||||
assert isinstance(portal, tractor.runtime._portal.LocalPortal)
|
||||
|
||||
with trio.fail_after(0.2):
|
||||
sockaddr = await portal.run_from_ns(
|
||||
|
|
@ -57,8 +57,8 @@ def test_local_actor_async_func(reg_addr):
|
|||
async with tractor.open_root_actor(
|
||||
registry_addrs=[reg_addr],
|
||||
):
|
||||
# arbiter is started in-proc if dne
|
||||
assert tractor.current_actor().is_arbiter
|
||||
# registrar is started in-proc if dne
|
||||
assert tractor.current_actor().is_registrar
|
||||
|
||||
for i in range(10):
|
||||
nums.append(i)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,185 @@
|
|||
'''
|
||||
`tractor.log`-wrapping unit tests.
|
||||
|
||||
'''
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
from types import ModuleType
|
||||
|
||||
import pytest
|
||||
import tractor
|
||||
from tractor import (
|
||||
_code_load,
|
||||
log,
|
||||
)
|
||||
|
||||
|
||||
def test_root_pkg_not_duplicated_in_logger_name():
|
||||
'''
|
||||
When both `pkg_name` and `name` are passed and they have
|
||||
a common `<root_name>.< >` prefix, ensure that it is not
|
||||
duplicated in the child's `StackLevelAdapter.name: str`.
|
||||
|
||||
'''
|
||||
project_name: str = 'pylib'
|
||||
pkg_path: str = 'pylib.subpkg.mod'
|
||||
|
||||
assert not tractor.current_actor(
|
||||
err_on_no_runtime=False,
|
||||
)
|
||||
proj_log = log.get_logger(
|
||||
pkg_name=project_name,
|
||||
mk_sublog=False,
|
||||
)
|
||||
|
||||
sublog = log.get_logger(
|
||||
pkg_name=project_name,
|
||||
name=pkg_path,
|
||||
)
|
||||
|
||||
assert proj_log is not sublog
|
||||
assert sublog.name.count(proj_log.name) == 1
|
||||
assert 'mod' not in sublog.name
|
||||
|
||||
|
||||
def test_implicit_mod_name_applied_for_child(
|
||||
testdir: pytest.Pytester,
|
||||
loglevel: str,
|
||||
):
|
||||
'''
|
||||
Verify that when `.log.get_logger(pkg_name='pylib')` is called
|
||||
from a given sub-mod from within the `pylib` pkg-path, we
|
||||
implicitly set the equiv of `name=__name__` from the caller's
|
||||
module.
|
||||
|
||||
'''
|
||||
# tractor.log.get_console_log(level=loglevel)
|
||||
proj_name: str = 'snakelib'
|
||||
mod_code: str = (
|
||||
f'import tractor\n'
|
||||
f'\n'
|
||||
# if you need to trace `testdir` stuff @ import-time..
|
||||
# f'breakpoint()\n'
|
||||
f'log = tractor.log.get_logger(pkg_name="{proj_name}")\n'
|
||||
)
|
||||
|
||||
# create a sub-module for each pkg layer
|
||||
_lib = testdir.mkpydir(proj_name)
|
||||
pkg: Path = Path(_lib)
|
||||
pkg_init_mod: Path = pkg / "__init__.py"
|
||||
pkg_init_mod.write_text(mod_code)
|
||||
|
||||
subpkg: Path = pkg / 'subpkg'
|
||||
subpkg.mkdir()
|
||||
subpkgmod: Path = subpkg / "__init__.py"
|
||||
subpkgmod.touch()
|
||||
subpkgmod.write_text(mod_code)
|
||||
|
||||
_submod: Path = testdir.makepyfile(
|
||||
_mod=mod_code,
|
||||
)
|
||||
|
||||
pkg_submod = pkg / 'mod.py'
|
||||
pkg_subpkg_submod = subpkg / 'submod.py'
|
||||
shutil.copyfile(
|
||||
_submod,
|
||||
pkg_submod,
|
||||
)
|
||||
shutil.copyfile(
|
||||
_submod,
|
||||
pkg_subpkg_submod,
|
||||
)
|
||||
testdir.chdir()
|
||||
# NOTE, to introspect the py-file-module-layout use (in .xsh
|
||||
# syntax): `ranger @str(testdir)`
|
||||
|
||||
# XXX NOTE, once the "top level" pkg mod has been
|
||||
# imported, we can then use `import` syntax to
|
||||
# import it's sub-pkgs and modules.
|
||||
subpkgmod: ModuleType = _code_load.load_module_from_path(
|
||||
Path(pkg / '__init__.py'),
|
||||
module_name=proj_name,
|
||||
)
|
||||
|
||||
pkg_root_log = log.get_logger(
|
||||
pkg_name=proj_name,
|
||||
mk_sublog=False,
|
||||
)
|
||||
# the top level pkg-mod, created just now,
|
||||
# by above API call.
|
||||
assert pkg_root_log.name == proj_name
|
||||
assert not pkg_root_log.logger.getChildren()
|
||||
#
|
||||
# ^TODO! test this same output but created via a `get_logger()`
|
||||
# call in the `snakelib.__init__py`!!
|
||||
|
||||
# NOTE, the pkg-level "init mod" should of course
|
||||
# have the same name as the package ns-path.
|
||||
import snakelib as init_mod
|
||||
assert init_mod.log.name == proj_name
|
||||
|
||||
# NOTE, a first-pkg-level sub-module should only
|
||||
# use the package-name since the leaf-node-module
|
||||
# will be included in log headers by default.
|
||||
from snakelib import mod
|
||||
assert mod.log.name == proj_name
|
||||
|
||||
from snakelib import subpkg
|
||||
assert (
|
||||
subpkg.log.name
|
||||
==
|
||||
subpkg.__package__
|
||||
==
|
||||
f'{proj_name}.subpkg'
|
||||
)
|
||||
|
||||
from snakelib.subpkg import submod
|
||||
assert (
|
||||
submod.log.name
|
||||
==
|
||||
submod.__package__
|
||||
==
|
||||
f'{proj_name}.subpkg'
|
||||
)
|
||||
|
||||
sub_logs = pkg_root_log.logger.getChildren()
|
||||
assert len(sub_logs) == 1 # only one nested sub-pkg module
|
||||
assert submod.log.logger in sub_logs
|
||||
|
||||
|
||||
# TODO, moar tests against existing feats:
|
||||
# ------ - ------
|
||||
# - [ ] color settings?
|
||||
# - [ ] header contents like,
|
||||
# - actor + thread + task names from various conc-primitives,
|
||||
# - [ ] `StackLevelAdapter` extensions,
|
||||
# - our custom levels/methods: `transport|runtime|cance|pdb|devx`
|
||||
# - [ ] custom-headers support?
|
||||
#
|
||||
|
||||
# TODO, test driven dev of new-ideas/long-wanted feats,
|
||||
# ------ - ------
|
||||
# - [ ] https://github.com/goodboy/tractor/issues/244
|
||||
# - [ ] @catern mentioned using a sync / deterministic sys
|
||||
# and in particular `svlogd`?
|
||||
# |_ https://smarden.org/runit/svlogd.8
|
||||
|
||||
# - [ ] using adapter vs. filters?
|
||||
# - https://stackoverflow.com/questions/60691759/add-information-to-every-log-message-in-python-logging/61830838#61830838
|
||||
|
||||
# - [ ] `.at_least_level()` optimization which short circuits wtv
|
||||
# `logging` is doing behind the scenes when the level filters
|
||||
# the emission..?
|
||||
|
||||
# - [ ] use of `.log.get_console_log()` in subactors and the
|
||||
# subtleties of ensuring it actually emits from a subproc.
|
||||
|
||||
# - [ ] this idea of activating per-subsys emissions with some
|
||||
# kind of `.name` filter passed to the runtime or maybe configured
|
||||
# via the root `StackLevelAdapter`?
|
||||
|
||||
# - [ ] use of `logging.dict.dictConfig()` to simplify the impl
|
||||
# of any of ^^ ??
|
||||
# - https://stackoverflow.com/questions/7507825/where-is-a-complete-example-of-logging-config-dictconfig
|
||||
# - https://docs.python.org/3/library/logging.config.html#configuration-dictionary-schema
|
||||
# - https://docs.python.org/3/library/logging.config.html#logging.config.dictConfig
|
||||
|
|
@ -1,8 +1,13 @@
|
|||
"""
|
||||
Multiple python programs invoking the runtime.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import platform
|
||||
import subprocess
|
||||
import time
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
)
|
||||
|
||||
import pytest
|
||||
import trio
|
||||
|
|
@ -10,14 +15,32 @@ import tractor
|
|||
from tractor._testing import (
|
||||
tractor_test,
|
||||
)
|
||||
from tractor import (
|
||||
current_actor,
|
||||
Actor,
|
||||
Context,
|
||||
Portal,
|
||||
)
|
||||
from tractor.runtime import _state
|
||||
from .conftest import (
|
||||
sig_prog,
|
||||
_INT_SIGNAL,
|
||||
_INT_RETURN_CODE,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from tractor.msg import Aid
|
||||
from tractor.discovery._addr import (
|
||||
UnwrappedAddress,
|
||||
)
|
||||
|
||||
def test_abort_on_sigint(daemon):
|
||||
|
||||
_non_linux: bool = platform.system() != 'Linux'
|
||||
|
||||
|
||||
def test_abort_on_sigint(
|
||||
daemon: subprocess.Popen,
|
||||
):
|
||||
assert daemon.returncode is None
|
||||
time.sleep(0.1)
|
||||
sig_prog(daemon, _INT_SIGNAL)
|
||||
|
|
@ -30,39 +53,131 @@ def test_abort_on_sigint(daemon):
|
|||
|
||||
|
||||
@tractor_test
|
||||
async def test_cancel_remote_arbiter(daemon, reg_addr):
|
||||
assert not tractor.current_actor().is_arbiter
|
||||
async def test_cancel_remote_registrar(
|
||||
daemon: subprocess.Popen,
|
||||
reg_addr: UnwrappedAddress,
|
||||
):
|
||||
assert not current_actor().is_registrar
|
||||
async with tractor.get_registry(reg_addr) as portal:
|
||||
await portal.cancel_actor()
|
||||
|
||||
time.sleep(0.1)
|
||||
# the arbiter channel server is cancelled but not its main task
|
||||
# the registrar channel server is cancelled but not its main task
|
||||
assert daemon.returncode is None
|
||||
|
||||
# no arbiter socket should exist
|
||||
# no registrar socket should exist
|
||||
with pytest.raises(OSError):
|
||||
async with tractor.get_registry(reg_addr) as portal:
|
||||
pass
|
||||
|
||||
|
||||
def test_register_duplicate_name(daemon, reg_addr):
|
||||
|
||||
def test_register_duplicate_name(
|
||||
daemon: subprocess.Popen,
|
||||
reg_addr: UnwrappedAddress,
|
||||
):
|
||||
async def main():
|
||||
|
||||
async with tractor.open_nursery(
|
||||
registry_addrs=[reg_addr],
|
||||
) as n:
|
||||
) as an:
|
||||
|
||||
assert not tractor.current_actor().is_arbiter
|
||||
assert not current_actor().is_registrar
|
||||
|
||||
p1 = await n.start_actor('doggy')
|
||||
p2 = await n.start_actor('doggy')
|
||||
p1 = await an.start_actor('doggy')
|
||||
p2 = await an.start_actor('doggy')
|
||||
|
||||
async with tractor.wait_for_actor('doggy') as portal:
|
||||
assert portal.channel.uid in (p2.channel.uid, p1.channel.uid)
|
||||
|
||||
await n.cancel()
|
||||
await an.cancel()
|
||||
|
||||
# run it manually since we want to start **after**
|
||||
# the other "daemon" program
|
||||
# XXX, run manually since we want to start this root **after**
|
||||
# the other "daemon" program with it's own root.
|
||||
trio.run(main)
|
||||
|
||||
|
||||
@tractor.context
|
||||
async def get_root_portal(
|
||||
ctx: Context,
|
||||
):
|
||||
'''
|
||||
Connect back to the root actor manually (using `._discovery` API)
|
||||
and ensure it's contact info is the same as our immediate parent.
|
||||
|
||||
'''
|
||||
sub: Actor = current_actor()
|
||||
rtvs: dict = _state._runtime_vars
|
||||
raddrs: list[UnwrappedAddress] = rtvs['_root_addrs']
|
||||
|
||||
# await tractor.pause()
|
||||
# XXX, in case the sub->root discovery breaks you might need
|
||||
# this (i know i did Xp)!!
|
||||
# from tractor.devx import mk_pdb
|
||||
# mk_pdb().set_trace()
|
||||
|
||||
assert (
|
||||
len(raddrs) == 1
|
||||
and
|
||||
list(sub._parent_chan.raddr.unwrap()) in raddrs
|
||||
)
|
||||
|
||||
# connect back to our immediate parent which should also
|
||||
# be the actor-tree's root.
|
||||
from tractor.discovery._api import get_root
|
||||
ptl: Portal
|
||||
async with get_root() as ptl:
|
||||
root_aid: Aid = ptl.chan.aid
|
||||
parent_ptl: Portal = current_actor().get_parent()
|
||||
assert (
|
||||
root_aid.name == 'root'
|
||||
and
|
||||
parent_ptl.chan.aid == root_aid
|
||||
)
|
||||
await ctx.started()
|
||||
|
||||
|
||||
def test_non_registrar_spawns_child(
|
||||
daemon: subprocess.Popen,
|
||||
reg_addr: UnwrappedAddress,
|
||||
loglevel: str,
|
||||
debug_mode: bool,
|
||||
ci_env: bool,
|
||||
):
|
||||
'''
|
||||
Ensure a non-regristar (serving) root actor can spawn a sub and
|
||||
that sub can connect back (manually) to it's rent that is the
|
||||
root without issue.
|
||||
|
||||
More or less this audits the global contact info in
|
||||
`._state._runtime_vars`.
|
||||
|
||||
'''
|
||||
async def main():
|
||||
|
||||
# XXX, since apparently on macos in GH's CI it can be a race
|
||||
# with the `daemon` registrar on grabbing the socket-addr..
|
||||
if ci_env and _non_linux:
|
||||
await trio.sleep(.5)
|
||||
|
||||
async with tractor.open_nursery(
|
||||
registry_addrs=[reg_addr],
|
||||
loglevel=loglevel,
|
||||
debug_mode=debug_mode,
|
||||
) as an:
|
||||
|
||||
actor: Actor = tractor.current_actor()
|
||||
assert not actor.is_registrar
|
||||
sub_ptl: Portal = await an.start_actor(
|
||||
name='sub',
|
||||
enable_modules=[__name__],
|
||||
)
|
||||
|
||||
async with sub_ptl.open_context(
|
||||
get_root_portal,
|
||||
) as (ctx, _):
|
||||
print('Waiting for `sub` to connect back to us..')
|
||||
|
||||
await an.cancel()
|
||||
|
||||
# XXX, run manually since we want to start this root **after**
|
||||
# the other "daemon" program with it's own root.
|
||||
trio.run(main)
|
||||
|
|
|
|||
|
|
@ -7,6 +7,14 @@ import tractor
|
|||
from tractor.experimental import msgpub
|
||||
from tractor._testing import tractor_test
|
||||
|
||||
pytestmark = pytest.mark.skipon_spawn_backend(
|
||||
'subint',
|
||||
reason=(
|
||||
'XXX SUBINT HANGING TEST XXX\n'
|
||||
'See oustanding issue(s)\n'
|
||||
# TODO, put issue link!
|
||||
)
|
||||
)
|
||||
|
||||
def test_type_checks():
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,333 @@
|
|||
'''
|
||||
Verify that externally registered remote actor error
|
||||
types are correctly relayed, boxed, and re-raised across
|
||||
IPC actor hops via `reg_err_types()`.
|
||||
|
||||
Also ensure that when custom error types are NOT registered
|
||||
the framework indicates the lookup failure to the user.
|
||||
|
||||
'''
|
||||
import pytest
|
||||
import trio
|
||||
import tractor
|
||||
from tractor import (
|
||||
Context,
|
||||
Portal,
|
||||
RemoteActorError,
|
||||
)
|
||||
from tractor._exceptions import (
|
||||
get_err_type,
|
||||
reg_err_types,
|
||||
)
|
||||
|
||||
|
||||
# -- custom app-level errors for testing --
|
||||
class CustomAppError(Exception):
|
||||
'''
|
||||
A hypothetical user-app error that should be
|
||||
boxed+relayed by `tractor` IPC when registered.
|
||||
|
||||
'''
|
||||
|
||||
|
||||
class AnotherAppError(Exception):
|
||||
'''
|
||||
A second custom error for multi-type registration.
|
||||
|
||||
'''
|
||||
|
||||
|
||||
class UnregisteredAppError(Exception):
|
||||
'''
|
||||
A custom error that is intentionally NEVER
|
||||
registered via `reg_err_types()` so we can
|
||||
verify the framework's failure indication.
|
||||
|
||||
'''
|
||||
|
||||
|
||||
# -- remote-task endpoints --
|
||||
@tractor.context
|
||||
async def raise_custom_err(
|
||||
ctx: Context,
|
||||
) -> None:
|
||||
'''
|
||||
Remote ep that raises a `CustomAppError`
|
||||
after sync-ing with the caller.
|
||||
|
||||
'''
|
||||
await ctx.started()
|
||||
raise CustomAppError(
|
||||
'the app exploded remotely'
|
||||
)
|
||||
|
||||
|
||||
@tractor.context
|
||||
async def raise_another_err(
|
||||
ctx: Context,
|
||||
) -> None:
|
||||
'''
|
||||
Remote ep that raises `AnotherAppError`.
|
||||
|
||||
'''
|
||||
await ctx.started()
|
||||
raise AnotherAppError(
|
||||
'another app-level kaboom'
|
||||
)
|
||||
|
||||
|
||||
@tractor.context
|
||||
async def raise_unreg_err(
|
||||
ctx: Context,
|
||||
) -> None:
|
||||
'''
|
||||
Remote ep that raises an `UnregisteredAppError`
|
||||
which has NOT been `reg_err_types()`-registered.
|
||||
|
||||
'''
|
||||
await ctx.started()
|
||||
raise UnregisteredAppError(
|
||||
'this error type is unknown to tractor'
|
||||
)
|
||||
|
||||
|
||||
# -- unit tests for the type-registry plumbing --
|
||||
|
||||
class TestRegErrTypesPlumbing:
|
||||
'''
|
||||
Low-level checks on `reg_err_types()` and
|
||||
`get_err_type()` without requiring IPC.
|
||||
|
||||
'''
|
||||
|
||||
def test_unregistered_type_returns_none(self):
|
||||
'''
|
||||
An unregistered custom error name should yield
|
||||
`None` from `get_err_type()`.
|
||||
|
||||
'''
|
||||
result = get_err_type('CustomAppError')
|
||||
assert result is None
|
||||
|
||||
def test_register_and_lookup(self):
|
||||
'''
|
||||
After `reg_err_types()`, the custom type should
|
||||
be discoverable via `get_err_type()`.
|
||||
|
||||
'''
|
||||
reg_err_types([CustomAppError])
|
||||
result = get_err_type('CustomAppError')
|
||||
assert result is CustomAppError
|
||||
|
||||
def test_register_multiple_types(self):
|
||||
'''
|
||||
Registering a list of types should make each
|
||||
one individually resolvable.
|
||||
|
||||
'''
|
||||
reg_err_types([
|
||||
CustomAppError,
|
||||
AnotherAppError,
|
||||
])
|
||||
assert (
|
||||
get_err_type('CustomAppError')
|
||||
is CustomAppError
|
||||
)
|
||||
assert (
|
||||
get_err_type('AnotherAppError')
|
||||
is AnotherAppError
|
||||
)
|
||||
|
||||
def test_builtin_types_always_resolve(self):
|
||||
'''
|
||||
Builtin error types like `RuntimeError` and
|
||||
`ValueError` should always be found without
|
||||
any prior registration.
|
||||
|
||||
'''
|
||||
assert (
|
||||
get_err_type('RuntimeError')
|
||||
is RuntimeError
|
||||
)
|
||||
assert (
|
||||
get_err_type('ValueError')
|
||||
is ValueError
|
||||
)
|
||||
|
||||
def test_tractor_native_types_resolve(self):
|
||||
'''
|
||||
`tractor`-internal exc types (e.g.
|
||||
`ContextCancelled`) should always resolve.
|
||||
|
||||
'''
|
||||
assert (
|
||||
get_err_type('ContextCancelled')
|
||||
is tractor.ContextCancelled
|
||||
)
|
||||
|
||||
def test_boxed_type_str_without_ipc_msg(self):
|
||||
'''
|
||||
When a `RemoteActorError` is constructed
|
||||
without an IPC msg (and no resolvable type),
|
||||
`.boxed_type_str` should return `'<unknown>'`.
|
||||
|
||||
'''
|
||||
rae = RemoteActorError('test')
|
||||
assert rae.boxed_type_str == '<unknown>'
|
||||
|
||||
|
||||
# -- IPC-level integration tests --
|
||||
|
||||
def test_registered_custom_err_relayed(
|
||||
debug_mode: bool,
|
||||
tpt_proto: str,
|
||||
):
|
||||
'''
|
||||
When a custom error type is registered via
|
||||
`reg_err_types()` on BOTH sides of an IPC dialog,
|
||||
the parent should receive a `RemoteActorError`
|
||||
whose `.boxed_type` matches the original custom
|
||||
error type.
|
||||
|
||||
'''
|
||||
reg_err_types([CustomAppError])
|
||||
|
||||
async def main():
|
||||
async with tractor.open_nursery(
|
||||
debug_mode=debug_mode,
|
||||
enable_transports=[tpt_proto],
|
||||
) as an:
|
||||
ptl: Portal = await an.start_actor(
|
||||
'custom-err-raiser',
|
||||
enable_modules=[__name__],
|
||||
)
|
||||
async with ptl.open_context(
|
||||
raise_custom_err,
|
||||
) as (ctx, sent):
|
||||
assert not sent
|
||||
try:
|
||||
await ctx.wait_for_result()
|
||||
except RemoteActorError as rae:
|
||||
assert rae.boxed_type is CustomAppError
|
||||
assert rae.src_type is CustomAppError
|
||||
assert 'the app exploded remotely' in str(
|
||||
rae.tb_str
|
||||
)
|
||||
raise
|
||||
|
||||
with pytest.raises(RemoteActorError) as excinfo:
|
||||
trio.run(main)
|
||||
|
||||
rae = excinfo.value
|
||||
assert rae.boxed_type is CustomAppError
|
||||
|
||||
|
||||
def test_registered_another_err_relayed(
|
||||
debug_mode: bool,
|
||||
tpt_proto: str,
|
||||
):
|
||||
'''
|
||||
Same as above but for a different custom error
|
||||
type to verify multi-type registration works
|
||||
end-to-end over IPC.
|
||||
|
||||
'''
|
||||
reg_err_types([AnotherAppError])
|
||||
|
||||
async def main():
|
||||
async with tractor.open_nursery(
|
||||
debug_mode=debug_mode,
|
||||
enable_transports=[tpt_proto],
|
||||
) as an:
|
||||
ptl: Portal = await an.start_actor(
|
||||
'another-err-raiser',
|
||||
enable_modules=[__name__],
|
||||
)
|
||||
async with ptl.open_context(
|
||||
raise_another_err,
|
||||
) as (ctx, sent):
|
||||
assert not sent
|
||||
try:
|
||||
await ctx.wait_for_result()
|
||||
except RemoteActorError as rae:
|
||||
assert (
|
||||
rae.boxed_type
|
||||
is AnotherAppError
|
||||
)
|
||||
raise
|
||||
|
||||
await an.cancel()
|
||||
|
||||
with pytest.raises(RemoteActorError) as excinfo:
|
||||
trio.run(main)
|
||||
|
||||
rae = excinfo.value
|
||||
assert rae.boxed_type is AnotherAppError
|
||||
|
||||
|
||||
def test_unregistered_err_still_relayed(
|
||||
debug_mode: bool,
|
||||
tpt_proto: str,
|
||||
):
|
||||
'''
|
||||
Verify that even when a custom error type is NOT registered via
|
||||
`reg_err_types()`, the remote error is still relayed as
|
||||
a `RemoteActorError` with all string-level info preserved
|
||||
(traceback, type name, source actor uid).
|
||||
|
||||
The `.boxed_type` will be `None` (type obj can't be resolved) but
|
||||
`.boxed_type_str` and `.src_type_str` still report the original
|
||||
type name from the IPC msg.
|
||||
|
||||
This documents the expected limitation: without `reg_err_types()`
|
||||
the `.boxed_type` property can NOT resolve to the original Python
|
||||
type.
|
||||
|
||||
'''
|
||||
# NOTE: intentionally do NOT call
|
||||
# `reg_err_types([UnregisteredAppError])`
|
||||
|
||||
async def main():
|
||||
async with tractor.open_nursery(
|
||||
debug_mode=debug_mode,
|
||||
enable_transports=[tpt_proto],
|
||||
) as an:
|
||||
ptl: Portal = await an.start_actor(
|
||||
'unreg-err-raiser',
|
||||
enable_modules=[__name__],
|
||||
)
|
||||
async with ptl.open_context(
|
||||
raise_unreg_err,
|
||||
) as (ctx, sent):
|
||||
assert not sent
|
||||
await ctx.wait_for_result()
|
||||
|
||||
await an.cancel()
|
||||
|
||||
with pytest.raises(RemoteActorError) as excinfo:
|
||||
trio.run(main)
|
||||
|
||||
rae = excinfo.value
|
||||
|
||||
# the error IS relayed even without
|
||||
# registration; type obj is unresolvable but
|
||||
# all string-level info is preserved.
|
||||
assert rae.boxed_type is None # NOT `UnregisteredAppError`
|
||||
assert rae.src_type is None
|
||||
|
||||
# string names survive the IPC round-trip
|
||||
# via the `Error` msg fields.
|
||||
assert (
|
||||
rae.src_type_str
|
||||
==
|
||||
'UnregisteredAppError'
|
||||
)
|
||||
assert (
|
||||
rae.boxed_type_str
|
||||
==
|
||||
'UnregisteredAppError'
|
||||
)
|
||||
|
||||
# original traceback content is preserved
|
||||
assert 'this error type is unknown' in rae.tb_str
|
||||
assert 'UnregisteredAppError' in rae.tb_str
|
||||
|
|
@ -12,14 +12,14 @@ import trio
|
|||
import tractor
|
||||
from tractor.trionics import (
|
||||
maybe_open_context,
|
||||
collapse_eg,
|
||||
)
|
||||
from tractor.log import (
|
||||
get_console_log,
|
||||
get_logger,
|
||||
)
|
||||
log = get_logger(__name__)
|
||||
|
||||
|
||||
log = get_logger()
|
||||
|
||||
_resource: int = 0
|
||||
|
||||
|
|
@ -213,9 +213,12 @@ def test_open_local_sub_to_stream(
|
|||
N local tasks using `trionics.maybe_open_context()`.
|
||||
|
||||
'''
|
||||
timeout: float = 3.6
|
||||
if platform.system() == "Windows":
|
||||
timeout: float = 10
|
||||
from .conftest import cpu_scaling_factor
|
||||
timeout: float = (
|
||||
4
|
||||
if not platform.system() == "Windows"
|
||||
else 10
|
||||
) * cpu_scaling_factor()
|
||||
|
||||
if debug_mode:
|
||||
timeout = 999
|
||||
|
|
@ -319,7 +322,7 @@ def test_open_local_sub_to_stream(
|
|||
|
||||
|
||||
@acm
|
||||
async def cancel_outer_cs(
|
||||
async def maybe_cancel_outer_cs(
|
||||
cs: trio.CancelScope|None = None,
|
||||
delay: float = 0,
|
||||
):
|
||||
|
|
@ -333,12 +336,31 @@ async def cancel_outer_cs(
|
|||
if cs:
|
||||
log.info('task calling cs.cancel()')
|
||||
cs.cancel()
|
||||
trio.lowlevel.checkpoint()
|
||||
|
||||
yield
|
||||
await trio.sleep_forever()
|
||||
|
||||
if cs:
|
||||
await trio.sleep_forever()
|
||||
|
||||
# XXX, if not cancelled we'll leak this inf-blocking
|
||||
# subtask to the actor's service tn..
|
||||
else:
|
||||
await trio.lowlevel.checkpoint()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'delay',
|
||||
[0.05, 0.5, 1],
|
||||
ids="pre_sleep_delay={}".format,
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
'cancel_by_cs',
|
||||
[True, False],
|
||||
ids="cancel_by_cs={}".format,
|
||||
)
|
||||
def test_lock_not_corrupted_on_fast_cancel(
|
||||
delay: float,
|
||||
cancel_by_cs: bool,
|
||||
debug_mode: bool,
|
||||
loglevel: str,
|
||||
):
|
||||
|
|
@ -355,17 +377,14 @@ def test_lock_not_corrupted_on_fast_cancel(
|
|||
due to it having erronously exited without calling
|
||||
`lock.release()`.
|
||||
|
||||
|
||||
'''
|
||||
delay: float = 1.
|
||||
|
||||
async def use_moc(
|
||||
cs: trio.CancelScope|None,
|
||||
delay: float,
|
||||
cs: trio.CancelScope|None = None,
|
||||
):
|
||||
log.info('task entering moc')
|
||||
async with maybe_open_context(
|
||||
cancel_outer_cs,
|
||||
maybe_cancel_outer_cs,
|
||||
kwargs={
|
||||
'cs': cs,
|
||||
'delay': delay,
|
||||
|
|
@ -376,7 +395,13 @@ def test_lock_not_corrupted_on_fast_cancel(
|
|||
else:
|
||||
log.info('1st task entered')
|
||||
|
||||
await trio.sleep_forever()
|
||||
if cs:
|
||||
await trio.sleep_forever()
|
||||
|
||||
else:
|
||||
await trio.sleep(delay)
|
||||
|
||||
# ^END, exit shared ctx.
|
||||
|
||||
async def main():
|
||||
with trio.fail_after(delay + 2):
|
||||
|
|
@ -385,6 +410,7 @@ def test_lock_not_corrupted_on_fast_cancel(
|
|||
debug_mode=debug_mode,
|
||||
loglevel=loglevel,
|
||||
),
|
||||
# ?TODO, pass this as the parent tn?
|
||||
trio.open_nursery() as tn,
|
||||
):
|
||||
get_console_log('info')
|
||||
|
|
@ -392,15 +418,206 @@ def test_lock_not_corrupted_on_fast_cancel(
|
|||
cs = tn.cancel_scope
|
||||
tn.start_soon(
|
||||
use_moc,
|
||||
cs,
|
||||
delay,
|
||||
cs if cancel_by_cs else None,
|
||||
name='child',
|
||||
)
|
||||
with trio.CancelScope() as rent_cs:
|
||||
await use_moc(
|
||||
cs=rent_cs,
|
||||
delay=delay,
|
||||
cs=rent_cs if cancel_by_cs else None,
|
||||
)
|
||||
|
||||
trio.run(main)
|
||||
|
||||
|
||||
@acm
|
||||
async def acm_with_resource(resource_id: str):
|
||||
'''
|
||||
Yield `resource_id` as the cached value.
|
||||
|
||||
Used to verify per-`ctx_key` isolation when the same
|
||||
`acm_func` is called with different kwargs.
|
||||
|
||||
'''
|
||||
yield resource_id
|
||||
|
||||
|
||||
def test_per_ctx_key_resource_lifecycle(
|
||||
debug_mode: bool,
|
||||
loglevel: str,
|
||||
):
|
||||
'''
|
||||
Verify that `maybe_open_context()` correctly isolates resource
|
||||
lifecycle **per `ctx_key`** when the same `acm_func` is called
|
||||
with different kwargs.
|
||||
|
||||
Previously `_Cache.users` was a single global `int` and
|
||||
`_Cache.locks` was keyed on `fid` (function ID), so calling
|
||||
the same `acm_func` with different kwargs (producing different
|
||||
`ctx_key`s) meant:
|
||||
|
||||
- teardown for one key was skipped bc the *other* key's users
|
||||
kept the global count > 0,
|
||||
- and re-entry could hit the old
|
||||
`assert not resources.get(ctx_key)` crash during the
|
||||
teardown window.
|
||||
|
||||
This was the root cause of a long-standing bug in piker's
|
||||
`brokerd.kraken` backend.
|
||||
|
||||
'''
|
||||
timeout: float = 6
|
||||
if debug_mode:
|
||||
timeout = 999
|
||||
|
||||
async def main():
|
||||
a_ready = trio.Event()
|
||||
a_exit = trio.Event()
|
||||
|
||||
async def hold_resource_a():
|
||||
'''
|
||||
Open resource 'a' and keep it alive until signalled.
|
||||
|
||||
'''
|
||||
async with maybe_open_context(
|
||||
acm_with_resource,
|
||||
kwargs={'resource_id': 'a'},
|
||||
) as (cache_hit, value):
|
||||
assert not cache_hit
|
||||
assert value == 'a'
|
||||
log.info("resource 'a' entered (holding)")
|
||||
a_ready.set()
|
||||
await a_exit.wait()
|
||||
log.info("resource 'a' exiting")
|
||||
|
||||
with trio.fail_after(timeout):
|
||||
async with (
|
||||
tractor.open_root_actor(
|
||||
debug_mode=debug_mode,
|
||||
loglevel=loglevel,
|
||||
),
|
||||
trio.open_nursery() as tn,
|
||||
):
|
||||
# Phase 1: bg task holds resource 'a' open.
|
||||
tn.start_soon(hold_resource_a)
|
||||
await a_ready.wait()
|
||||
|
||||
# Phase 2: open resource 'b' (different kwargs,
|
||||
# same acm_func) then exit it while 'a' is still
|
||||
# alive.
|
||||
async with maybe_open_context(
|
||||
acm_with_resource,
|
||||
kwargs={'resource_id': 'b'},
|
||||
) as (cache_hit, value):
|
||||
assert not cache_hit
|
||||
assert value == 'b'
|
||||
log.info("resource 'b' entered")
|
||||
|
||||
log.info("resource 'b' exited, waiting for teardown")
|
||||
await trio.lowlevel.checkpoint()
|
||||
|
||||
# Phase 3: re-open 'b'; must be a fresh cache MISS
|
||||
# proving 'b' was torn down independently of 'a'.
|
||||
#
|
||||
# With the old global `_Cache.users` counter this
|
||||
# would be a stale cache HIT (leaked resource) or
|
||||
# trigger `assert not resources.get(ctx_key)`.
|
||||
async with maybe_open_context(
|
||||
acm_with_resource,
|
||||
kwargs={'resource_id': 'b'},
|
||||
) as (cache_hit, value):
|
||||
assert not cache_hit, (
|
||||
"resource 'b' was NOT torn down despite "
|
||||
"having zero users! (global user count bug)"
|
||||
)
|
||||
assert value == 'b'
|
||||
log.info(
|
||||
"resource 'b' re-entered "
|
||||
"(cache miss, correct)"
|
||||
)
|
||||
|
||||
# Phase 4: let 'a' exit, clean shutdown.
|
||||
a_exit.set()
|
||||
|
||||
trio.run(main)
|
||||
|
||||
|
||||
def test_moc_reentry_during_teardown(
|
||||
debug_mode: bool,
|
||||
loglevel: str,
|
||||
):
|
||||
'''
|
||||
Reproduce the piker `open_cached_client('kraken')` race:
|
||||
|
||||
- same `acm_func`, NO kwargs (identical `ctx_key`)
|
||||
- multiple tasks share the cached resource
|
||||
- all users exit -> teardown starts
|
||||
- a NEW task enters during `_Cache.run_ctx.__aexit__`
|
||||
- `values[ctx_key]` is gone (popped in inner finally)
|
||||
but `resources[ctx_key]` still exists (outer finally
|
||||
hasn't run yet bc the acm cleanup has checkpoints)
|
||||
- old code: `assert not resources.get(ctx_key)` FIRES
|
||||
|
||||
This models the real-world scenario where `brokerd.kraken`
|
||||
tasks concurrently call `open_cached_client('kraken')`
|
||||
(same `acm_func`, empty kwargs, shared `ctx_key`) and
|
||||
the teardown/re-entry race triggers intermittently.
|
||||
|
||||
'''
|
||||
async def main():
|
||||
in_aexit = trio.Event()
|
||||
|
||||
@acm
|
||||
async def cached_client():
|
||||
'''
|
||||
Simulates `kraken.api.get_client()`:
|
||||
- no params (all callers share one `ctx_key`)
|
||||
- slow-ish cleanup to widen the race window
|
||||
between `values.pop()` and `resources.pop()`
|
||||
inside `_Cache.run_ctx`.
|
||||
|
||||
'''
|
||||
yield 'the-client'
|
||||
# Signal that we're in __aexit__ — at this
|
||||
# point `values` has already been popped by
|
||||
# `run_ctx`'s inner finally, but `resources`
|
||||
# is still alive (outer finally hasn't run).
|
||||
in_aexit.set()
|
||||
await trio.sleep(10)
|
||||
|
||||
first_done = trio.Event()
|
||||
|
||||
async def use_and_exit():
|
||||
async with maybe_open_context(
|
||||
cached_client,
|
||||
) as (cache_hit, value):
|
||||
assert value == 'the-client'
|
||||
first_done.set()
|
||||
|
||||
async def reenter_during_teardown():
|
||||
'''
|
||||
Wait for the acm's `__aexit__` to start (meaning
|
||||
`values` is popped but `resources` still exists),
|
||||
then re-enter — triggering the assert.
|
||||
|
||||
'''
|
||||
await in_aexit.wait()
|
||||
async with maybe_open_context(
|
||||
cached_client,
|
||||
) as (cache_hit, value):
|
||||
assert value == 'the-client'
|
||||
|
||||
with trio.fail_after(5):
|
||||
async with (
|
||||
tractor.open_root_actor(
|
||||
debug_mode=debug_mode,
|
||||
loglevel=loglevel,
|
||||
),
|
||||
collapse_eg(),
|
||||
trio.open_nursery() as tn,
|
||||
):
|
||||
tn.start_soon(use_and_exit)
|
||||
tn.start_soon(reenter_during_teardown)
|
||||
|
||||
trio.run(main)
|
||||
|
|
|
|||
|
|
@ -4,6 +4,10 @@ import trio
|
|||
import pytest
|
||||
|
||||
import tractor
|
||||
|
||||
# XXX `cffi` dun build on py3.14 yet..
|
||||
cffi = pytest.importorskip("cffi")
|
||||
|
||||
from tractor.ipc._ringbuf import (
|
||||
open_ringbuf,
|
||||
RBToken,
|
||||
|
|
@ -14,7 +18,7 @@ from tractor._testing.samples import (
|
|||
generate_sample_messages,
|
||||
)
|
||||
|
||||
# in case you don't want to melt your cores, uncomment dis!
|
||||
# XXX, in case you want to melt your cores, comment this skip line XD
|
||||
pytestmark = pytest.mark.skip
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ def test_infected_root_actor(
|
|||
),
|
||||
to_asyncio.open_channel_from(
|
||||
aio_echo_server,
|
||||
) as (first, chan),
|
||||
) as (chan, first),
|
||||
):
|
||||
assert first == 'start'
|
||||
|
||||
|
|
@ -91,13 +91,12 @@ def test_infected_root_actor(
|
|||
async def sync_and_err(
|
||||
# just signature placeholders for compat with
|
||||
# ``to_asyncio.open_channel_from()``
|
||||
to_trio: trio.MemorySendChannel,
|
||||
from_trio: asyncio.Queue,
|
||||
chan: tractor.to_asyncio.LinkedTaskChannel,
|
||||
ev: asyncio.Event,
|
||||
|
||||
):
|
||||
if to_trio:
|
||||
to_trio.send_nowait('start')
|
||||
if chan:
|
||||
chan.started_nowait('start')
|
||||
|
||||
await ev.wait()
|
||||
raise RuntimeError('asyncio-side')
|
||||
|
|
@ -174,7 +173,7 @@ def test_trio_prestarted_task_bubbles(
|
|||
sync_and_err,
|
||||
ev=aio_ev,
|
||||
)
|
||||
) as (first, chan),
|
||||
) as (chan, first),
|
||||
):
|
||||
|
||||
for i in range(5):
|
||||
|
|
|
|||
|
|
@ -94,15 +94,15 @@ def test_runtime_vars_unset(
|
|||
after the root actor-runtime exits!
|
||||
|
||||
'''
|
||||
assert not tractor._state._runtime_vars['_debug_mode']
|
||||
assert not tractor.runtime._state._runtime_vars['_debug_mode']
|
||||
async def main():
|
||||
assert not tractor._state._runtime_vars['_debug_mode']
|
||||
assert not tractor.runtime._state._runtime_vars['_debug_mode']
|
||||
async with tractor.open_nursery(
|
||||
debug_mode=True,
|
||||
):
|
||||
assert tractor._state._runtime_vars['_debug_mode']
|
||||
assert tractor.runtime._state._runtime_vars['_debug_mode']
|
||||
|
||||
# after runtime closure, should be reverted!
|
||||
assert not tractor._state._runtime_vars['_debug_mode']
|
||||
assert not tractor.runtime._state._runtime_vars['_debug_mode']
|
||||
|
||||
trio.run(main)
|
||||
|
|
|
|||
|
|
@ -110,7 +110,7 @@ def test_rpc_errors(
|
|||
) as n:
|
||||
|
||||
actor = tractor.current_actor()
|
||||
assert actor.is_arbiter
|
||||
assert actor.is_registrar
|
||||
await n.run_in_actor(
|
||||
sleep_back_actor,
|
||||
actor_name=subactor_requests_to,
|
||||
|
|
|
|||
|
|
@ -22,6 +22,10 @@ def unlink_file():
|
|||
async def crash_and_clean_tmpdir(
|
||||
tmp_file_path: str,
|
||||
error: bool = True,
|
||||
rent_cancel: bool = True,
|
||||
|
||||
# XXX unused, but do we really need to test these cases?
|
||||
self_cancel: bool = False,
|
||||
):
|
||||
global _file_path
|
||||
_file_path = tmp_file_path
|
||||
|
|
@ -32,43 +36,75 @@ async def crash_and_clean_tmpdir(
|
|||
assert os.path.isfile(tmp_file_path)
|
||||
await trio.sleep(0.1)
|
||||
if error:
|
||||
print('erroring in subactor!')
|
||||
assert 0
|
||||
else:
|
||||
|
||||
elif self_cancel:
|
||||
print('SELF-cancelling subactor!')
|
||||
actor.cancel_soon()
|
||||
|
||||
elif rent_cancel:
|
||||
await trio.sleep_forever()
|
||||
|
||||
print('subactor exiting task!')
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'error_in_child',
|
||||
[True, False],
|
||||
ids='error_in_child={}'.format,
|
||||
)
|
||||
@tractor_test
|
||||
async def test_lifetime_stack_wipes_tmpfile(
|
||||
tmp_path,
|
||||
error_in_child: bool,
|
||||
loglevel: str,
|
||||
# log: tractor.log.StackLevelAdapter,
|
||||
# ^TODO, once landed via macos support!
|
||||
):
|
||||
child_tmp_file = tmp_path / "child.txt"
|
||||
child_tmp_file.touch()
|
||||
assert child_tmp_file.exists()
|
||||
path = str(child_tmp_file)
|
||||
|
||||
# NOTE, this is expected to cancel the sub
|
||||
# in the `error_in_child=False` case!
|
||||
timeout: float = (
|
||||
1.6 if error_in_child
|
||||
else 1
|
||||
)
|
||||
try:
|
||||
with trio.move_on_after(0.5):
|
||||
async with tractor.open_nursery() as n:
|
||||
await ( # inlined portal
|
||||
await n.run_in_actor(
|
||||
crash_and_clean_tmpdir,
|
||||
tmp_file_path=path,
|
||||
error=error_in_child,
|
||||
)
|
||||
).result()
|
||||
|
||||
with trio.move_on_after(timeout) as cs:
|
||||
async with tractor.open_nursery(
|
||||
loglevel=loglevel,
|
||||
) as an:
|
||||
await ( # inlined `tractor.Portal`
|
||||
await an.run_in_actor(
|
||||
crash_and_clean_tmpdir,
|
||||
tmp_file_path=path,
|
||||
error=error_in_child,
|
||||
)
|
||||
).result()
|
||||
except (
|
||||
tractor.RemoteActorError,
|
||||
# tractor.BaseExceptionGroup,
|
||||
BaseExceptionGroup,
|
||||
):
|
||||
pass
|
||||
) as _exc:
|
||||
exc = _exc
|
||||
from tractor.log import get_console_log
|
||||
log = get_console_log(
|
||||
level=loglevel,
|
||||
name=__name__,
|
||||
)
|
||||
log.exception(
|
||||
f'Subactor failed as expected with {type(exc)!r}\n'
|
||||
)
|
||||
|
||||
# tmp file should have been wiped by
|
||||
# teardown stack.
|
||||
assert not child_tmp_file.exists()
|
||||
|
||||
if error_in_child:
|
||||
assert not cs.cancel_called
|
||||
else:
|
||||
# expect timeout in some cases?
|
||||
assert cs.cancel_called
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
Shared mem primitives and APIs.
|
||||
|
||||
"""
|
||||
import platform
|
||||
import uuid
|
||||
|
||||
# import numpy
|
||||
|
|
@ -13,6 +14,14 @@ from tractor.ipc._shm import (
|
|||
attach_shm_list,
|
||||
)
|
||||
|
||||
pytestmark = pytest.mark.skipon_spawn_backend(
|
||||
'subint',
|
||||
reason=(
|
||||
'XXX SUBINT GIL-CONTENTION HANGING TEST XXX\n'
|
||||
'See oustanding issue(s)\n'
|
||||
# TODO, put issue link!
|
||||
)
|
||||
)
|
||||
|
||||
@tractor.context
|
||||
async def child_attach_shml_alot(
|
||||
|
|
@ -53,7 +62,18 @@ def test_child_attaches_alot():
|
|||
shm_key=shml.key,
|
||||
) as (ctx, start_val),
|
||||
):
|
||||
assert start_val == key
|
||||
assert (_key := shml.key) == start_val
|
||||
|
||||
if platform.system() != 'Darwin':
|
||||
# XXX, macOS has a char limit..
|
||||
# see `ipc._shm._shorten_key_for_macos`
|
||||
assert (
|
||||
start_val
|
||||
==
|
||||
key
|
||||
==
|
||||
_key
|
||||
)
|
||||
await ctx.result()
|
||||
|
||||
await portal.cancel_actor()
|
||||
|
|
|
|||
|
|
@ -1,5 +1,12 @@
|
|||
"""
|
||||
Spawning basics
|
||||
Spawning basics including audit of,
|
||||
|
||||
- subproc bootstrap, such as subactor runtime-data/config inheritance,
|
||||
- basic (and mostly legacy) `ActorNursery` subactor starting and
|
||||
cancel APIs.
|
||||
|
||||
Simple (and generally legacy) examples from the original
|
||||
API design.
|
||||
|
||||
"""
|
||||
from functools import partial
|
||||
|
|
@ -33,13 +40,13 @@ async def spawn(
|
|||
assert actor is None # no runtime yet
|
||||
async with (
|
||||
tractor.open_root_actor(
|
||||
arbiter_addr=reg_addr,
|
||||
registry_addrs=[reg_addr],
|
||||
),
|
||||
tractor.open_nursery() as an,
|
||||
):
|
||||
# now runtime exists
|
||||
actor: tractor.Actor = tractor.current_actor()
|
||||
assert actor.is_arbiter == should_be_root
|
||||
assert actor.is_registrar == should_be_root
|
||||
|
||||
# spawns subproc here
|
||||
portal: tractor.Portal = await an.run_in_actor(
|
||||
|
|
@ -68,7 +75,7 @@ async def spawn(
|
|||
assert result == 10
|
||||
return result
|
||||
else:
|
||||
assert actor.is_arbiter == should_be_root
|
||||
assert actor.is_registrar == should_be_root
|
||||
return 10
|
||||
|
||||
|
||||
|
|
@ -98,7 +105,9 @@ async def movie_theatre_question():
|
|||
|
||||
|
||||
@tractor_test
|
||||
async def test_movie_theatre_convo(start_method):
|
||||
async def test_movie_theatre_convo(
|
||||
start_method: str,
|
||||
):
|
||||
'''
|
||||
The main ``tractor`` routine.
|
||||
|
||||
|
|
@ -151,13 +160,16 @@ async def test_most_beautiful_word(
|
|||
name='some_linguist',
|
||||
)
|
||||
|
||||
print(await portal.result())
|
||||
res: Any = await portal.wait_for_result()
|
||||
assert res == return_value
|
||||
# The ``async with`` will unblock here since the 'some_linguist'
|
||||
# actor has completed its main task ``cellar_door``.
|
||||
|
||||
# this should pull the cached final result already captured during
|
||||
# the nursery block exit.
|
||||
print(await portal.result())
|
||||
res: Any = await portal.wait_for_result()
|
||||
assert res == return_value
|
||||
print(res)
|
||||
|
||||
|
||||
async def check_loglevel(level):
|
||||
|
|
@ -168,22 +180,30 @@ async def check_loglevel(level):
|
|||
log.critical('yoyoyo')
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'level', [
|
||||
'debug',
|
||||
'cancel',
|
||||
'critical'
|
||||
],
|
||||
ids='loglevel={}'.format,
|
||||
)
|
||||
def test_loglevel_propagated_to_subactor(
|
||||
start_method,
|
||||
capfd,
|
||||
reg_addr,
|
||||
capfd: pytest.CaptureFixture,
|
||||
start_method: str,
|
||||
reg_addr: tuple,
|
||||
level: str,
|
||||
):
|
||||
if start_method == 'mp_forkserver':
|
||||
pytest.skip(
|
||||
"a bug with `capfd` seems to make forkserver capture not work?")
|
||||
|
||||
level = 'critical'
|
||||
"a bug with `capfd` seems to make forkserver capture not work?"
|
||||
)
|
||||
|
||||
async def main():
|
||||
async with tractor.open_nursery(
|
||||
name='arbiter',
|
||||
name='registrar',
|
||||
start_method=start_method,
|
||||
arbiter_addr=reg_addr,
|
||||
registry_addrs=[reg_addr],
|
||||
|
||||
) as tn:
|
||||
await tn.run_in_actor(
|
||||
|
|
@ -197,3 +217,121 @@ def test_loglevel_propagated_to_subactor(
|
|||
# ensure subactor spits log message on stderr
|
||||
captured = capfd.readouterr()
|
||||
assert 'yoyoyo' in captured.err
|
||||
|
||||
|
||||
async def check_parent_main_inheritance(
|
||||
expect_inherited: bool,
|
||||
) -> bool:
|
||||
'''
|
||||
Assert that the child actor's ``_parent_main_data`` matches the
|
||||
``inherit_parent_main`` flag it was spawned with.
|
||||
|
||||
With the trio spawn backend the parent's ``__main__`` bootstrap
|
||||
data is captured and forwarded to each child so it can replay
|
||||
the parent's ``__main__`` as ``__mp_main__``, mirroring the
|
||||
stdlib ``multiprocessing`` bootstrap:
|
||||
https://docs.python.org/3/library/multiprocessing.html#the-spawn-and-forkserver-start-methods
|
||||
|
||||
When ``inherit_parent_main=False`` the data dict is empty
|
||||
(``{}``) so no fixup ever runs and the child keeps its own
|
||||
``__main__`` untouched.
|
||||
|
||||
NOTE: under `pytest` the parent ``__main__`` is
|
||||
``pytest.__main__`` whose ``_fixup_main_from_name()`` is a no-op
|
||||
(the name ends with ``.__main__``), so we cannot observe
|
||||
a difference in ``sys.modules['__main__'].__name__`` between the
|
||||
two modes. Checking ``_parent_main_data`` directly is the most
|
||||
reliable verification that the flag is threaded through
|
||||
correctly; a ``RemoteActorError[AssertionError]`` propagates on
|
||||
mismatch.
|
||||
|
||||
'''
|
||||
import tractor
|
||||
actor: tractor.Actor = tractor.current_actor()
|
||||
has_data: bool = bool(actor._parent_main_data)
|
||||
assert has_data == expect_inherited, (
|
||||
f'Expected _parent_main_data to be '
|
||||
f'{"non-empty" if expect_inherited else "empty"}, '
|
||||
f'got: {actor._parent_main_data!r}'
|
||||
)
|
||||
return has_data
|
||||
|
||||
|
||||
def test_run_in_actor_can_skip_parent_main_inheritance(
|
||||
start_method: str, # <- only support on `trio` backend rn.
|
||||
):
|
||||
'''
|
||||
Verify ``inherit_parent_main=False`` on ``run_in_actor()``
|
||||
prevents parent ``__main__`` data from reaching the child.
|
||||
|
||||
'''
|
||||
if start_method != 'trio':
|
||||
pytest.skip(
|
||||
'parent main-inheritance opt-out only affects the trio backend'
|
||||
)
|
||||
|
||||
async def main():
|
||||
async with tractor.open_nursery(start_method='trio') as an:
|
||||
|
||||
# Default: child receives parent __main__ bootstrap data
|
||||
replaying = await an.run_in_actor(
|
||||
check_parent_main_inheritance,
|
||||
name='replaying-parent-main',
|
||||
expect_inherited=True,
|
||||
)
|
||||
await replaying.result()
|
||||
|
||||
# Opt-out: child gets no parent __main__ data
|
||||
isolated = await an.run_in_actor(
|
||||
check_parent_main_inheritance,
|
||||
name='isolated-parent-main',
|
||||
inherit_parent_main=False,
|
||||
expect_inherited=False,
|
||||
)
|
||||
await isolated.result()
|
||||
|
||||
trio.run(main)
|
||||
|
||||
|
||||
def test_start_actor_can_skip_parent_main_inheritance(
|
||||
start_method: str, # <- only support on `trio` backend rn.
|
||||
):
|
||||
'''
|
||||
Verify ``inherit_parent_main=False`` on ``start_actor()``
|
||||
prevents parent ``__main__`` data from reaching the child.
|
||||
|
||||
'''
|
||||
if start_method != 'trio':
|
||||
pytest.skip(
|
||||
'parent main-inheritance opt-out only affects the trio backend'
|
||||
)
|
||||
|
||||
async def main():
|
||||
async with tractor.open_nursery(start_method='trio') as an:
|
||||
|
||||
# Default: child receives parent __main__ bootstrap data
|
||||
replaying = await an.start_actor(
|
||||
'replaying-parent-main',
|
||||
enable_modules=[__name__],
|
||||
)
|
||||
result = await replaying.run(
|
||||
check_parent_main_inheritance,
|
||||
expect_inherited=True,
|
||||
)
|
||||
assert result is True
|
||||
await replaying.cancel_actor()
|
||||
|
||||
# Opt-out: child gets no parent __main__ data
|
||||
isolated = await an.start_actor(
|
||||
'isolated-parent-main',
|
||||
enable_modules=[__name__],
|
||||
inherit_parent_main=False,
|
||||
)
|
||||
result = await isolated.run(
|
||||
check_parent_main_inheritance,
|
||||
expect_inherited=False,
|
||||
)
|
||||
assert result is False
|
||||
await isolated.cancel_actor()
|
||||
|
||||
trio.run(main)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,245 @@
|
|||
'''
|
||||
Cancellation + hard-kill semantics audit for the `subint` spawn
|
||||
backend.
|
||||
|
||||
Exercises the escape-hatch machinery added to
|
||||
`tractor.spawn._subint` (module-level `_HARD_KILL_TIMEOUT`,
|
||||
bounded shields around the soft-kill / thread-join sites, daemon
|
||||
driver-thread abandonment) so that future stdlib regressions or
|
||||
our own refactors don't silently re-introduce the hangs first
|
||||
diagnosed during the Phase B.2/B.3 bringup (issue #379).
|
||||
|
||||
Every test in this module:
|
||||
- is wrapped in `trio.fail_after()` for a deterministic per-test
|
||||
wall-clock ceiling (the whole point of these tests is to fail
|
||||
fast when our escape hatches regress; an unbounded test would
|
||||
defeat itself),
|
||||
- arms `tractor.devx.dump_on_hang()` to capture a stack dump on
|
||||
failure — without it, a hang here is opaque because pytest's
|
||||
stderr capture swallows `faulthandler` output by default
|
||||
(hard-won lesson from the original diagnosis),
|
||||
- skips on py<3.13 (no `_interpreters`) and on any
|
||||
`--spawn-backend` other than `'subint'` (these tests are
|
||||
subint-specific by design — they'd be nonsense under `trio` or
|
||||
`mp_*`).
|
||||
|
||||
'''
|
||||
from __future__ import annotations
|
||||
from functools import partial
|
||||
|
||||
import pytest
|
||||
import trio
|
||||
import tractor
|
||||
from tractor.devx import dump_on_hang
|
||||
|
||||
|
||||
# Gate: the `subint` backend requires py3.14+. Check the
|
||||
# public stdlib wrapper's presence (added in 3.14) rather than
|
||||
# the private `_interpreters` module (which exists on 3.13 but
|
||||
# wedges under tractor's usage — see `tractor.spawn._subint`).
|
||||
pytest.importorskip('concurrent.interpreters')
|
||||
|
||||
# Subint-only: read the spawn method that `pytest_configure`
|
||||
# committed via `try_set_start_method()`. By the time this module
|
||||
# imports, the CLI backend choice has been applied.
|
||||
from tractor.spawn._spawn import _spawn_method # noqa: E402
|
||||
|
||||
if _spawn_method != 'subint':
|
||||
pytestmark = pytest.mark.skip(
|
||||
reason=(
|
||||
"subint-specific cancellation audit — "
|
||||
"pass `--spawn-backend=subint` to run."
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# child-side task bodies (run inside the spawned subint)
|
||||
# ----------------------------------------------------------------
|
||||
|
||||
|
||||
async def _trivial_rpc() -> str:
|
||||
'''
|
||||
Minimal RPC body for the baseline happy-teardown test.
|
||||
'''
|
||||
return 'hello from subint'
|
||||
|
||||
|
||||
async def _spin_without_trio_checkpoints() -> None:
|
||||
'''
|
||||
Block the main task with NO trio-visible checkpoints so any
|
||||
`Portal.cancel_actor()` arriving over IPC has nothing to hand
|
||||
off to.
|
||||
|
||||
`threading.Event.wait(timeout)` releases the GIL (so other
|
||||
threads — including trio's IO/RPC tasks — can progress) but
|
||||
does NOT insert a trio checkpoint, so the subactor's main
|
||||
task never notices cancellation.
|
||||
|
||||
This is the exact "stuck subint" scenario the hard-kill
|
||||
shields exist to survive.
|
||||
'''
|
||||
import threading
|
||||
never_set = threading.Event()
|
||||
while not never_set.is_set():
|
||||
# 1s re-check granularity; low enough not to waste CPU,
|
||||
# high enough that even a pathologically slow
|
||||
# `_HARD_KILL_TIMEOUT` won't accidentally align with a
|
||||
# wake.
|
||||
never_set.wait(timeout=1.0)
|
||||
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# parent-side harnesses (driven inside `trio.run(...)`)
|
||||
# ----------------------------------------------------------------
|
||||
|
||||
|
||||
async def _happy_path(
|
||||
reg_addr: tuple[str, int|str],
|
||||
deadline: float,
|
||||
) -> None:
|
||||
with trio.fail_after(deadline):
|
||||
async with (
|
||||
tractor.open_root_actor(
|
||||
registry_addrs=[reg_addr],
|
||||
),
|
||||
tractor.open_nursery() as an,
|
||||
):
|
||||
portal: tractor.Portal = await an.run_in_actor(
|
||||
_trivial_rpc,
|
||||
name='subint-happy',
|
||||
)
|
||||
result: str = await portal.wait_for_result()
|
||||
assert result == 'hello from subint'
|
||||
|
||||
|
||||
async def _spawn_stuck_then_cancel(
|
||||
reg_addr: tuple[str, int|str],
|
||||
deadline: float,
|
||||
) -> None:
|
||||
with trio.fail_after(deadline):
|
||||
async with (
|
||||
tractor.open_root_actor(
|
||||
registry_addrs=[reg_addr],
|
||||
),
|
||||
tractor.open_nursery() as an,
|
||||
):
|
||||
await an.run_in_actor(
|
||||
_spin_without_trio_checkpoints,
|
||||
name='subint-stuck',
|
||||
)
|
||||
# Give the child time to reach its non-checkpointing
|
||||
# loop before we cancel; the precise value doesn't
|
||||
# matter as long as it's a handful of trio schedule
|
||||
# ticks.
|
||||
await trio.sleep(0.5)
|
||||
an.cancel_scope.cancel()
|
||||
|
||||
|
||||
# ----------------------------------------------------------------
|
||||
# tests
|
||||
# ----------------------------------------------------------------
|
||||
|
||||
|
||||
def test_subint_happy_teardown(
|
||||
reg_addr: tuple[str, int|str],
|
||||
) -> None:
|
||||
'''
|
||||
Baseline: spawn a subactor, do one portal RPC, close nursery
|
||||
cleanly. No cancel, no faults.
|
||||
|
||||
If this regresses we know something's wrong at the
|
||||
spawn/teardown layer unrelated to the hard-kill escape
|
||||
hatches.
|
||||
|
||||
'''
|
||||
deadline: float = 10.0
|
||||
with dump_on_hang(
|
||||
seconds=deadline,
|
||||
path='/tmp/subint_cancellation_happy.dump',
|
||||
):
|
||||
trio.run(partial(_happy_path, reg_addr, deadline))
|
||||
|
||||
|
||||
@pytest.mark.skipon_spawn_backend(
|
||||
'subint',
|
||||
reason=(
|
||||
'XXX SUBINT HANGING TEST XXX\n'
|
||||
'See oustanding issue(s)\n'
|
||||
# TODO, put issue link!
|
||||
)
|
||||
)
|
||||
# Wall-clock bound via `pytest-timeout` (`method='thread'`)
|
||||
# as defense-in-depth over the inner `trio.fail_after(15)`.
|
||||
# Under the orphaned-channel hang class described in
|
||||
# `ai/conc-anal/subint_cancel_delivery_hang_issue.md`, SIGINT
|
||||
# is still deliverable and this test *should* be unwedgeable
|
||||
# by the inner trio timeout — but sibling subint-backend
|
||||
# tests in this repo have also exhibited the
|
||||
# `subint_sigint_starvation_issue.md` GIL-starvation flavor,
|
||||
# so `method='thread'` keeps us safe in case ordering or
|
||||
# load shifts the failure mode.
|
||||
@pytest.mark.timeout(
|
||||
3, # NOTE never passes pre-3.14+ subints support.
|
||||
method='thread',
|
||||
)
|
||||
def test_subint_non_checkpointing_child(
|
||||
reg_addr: tuple[str, int|str],
|
||||
) -> None:
|
||||
'''
|
||||
Cancel a subactor whose main task is stuck in a non-
|
||||
checkpointing Python loop.
|
||||
|
||||
`Portal.cancel_actor()` may be delivered over IPC but the
|
||||
main task never checkpoints to observe the Cancelled —
|
||||
so the subint's `trio.run()` can't exit gracefully.
|
||||
|
||||
The parent `subint_proc` bounded-shield + daemon-driver-
|
||||
thread combo should abandon the thread after
|
||||
`_HARD_KILL_TIMEOUT` and let the parent return cleanly.
|
||||
|
||||
Wall-clock budget:
|
||||
- ~0.5s: settle time for child to enter the stuck loop
|
||||
- ~3s: `_HARD_KILL_TIMEOUT` (soft-kill wait)
|
||||
- ~3s: `_HARD_KILL_TIMEOUT` (thread-join wait)
|
||||
- margin
|
||||
|
||||
KNOWN ISSUE (Ctrl-C-able hang):
|
||||
-------------------------------
|
||||
This test currently hangs past the hard-kill timeout for
|
||||
reasons unrelated to the subint teardown itself — after
|
||||
the subint is destroyed, a parent-side trio task appears
|
||||
to park on an orphaned IPC channel (no clean EOF
|
||||
delivered to a waiting receive). Unlike the
|
||||
SIGINT-starvation sibling case in
|
||||
`test_stale_entry_is_deleted`, this hang IS Ctrl-C-able
|
||||
(`strace` shows SIGINT wakeup-fd `write() = 1`, not
|
||||
`EAGAIN`) — i.e. the main trio loop is still iterating
|
||||
normally. That makes this *our* bug to fix, not a
|
||||
CPython-level limitation.
|
||||
|
||||
See `ai/conc-anal/subint_cancel_delivery_hang_issue.md`
|
||||
for the full analysis + candidate fix directions
|
||||
(explicit parent-side channel abort in `subint_proc`
|
||||
teardown being the most likely surgical fix).
|
||||
|
||||
The sibling `ai/conc-anal/subint_sigint_starvation_issue.md`
|
||||
documents the *other* hang class (abandoned-legacy-subint
|
||||
thread + shared-GIL starvation → signal-wakeup-fd pipe
|
||||
fills → SIGINT silently dropped) — that one is
|
||||
structurally blocked on msgspec PEP 684 adoption and is
|
||||
NOT what this test is hitting.
|
||||
|
||||
'''
|
||||
deadline: float = 15.0
|
||||
with dump_on_hang(
|
||||
seconds=deadline,
|
||||
path='/tmp/subint_cancellation_stuck.dump',
|
||||
):
|
||||
trio.run(
|
||||
partial(
|
||||
_spawn_stuck_then_cancel,
|
||||
reg_addr,
|
||||
deadline,
|
||||
),
|
||||
)
|
||||
|
|
@ -75,7 +75,7 @@ async def open_sequence_streamer(
|
|||
) -> tractor.MsgStream:
|
||||
|
||||
async with tractor.open_nursery(
|
||||
arbiter_addr=reg_addr,
|
||||
registry_addrs=[reg_addr],
|
||||
start_method=start_method,
|
||||
) as an:
|
||||
|
||||
|
|
|
|||
|
|
@ -30,21 +30,23 @@ from ._streaming import (
|
|||
MsgStream as MsgStream,
|
||||
stream as stream,
|
||||
)
|
||||
from ._discovery import (
|
||||
from .discovery._api import (
|
||||
get_registry as get_registry,
|
||||
find_actor as find_actor,
|
||||
wait_for_actor as wait_for_actor,
|
||||
query_actor as query_actor,
|
||||
)
|
||||
from ._supervise import (
|
||||
from .runtime._supervise import (
|
||||
open_nursery as open_nursery,
|
||||
ActorNursery as ActorNursery,
|
||||
)
|
||||
from ._state import (
|
||||
from .runtime._state import (
|
||||
RuntimeVars as RuntimeVars,
|
||||
current_actor as current_actor,
|
||||
is_root_process as is_root_process,
|
||||
current_ipc_ctx as current_ipc_ctx,
|
||||
debug_mode as debug_mode
|
||||
debug_mode as debug_mode,
|
||||
get_runtime_vars as get_runtime_vars,
|
||||
is_root_process as is_root_process,
|
||||
)
|
||||
from ._exceptions import (
|
||||
ContextCancelled as ContextCancelled,
|
||||
|
|
@ -65,6 +67,10 @@ from ._root import (
|
|||
open_root_actor as open_root_actor,
|
||||
)
|
||||
from .ipc import Channel as Channel
|
||||
from ._portal import Portal as Portal
|
||||
from ._runtime import Actor as Actor
|
||||
from .runtime._portal import Portal as Portal
|
||||
from .runtime._runtime import Actor as Actor
|
||||
from .discovery._registry import (
|
||||
Registrar as Registrar,
|
||||
Arbiter as Arbiter,
|
||||
)
|
||||
# from . import hilevel as hilevel
|
||||
|
|
|
|||
|
|
@ -15,15 +15,22 @@
|
|||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
"""
|
||||
This is the "bootloader" for actors started using the native trio backend.
|
||||
The "bootloader" for sub-actors spawned via the native `trio`
|
||||
backend (the default `python -m tractor._child` CLI entry) and
|
||||
the in-process `subint` backend (`tractor.spawn._subint`).
|
||||
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import argparse
|
||||
|
||||
from ast import literal_eval
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from ._runtime import Actor
|
||||
from ._entry import _trio_main
|
||||
from .runtime._runtime import Actor
|
||||
from .spawn._entry import _trio_main
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .discovery._addr import UnwrappedAddress
|
||||
from .spawn._spawn import SpawnMethodKey
|
||||
|
||||
|
||||
def parse_uid(arg):
|
||||
|
|
@ -39,6 +46,36 @@ def parse_ipaddr(arg):
|
|||
return arg
|
||||
|
||||
|
||||
def _actor_child_main(
|
||||
uid: tuple[str, str],
|
||||
loglevel: str | None,
|
||||
parent_addr: UnwrappedAddress | None,
|
||||
infect_asyncio: bool,
|
||||
spawn_method: SpawnMethodKey = 'trio',
|
||||
|
||||
) -> None:
|
||||
'''
|
||||
Construct the child `Actor` and dispatch to `_trio_main()`.
|
||||
|
||||
Shared entry shape used by both the `python -m tractor._child`
|
||||
CLI (trio/mp subproc backends) and the `subint` backend, which
|
||||
invokes this from inside a fresh `concurrent.interpreters`
|
||||
sub-interpreter via `Interpreter.call()`.
|
||||
|
||||
'''
|
||||
subactor = Actor(
|
||||
name=uid[0],
|
||||
uuid=uid[1],
|
||||
loglevel=loglevel,
|
||||
spawn_method=spawn_method,
|
||||
)
|
||||
_trio_main(
|
||||
subactor,
|
||||
parent_addr=parent_addr,
|
||||
infect_asyncio=infect_asyncio,
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
__tracebackhide__: bool = True
|
||||
|
||||
|
|
@ -49,15 +86,10 @@ if __name__ == "__main__":
|
|||
parser.add_argument("--asyncio", action='store_true')
|
||||
args = parser.parse_args()
|
||||
|
||||
subactor = Actor(
|
||||
name=args.uid[0],
|
||||
uuid=args.uid[1],
|
||||
_actor_child_main(
|
||||
uid=args.uid,
|
||||
loglevel=args.loglevel,
|
||||
spawn_method="trio"
|
||||
)
|
||||
|
||||
_trio_main(
|
||||
subactor,
|
||||
parent_addr=args.parent_addr,
|
||||
infect_asyncio=args.asyncio,
|
||||
spawn_method='trio',
|
||||
)
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ async def open_actor_cluster(
|
|||
trio.open_nursery() as tn,
|
||||
tractor.trionics.maybe_raise_from_masking_exc()
|
||||
):
|
||||
uid = tractor.current_actor().uid
|
||||
uid = tractor.current_actor().aid.uid
|
||||
|
||||
async def _start(name: str) -> None:
|
||||
name = f'{uid[0]}.{name}'
|
||||
|
|
|
|||
|
|
@ -0,0 +1,48 @@
|
|||
# tractor: structured concurrent "actors".
|
||||
# Copyright 2018-eternity Tyler Goodlet.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
'''
|
||||
(Hot) coad (re-)load utils for python.
|
||||
|
||||
'''
|
||||
import importlib
|
||||
from pathlib import Path
|
||||
import sys
|
||||
from types import ModuleType
|
||||
|
||||
# ?TODO, move this into internal libs?
|
||||
# -[ ] we already use it in `modden.config._pymod` as well
|
||||
def load_module_from_path(
|
||||
path: Path,
|
||||
module_name: str|None = None,
|
||||
) -> ModuleType:
|
||||
'''
|
||||
Taken from SO,
|
||||
https://stackoverflow.com/a/67208147
|
||||
|
||||
which is based on stdlib docs,
|
||||
https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly
|
||||
|
||||
'''
|
||||
module_name = module_name or path.stem
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
module_name,
|
||||
str(path),
|
||||
)
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
sys.modules[module_name] = module
|
||||
spec.loader.exec_module(module)
|
||||
return module
|
||||
|
|
@ -70,6 +70,7 @@ from ._exceptions import (
|
|||
MsgTypeError,
|
||||
RemoteActorError,
|
||||
StreamOverrun,
|
||||
TransportClosed,
|
||||
pack_from_raise,
|
||||
unpack_error,
|
||||
)
|
||||
|
|
@ -96,7 +97,7 @@ from ._streaming import (
|
|||
MsgStream,
|
||||
open_stream_from_ctx,
|
||||
)
|
||||
from ._state import (
|
||||
from .runtime._state import (
|
||||
current_actor,
|
||||
debug_mode,
|
||||
_ctxvar_Context,
|
||||
|
|
@ -106,14 +107,14 @@ from .trionics import (
|
|||
)
|
||||
# ------ - ------
|
||||
if TYPE_CHECKING:
|
||||
from ._portal import Portal
|
||||
from ._runtime import Actor
|
||||
from .runtime._portal import Portal
|
||||
from .runtime._runtime import Actor
|
||||
from .ipc._transport import MsgTransport
|
||||
from .devx._frame_stack import (
|
||||
CallerInfo,
|
||||
)
|
||||
|
||||
log = get_logger(__name__)
|
||||
log = get_logger()
|
||||
|
||||
|
||||
class Unresolved:
|
||||
|
|
@ -462,10 +463,11 @@ class Context:
|
|||
|
||||
# self._cancel_called = val
|
||||
|
||||
# TODO, use the `Actor.aid: Aid` instead!
|
||||
@property
|
||||
def canceller(self) -> tuple[str, str]|None:
|
||||
'''
|
||||
`Actor.uid: tuple[str, str]` of the (remote)
|
||||
`Actor.aid.uid: tuple[str, str]` of the (remote)
|
||||
actor-process who's task was cancelled thus causing this
|
||||
(side of the) context to also be cancelled.
|
||||
|
||||
|
|
@ -498,12 +500,12 @@ class Context:
|
|||
if from_uid := re.src_uid:
|
||||
from_uid: tuple = tuple(from_uid)
|
||||
|
||||
our_uid: tuple = self._actor.uid
|
||||
our_uid: tuple = self._actor.aid.uid
|
||||
our_canceller = self.canceller
|
||||
|
||||
return bool(
|
||||
isinstance((ctxc := re), ContextCancelled)
|
||||
and from_uid == self.chan.uid
|
||||
and from_uid == self.chan.aid.uid
|
||||
and ctxc.canceller == our_uid
|
||||
and our_canceller == our_uid
|
||||
)
|
||||
|
|
@ -514,7 +516,7 @@ class Context:
|
|||
Records whether the task on the remote side of this IPC
|
||||
context acknowledged a cancel request via a relayed
|
||||
`ContextCancelled` with the `.canceller` attr set to the
|
||||
`Actor.uid` of the local actor who's task entered
|
||||
`Actor.aid.uid` of the local actor who's task entered
|
||||
`Portal.open_context()`.
|
||||
|
||||
This will only be `True` when `.cancel()` is called and
|
||||
|
|
@ -788,8 +790,8 @@ class Context:
|
|||
# appropriately.
|
||||
log.runtime(
|
||||
'Setting remote error for ctx\n\n'
|
||||
f'<= {self.peer_side!r}: {self.chan.uid}\n'
|
||||
f'=> {self.side!r}: {self._actor.uid}\n\n'
|
||||
f'<= {self.peer_side!r}: {self.chan.aid.reprol()}\n'
|
||||
f'=> {self.side!r}: {self._actor.aid.reprol()}\n\n'
|
||||
f'{error!r}'
|
||||
)
|
||||
self._remote_error: BaseException = error
|
||||
|
|
@ -810,7 +812,7 @@ class Context:
|
|||
# cancelled.
|
||||
#
|
||||
# !TODO, switching to `Actor.aid` here!
|
||||
if (canc := error.canceller) == self._actor.uid:
|
||||
if (canc := error.canceller) == self._actor.aid.uid:
|
||||
whom: str = 'us'
|
||||
self._canceller = canc
|
||||
else:
|
||||
|
|
@ -1035,7 +1037,7 @@ class Context:
|
|||
---------
|
||||
- after the far end cancels, the `.cancel()` calling side
|
||||
should receive a `ContextCancelled` with the
|
||||
`.canceller: tuple` uid set to the current `Actor.uid`.
|
||||
`.canceller: tuple` uid set to the current `Actor.aid.uid`.
|
||||
|
||||
- timeout (quickly) on failure to rx this ACK error-msg in
|
||||
an attempt to sidestep 2-generals when the transport
|
||||
|
|
@ -1064,9 +1066,9 @@ class Context:
|
|||
)
|
||||
reminfo: str = (
|
||||
# ' =>\n'
|
||||
# f'Context.cancel() => {self.chan.uid}\n'
|
||||
# f'Context.cancel() => {self.chan.aid.uid}\n'
|
||||
f'\n'
|
||||
f'c)=> {self.chan.uid}\n'
|
||||
f'c)=> {self.chan.aid.reprol()}\n'
|
||||
f' |_[{self.dst_maddr}\n'
|
||||
f' >> {self.repr_rpc}\n'
|
||||
# f' >> {self._nsf}() -> {codec}[dict]:\n\n'
|
||||
|
|
@ -1210,7 +1212,7 @@ class Context:
|
|||
|
||||
'''
|
||||
__tracebackhide__: bool = hide_tb
|
||||
peer_uid: tuple = self.chan.uid
|
||||
peer_uid: tuple = self.chan.aid.uid
|
||||
|
||||
# XXX NOTE XXX: `ContextCancelled`/`StreamOverrun` absorption
|
||||
# for "graceful cancellation" case(s):
|
||||
|
|
@ -1227,7 +1229,7 @@ class Context:
|
|||
# (`ContextCancelled`) as an expected
|
||||
# error-msg-is-cancellation-ack IFF said
|
||||
# `remote_error: ContextCancelled` has `.canceller`
|
||||
# set to the `Actor.uid` of THIS task (i.e. the
|
||||
# set to the `Actor.aid.uid` of THIS task (i.e. the
|
||||
# cancellation requesting task's actor is the actor
|
||||
# checking whether it should absorb the ctxc).
|
||||
self_ctxc: bool = self._is_self_cancelled(remote_error)
|
||||
|
|
@ -1678,7 +1680,7 @@ class Context:
|
|||
|
||||
elif self._started_called:
|
||||
raise RuntimeError(
|
||||
f'called `.started()` twice on context with {self.chan.uid}'
|
||||
f'called `.started()` twice on context with {self.chan.aid.uid}'
|
||||
)
|
||||
|
||||
started_msg = Started(
|
||||
|
|
@ -1811,7 +1813,7 @@ class Context:
|
|||
'''
|
||||
cid: str = self.cid
|
||||
chan: Channel = self.chan
|
||||
from_uid: tuple[str, str] = chan.uid
|
||||
from_uid: tuple[str, str] = chan.aid.uid
|
||||
send_chan: trio.MemorySendChannel = self._send_chan
|
||||
nsf: NamespacePath = self._nsf
|
||||
|
||||
|
|
@ -1952,20 +1954,22 @@ class Context:
|
|||
# overrun state and that msg isn't stuck in an
|
||||
# overflow queue what happens?!?
|
||||
|
||||
local_uid = self._actor.uid
|
||||
local_aid = self._actor.aid
|
||||
txt: str = (
|
||||
'on IPC context:\n'
|
||||
|
||||
f'<= sender: {from_uid}\n'
|
||||
f' |_ {self._nsf}()\n\n'
|
||||
|
||||
f'=> overrun: {local_uid}\n'
|
||||
f'=> overrun: {local_aid.reprol()!r}\n'
|
||||
f' |_cid: {cid}\n'
|
||||
f' |_task: {self._task}\n'
|
||||
)
|
||||
if not self._stream_opened:
|
||||
txt += (
|
||||
f'\n*** No stream open on `{local_uid[0]}` side! ***\n\n'
|
||||
f'\n'
|
||||
f'*** No stream open on `{local_aid.name}` side! ***\n'
|
||||
f'\n'
|
||||
f'{msg}\n'
|
||||
)
|
||||
|
||||
|
|
@ -2114,7 +2118,11 @@ async def open_context_from_portal(
|
|||
# XXX NOTE XXX: currenly we do NOT allow opening a contex
|
||||
# with "self" since the local feeder mem-chan processing
|
||||
# is not built for it.
|
||||
if (uid := portal.channel.uid) == portal.actor.uid:
|
||||
if (
|
||||
(uid := portal.channel.aid.uid)
|
||||
==
|
||||
portal.actor.aid.uid
|
||||
):
|
||||
raise RuntimeError(
|
||||
'** !! Invalid Operation !! **\n'
|
||||
'Can not open an IPC ctx with the local actor!\n'
|
||||
|
|
@ -2328,7 +2336,7 @@ async def open_context_from_portal(
|
|||
and
|
||||
ctxc is ctx._remote_error
|
||||
and
|
||||
ctxc.canceller == portal.actor.uid
|
||||
ctxc.canceller == portal.actor.aid.uid
|
||||
):
|
||||
log.cancel(
|
||||
f'Context (cid=[{ctx.cid[-6:]}..] cancelled gracefully with:\n'
|
||||
|
|
@ -2391,19 +2399,21 @@ async def open_context_from_portal(
|
|||
case trio.Cancelled():
|
||||
logmeth = log.cancel
|
||||
cause: str = 'cancelled'
|
||||
msg: str = (
|
||||
f'ctx {ctx.side!r}-side {cause!r} with,\n'
|
||||
f'{ctx.repr_outcome()!r}\n'
|
||||
)
|
||||
|
||||
# XXX explicitly report on any non-graceful-taskc cases
|
||||
case _:
|
||||
cause: str = 'errored'
|
||||
logmeth = log.exception
|
||||
msg: str = f'ctx {ctx.side!r}-side {cause!r} with,\n'
|
||||
|
||||
logmeth(
|
||||
f'ctx {ctx.side!r}-side {cause!r} with,\n'
|
||||
f'{ctx.repr_outcome()!r}\n'
|
||||
)
|
||||
logmeth(msg)
|
||||
|
||||
if debug_mode():
|
||||
# async with debug.acquire_debug_lock(portal.actor.uid):
|
||||
# async with debug.acquire_debug_lock(portal.actor.aid.uid):
|
||||
# pass
|
||||
# TODO: factor ^ into below for non-root cases?
|
||||
#
|
||||
|
|
@ -2426,10 +2436,7 @@ async def open_context_from_portal(
|
|||
try:
|
||||
# await pause(shield=True)
|
||||
await ctx.cancel()
|
||||
except (
|
||||
trio.BrokenResourceError,
|
||||
trio.ClosedResourceError,
|
||||
):
|
||||
except TransportClosed:
|
||||
log.warning(
|
||||
'IPC connection for context is broken?\n'
|
||||
f'task: {ctx.cid}\n'
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ from msgspec import (
|
|||
ValidationError,
|
||||
)
|
||||
|
||||
from tractor._state import current_actor
|
||||
from tractor.runtime._state import current_actor
|
||||
from tractor.log import get_logger
|
||||
from tractor.msg import (
|
||||
Error,
|
||||
|
|
@ -187,7 +187,31 @@ _body_fields: list[str] = list(
|
|||
)
|
||||
|
||||
|
||||
def get_err_type(type_name: str) -> BaseException|None:
|
||||
def reg_err_types(
|
||||
exc_types: list[Type[Exception]],
|
||||
) -> None:
|
||||
'''
|
||||
Register custom exception types for local lookup.
|
||||
|
||||
Such that error types can be registered by an external
|
||||
`tractor`-use-app code base which are expected to be raised
|
||||
remotely; enables them being re-raised on the receiver side of
|
||||
some inter-actor IPC dialog.
|
||||
|
||||
'''
|
||||
for exc_type in exc_types:
|
||||
log.debug(
|
||||
f'Register custom exception,\n'
|
||||
f'{exc_type!r}\n'
|
||||
)
|
||||
setattr(
|
||||
_this_mod,
|
||||
exc_type.__name__,
|
||||
exc_type,
|
||||
)
|
||||
|
||||
|
||||
def get_err_type(type_name: str) -> Type[BaseException]|None:
|
||||
'''
|
||||
Look up an exception type by name from the set of locally known
|
||||
namespaces:
|
||||
|
|
@ -301,7 +325,8 @@ class RemoteActorError(Exception):
|
|||
# also pertains to our long long oustanding issue XD
|
||||
# https://github.com/goodboy/tractor/issues/5
|
||||
self._boxed_type: BaseException = boxed_type
|
||||
self._src_type: BaseException|None = None
|
||||
self._src_type: Type[BaseException]|None = None
|
||||
self._src_type_resolved: bool = False
|
||||
self._ipc_msg: Error|None = ipc_msg
|
||||
self._extra_msgdata = extra_msgdata
|
||||
|
||||
|
|
@ -410,24 +435,41 @@ class RemoteActorError(Exception):
|
|||
return self._ipc_msg.src_type_str
|
||||
|
||||
@property
|
||||
def src_type(self) -> str:
|
||||
def src_type(self) -> Type[BaseException]|None:
|
||||
'''
|
||||
Error type raised by original remote faulting actor.
|
||||
Error type raised by original remote faulting
|
||||
actor.
|
||||
|
||||
When the error has only been relayed a single actor-hop
|
||||
this will be the same as the `.boxed_type`.
|
||||
When the error has only been relayed a single
|
||||
actor-hop this will be the same as
|
||||
`.boxed_type`.
|
||||
|
||||
If the type can not be resolved locally (i.e.
|
||||
it was not registered via `reg_err_types()`)
|
||||
a warning is logged and `None` is returned;
|
||||
all string-level error info (`.src_type_str`,
|
||||
`.tb_str`, etc.) remains available.
|
||||
|
||||
'''
|
||||
if self._src_type is None:
|
||||
if not self._src_type_resolved:
|
||||
self._src_type_resolved = True
|
||||
|
||||
if self._ipc_msg is None:
|
||||
return None
|
||||
|
||||
self._src_type = get_err_type(
|
||||
self._ipc_msg.src_type_str
|
||||
)
|
||||
|
||||
if not self._src_type:
|
||||
raise TypeError(
|
||||
f'Failed to lookup src error type with '
|
||||
f'`tractor._exceptions.get_err_type()` :\n'
|
||||
f'{self.src_type_str}'
|
||||
log.warning(
|
||||
f'Failed to lookup src error type via\n'
|
||||
f'`tractor._exceptions.get_err_type()`:\n'
|
||||
f'\n'
|
||||
f'`{self._ipc_msg.src_type_str}`'
|
||||
f' is not registered!\n'
|
||||
f'\n'
|
||||
f'Call `reg_err_types()` to enable'
|
||||
f' full type reconstruction.\n'
|
||||
)
|
||||
|
||||
return self._src_type
|
||||
|
|
@ -435,20 +477,30 @@ class RemoteActorError(Exception):
|
|||
@property
|
||||
def boxed_type_str(self) -> str:
|
||||
'''
|
||||
String-name of the (last hop's) boxed error type.
|
||||
String-name of the (last hop's) boxed error
|
||||
type.
|
||||
|
||||
Falls back to the IPC-msg-encoded type-name
|
||||
str when the type can not be resolved locally
|
||||
(e.g. unregistered custom errors).
|
||||
|
||||
'''
|
||||
# TODO, maybe support also serializing the
|
||||
# `ExceptionGroup.exeptions: list[BaseException]` set under
|
||||
# certain conditions?
|
||||
# `ExceptionGroup.exceptions: list[BaseException]`
|
||||
# set under certain conditions?
|
||||
bt: Type[BaseException] = self.boxed_type
|
||||
if bt:
|
||||
return str(bt.__name__)
|
||||
|
||||
return ''
|
||||
# fallback to the str name from the IPC msg
|
||||
# when the type obj can't be resolved.
|
||||
if self._ipc_msg:
|
||||
return self._ipc_msg.boxed_type_str
|
||||
|
||||
return '<unknown>'
|
||||
|
||||
@property
|
||||
def boxed_type(self) -> Type[BaseException]:
|
||||
def boxed_type(self) -> Type[BaseException]|None:
|
||||
'''
|
||||
Error type boxed by last actor IPC hop.
|
||||
|
||||
|
|
@ -677,10 +729,22 @@ class RemoteActorError(Exception):
|
|||
failing actor's remote env.
|
||||
|
||||
'''
|
||||
# TODO: better tb insertion and all the fancier dunder
|
||||
# metadata stuff as per `.__context__` etc. and friends:
|
||||
# TODO: better tb insertion and all the fancier
|
||||
# dunder metadata stuff as per `.__context__`
|
||||
# etc. and friends:
|
||||
# https://github.com/python-trio/trio/issues/611
|
||||
src_type_ref: Type[BaseException] = self.src_type
|
||||
src_type_ref: Type[BaseException]|None = (
|
||||
self.src_type
|
||||
)
|
||||
if src_type_ref is None:
|
||||
# unresolvable type: fall back to
|
||||
# a `RuntimeError` preserving original
|
||||
# traceback + type name.
|
||||
return RuntimeError(
|
||||
f'{self.src_type_str}: '
|
||||
f'{self.tb_str}'
|
||||
)
|
||||
|
||||
return src_type_ref(self.tb_str)
|
||||
|
||||
# TODO: local recontruction of nested inception for a given
|
||||
|
|
@ -982,6 +1046,7 @@ class TransportClosed(Exception):
|
|||
'''
|
||||
__tracebackhide__: bool = hide_tb
|
||||
message: str = message or self.message
|
||||
|
||||
# when a cause is set, slap it onto the log emission.
|
||||
if cause := self.src_exc:
|
||||
cause_tb_str: str = ''.join(
|
||||
|
|
@ -989,7 +1054,7 @@ class TransportClosed(Exception):
|
|||
)
|
||||
message += (
|
||||
f'{cause_tb_str}\n' # tb
|
||||
f' {cause}\n' # exc repr
|
||||
f'{cause!r}\n' # exc repr
|
||||
)
|
||||
|
||||
getattr(
|
||||
|
|
@ -1208,14 +1273,31 @@ def unpack_error(
|
|||
if not isinstance(msg, Error):
|
||||
return None
|
||||
|
||||
# try to lookup a suitable error type from the local runtime
|
||||
# env then use it to construct a local instance.
|
||||
# boxed_type_str: str = error_dict['boxed_type_str']
|
||||
# try to lookup a suitable error type from the
|
||||
# local runtime env then use it to construct a
|
||||
# local instance.
|
||||
boxed_type_str: str = msg.boxed_type_str
|
||||
boxed_type: Type[BaseException] = get_err_type(boxed_type_str)
|
||||
boxed_type: Type[BaseException]|None = get_err_type(
|
||||
boxed_type_str
|
||||
)
|
||||
|
||||
# retrieve the error's msg-encoded remotoe-env info
|
||||
message: str = f'remote task raised a {msg.boxed_type_str!r}\n'
|
||||
if boxed_type is None:
|
||||
log.warning(
|
||||
f'Failed to resolve remote error type\n'
|
||||
f'`{boxed_type_str}` - boxing as\n'
|
||||
f'`RemoteActorError` with original\n'
|
||||
f'traceback preserved.\n'
|
||||
f'\n'
|
||||
f'Call `reg_err_types()` to enable\n'
|
||||
f'full type reconstruction.\n'
|
||||
)
|
||||
|
||||
# retrieve the error's msg-encoded remote-env
|
||||
# info
|
||||
message: str = (
|
||||
f'remote task raised a '
|
||||
f'{msg.boxed_type_str!r}\n'
|
||||
)
|
||||
|
||||
# TODO: do we even really need these checks for RAEs?
|
||||
if boxed_type_str in [
|
||||
|
|
|
|||
|
|
@ -1,116 +0,0 @@
|
|||
# tractor: structured concurrent "actors".
|
||||
# Copyright 2018-eternity Tyler Goodlet.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
"""
|
||||
Helpers pulled mostly verbatim from ``multiprocessing.spawn``
|
||||
to aid with "fixing up" the ``__main__`` module in subprocesses.
|
||||
|
||||
These helpers are needed for any spawing backend that doesn't already
|
||||
handle this. For example when using ``trio_run_in_process`` it is needed
|
||||
but obviously not when we're already using ``multiprocessing``.
|
||||
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import platform
|
||||
import types
|
||||
import runpy
|
||||
|
||||
|
||||
ORIGINAL_DIR = os.path.abspath(os.getcwd())
|
||||
|
||||
|
||||
def _mp_figure_out_main() -> dict[str, str]:
|
||||
"""Taken from ``multiprocessing.spawn.get_preparation_data()``.
|
||||
|
||||
Retrieve parent actor `__main__` module data.
|
||||
"""
|
||||
d = {}
|
||||
# Figure out whether to initialise main in the subprocess as a module
|
||||
# or through direct execution (or to leave it alone entirely)
|
||||
main_module = sys.modules['__main__']
|
||||
main_mod_name = getattr(main_module.__spec__, "name", None)
|
||||
if main_mod_name is not None:
|
||||
d['init_main_from_name'] = main_mod_name
|
||||
# elif sys.platform != 'win32' or (not WINEXE and not WINSERVICE):
|
||||
elif platform.system() != 'Windows':
|
||||
main_path = getattr(main_module, '__file__', None)
|
||||
if main_path is not None:
|
||||
if (
|
||||
not os.path.isabs(main_path) and (
|
||||
ORIGINAL_DIR is not None)
|
||||
):
|
||||
# process.ORIGINAL_DIR is not None):
|
||||
# main_path = os.path.join(process.ORIGINAL_DIR, main_path)
|
||||
main_path = os.path.join(ORIGINAL_DIR, main_path)
|
||||
d['init_main_from_path'] = os.path.normpath(main_path)
|
||||
|
||||
return d
|
||||
|
||||
|
||||
# Multiprocessing module helpers to fix up the main module in
|
||||
# spawned subprocesses
|
||||
def _fixup_main_from_name(mod_name: str) -> None:
|
||||
# __main__.py files for packages, directories, zip archives, etc, run
|
||||
# their "main only" code unconditionally, so we don't even try to
|
||||
# populate anything in __main__, nor do we make any changes to
|
||||
# __main__ attributes
|
||||
current_main = sys.modules['__main__']
|
||||
if mod_name == "__main__" or mod_name.endswith(".__main__"):
|
||||
return
|
||||
|
||||
# If this process was forked, __main__ may already be populated
|
||||
if getattr(current_main.__spec__, "name", None) == mod_name:
|
||||
return
|
||||
|
||||
# Otherwise, __main__ may contain some non-main code where we need to
|
||||
# support unpickling it properly. We rerun it as __mp_main__ and make
|
||||
# the normal __main__ an alias to that
|
||||
# old_main_modules.append(current_main)
|
||||
main_module = types.ModuleType("__mp_main__")
|
||||
main_content = runpy.run_module(mod_name,
|
||||
run_name="__mp_main__",
|
||||
alter_sys=True) # type: ignore
|
||||
main_module.__dict__.update(main_content)
|
||||
sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module
|
||||
|
||||
|
||||
def _fixup_main_from_path(main_path: str) -> None:
|
||||
# If this process was forked, __main__ may already be populated
|
||||
current_main = sys.modules['__main__']
|
||||
|
||||
# Unfortunately, the main ipython launch script historically had no
|
||||
# "if __name__ == '__main__'" guard, so we work around that
|
||||
# by treating it like a __main__.py file
|
||||
# See https://github.com/ipython/ipython/issues/4698
|
||||
main_name = os.path.splitext(os.path.basename(main_path))[0]
|
||||
if main_name == 'ipython':
|
||||
return
|
||||
|
||||
# Otherwise, if __file__ already has the setting we expect,
|
||||
# there's nothing more to do
|
||||
if getattr(current_main, '__file__', None) == main_path:
|
||||
return
|
||||
|
||||
# If the parent process has sent a path through rather than a module
|
||||
# name we assume it is an executable script that may contain
|
||||
# non-main code that needs to be executed
|
||||
# old_main_modules.append(current_main)
|
||||
main_module = types.ModuleType("__mp_main__")
|
||||
main_content = runpy.run_path(main_path,
|
||||
run_name="__mp_main__") # type: ignore
|
||||
main_module.__dict__.update(main_content)
|
||||
sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module
|
||||
|
|
@ -1,151 +0,0 @@
|
|||
# tractor: structured concurrent "actors".
|
||||
# Copyright 2018-eternity Tyler Goodlet.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
'''
|
||||
Multiaddress parser and utils according to the spec(s) defined by
`libp2p` and used in dependent projects such as `ipfs`:
|
||||
|
||||
- https://docs.libp2p.io/concepts/fundamentals/addressing/
|
||||
- https://github.com/libp2p/specs/blob/master/addressing/README.md
|
||||
|
||||
'''
|
||||
from typing import Iterator
|
||||
|
||||
from bidict import bidict
|
||||
|
||||
# TODO: see if we can leverage libp2p ecosys projects instead of
|
||||
# rolling our own (parser) impls of the above addressing specs:
|
||||
# - https://github.com/libp2p/py-libp2p
|
||||
# - https://docs.libp2p.io/concepts/nat/circuit-relay/#relay-addresses
|
||||
# prots: bidict[int, str] = bidict({
|
||||
prots: bidict[int, str] = {
|
||||
'ipv4': 3,
|
||||
'ipv6': 3,
|
||||
'wg': 3,
|
||||
|
||||
'tcp': 4,
|
||||
'udp': 4,
|
||||
|
||||
# TODO: support the next-gen shite Bo
|
||||
# 'quic': 4,
|
||||
# 'ssh': 7, # via rsyscall bootstrapping
|
||||
}
|
||||
|
||||
prot_params: dict[str, tuple[str]] = {
|
||||
'ipv4': ('addr',),
|
||||
'ipv6': ('addr',),
|
||||
'wg': ('addr', 'port', 'pubkey'),
|
||||
|
||||
'tcp': ('port',),
|
||||
'udp': ('port',),
|
||||
|
||||
# 'quic': ('port',),
|
||||
# 'ssh': ('port',),
|
||||
}
|
||||
|
||||
|
||||
def iter_prot_layers(
|
||||
multiaddr: str,
|
||||
) -> Iterator[
|
||||
tuple[
|
||||
int,
|
||||
list[str]
|
||||
]
|
||||
]:
|
||||
'''
|
||||
Unpack a libp2p style "multiaddress" into multiple "segments"
|
||||
for each "layer" of the protocoll stack (in OSI terms).
|
||||
|
||||
'''
|
||||
tokens: list[str] = multiaddr.split('/')
|
||||
root, tokens = tokens[0], tokens[1:]
|
||||
assert not root # there is a root '/' on LHS
|
||||
itokens = iter(tokens)
|
||||
|
||||
prot: str | None = None
|
||||
params: list[str] = []
|
||||
for token in itokens:
|
||||
# every prot path should start with a known
|
||||
# key-str.
|
||||
if token in prots:
|
||||
if prot is None:
|
||||
prot: str = token
|
||||
else:
|
||||
yield prot, params
|
||||
prot = token
|
||||
|
||||
params = []
|
||||
|
||||
elif token not in prots:
|
||||
params.append(token)
|
||||
|
||||
else:
|
||||
yield prot, params
|
||||
|
||||
|
||||
def parse_maddr(
|
||||
multiaddr: str,
|
||||
) -> dict[str, str | int | dict]:
|
||||
'''
|
||||
Parse a libp2p style "multiaddress" into its distinct protocol
|
||||
segments where each segment is of the form:
|
||||
|
||||
`../<protocol>/<param0>/<param1>/../<paramN>`
|
||||
|
||||
and is loaded into a (order preserving) `layers: dict[str,
|
||||
dict[str, Any]` which holds each protocol-layer-segment of the
|
||||
original `str` path as a separate entry according to its approx
|
||||
OSI "layer number".
|
||||
|
||||
Any `paramN` in the path must be distinctly defined by a str-token in the
|
||||
(module global) `prot_params` table.
|
||||
|
||||
For eg. for wireguard which requires an address, port number and publickey
|
||||
the protocol params are specified as the entry:
|
||||
|
||||
'wg': ('addr', 'port', 'pubkey'),
|
||||
|
||||
and are thus parsed from a maddr in that order:
|
||||
`'/wg/1.1.1.1/51820/<pubkey>'`
|
||||
|
||||
'''
|
||||
layers: dict[str, str | int | dict] = {}
|
||||
for (
|
||||
prot_key,
|
||||
params,
|
||||
) in iter_prot_layers(multiaddr):
|
||||
|
||||
layer: int = prots[prot_key] # OSI layer used for sorting
|
||||
ep: dict[str, int | str] = {'layer': layer}
|
||||
layers[prot_key] = ep
|
||||
|
||||
# TODO; validation and resolving of names:
|
||||
# - each param via a validator provided as part of the
|
||||
# prot_params def? (also see `"port"` case below..)
|
||||
# - do a resolv step that will check addrs against
|
||||
# any loaded network.resolv: dict[str, str]
|
||||
rparams: list = list(reversed(params))
|
||||
for key in prot_params[prot_key]:
|
||||
val: str | int = rparams.pop()
|
||||
|
||||
# TODO: UGHH, dunno what we should do for validation
|
||||
# here, put it in the params spec somehow?
|
||||
if key == 'port':
|
||||
val = int(val)
|
||||
|
||||
ep[key] = val
|
||||
|
||||
return layers
|
||||
147
tractor/_root.py
147
tractor/_root.py
|
|
@ -37,19 +37,20 @@ import warnings
|
|||
|
||||
import trio
|
||||
|
||||
from . import _runtime
|
||||
from .runtime import _runtime
|
||||
from .discovery._registry import Registrar
|
||||
from .devx import (
|
||||
debug,
|
||||
_frame_stack,
|
||||
pformat as _pformat,
|
||||
)
|
||||
from . import _spawn
|
||||
from . import _state
|
||||
from .spawn import _spawn
|
||||
from .runtime import _state
|
||||
from . import log
|
||||
from .ipc import (
|
||||
_connect_chan,
|
||||
)
|
||||
from ._addr import (
|
||||
from .discovery._addr import (
|
||||
Address,
|
||||
UnwrappedAddress,
|
||||
default_lo_addrs,
|
||||
|
|
@ -88,7 +89,8 @@ async def maybe_block_bp(
|
|||
bp_blocked: bool
|
||||
if (
|
||||
debug_mode
|
||||
and maybe_enable_greenback
|
||||
and
|
||||
maybe_enable_greenback
|
||||
and (
|
||||
maybe_mod := await debug.maybe_init_greenback(
|
||||
raise_not_found=False,
|
||||
|
|
@ -143,11 +145,16 @@ async def maybe_block_bp(
|
|||
@acm
|
||||
async def open_root_actor(
|
||||
*,
|
||||
# defaults are above
|
||||
registry_addrs: list[UnwrappedAddress]|None = None,
|
||||
tpt_bind_addrs: list[
|
||||
Address # `Address.get_random()` case
|
||||
|UnwrappedAddress # registrar case `= uw_reg_addrs`
|
||||
]|None = None,
|
||||
|
||||
# defaults are above
|
||||
arbiter_addr: tuple[UnwrappedAddress]|None = None,
|
||||
registry_addrs: list[
|
||||
Address
|
||||
|UnwrappedAddress
|
||||
]|None = None,
|
||||
|
||||
enable_transports: list[
|
||||
# TODO, this should eventually be the pairs as
|
||||
|
|
@ -266,16 +273,7 @@ async def open_root_actor(
|
|||
if start_method is not None:
|
||||
_spawn.try_set_start_method(start_method)
|
||||
|
||||
# TODO! remove this ASAP!
|
||||
if arbiter_addr is not None:
|
||||
warnings.warn(
|
||||
'`arbiter_addr` is now deprecated\n'
|
||||
'Use `registry_addrs: list[tuple]` instead..',
|
||||
DeprecationWarning,
|
||||
stacklevel=2,
|
||||
)
|
||||
uw_reg_addrs = [arbiter_addr]
|
||||
|
||||
# XXX expect pre-unwrapped registrar addrs.
|
||||
uw_reg_addrs = registry_addrs
|
||||
if not uw_reg_addrs:
|
||||
uw_reg_addrs: list[UnwrappedAddress] = default_lo_addrs(
|
||||
|
|
@ -288,11 +286,12 @@ async def open_root_actor(
|
|||
wrap_address(uw_addr)
|
||||
for uw_addr in uw_reg_addrs
|
||||
]
|
||||
|
||||
loglevel = (
|
||||
loglevel: str = (
|
||||
loglevel
|
||||
or log._default_loglevel
|
||||
).upper()
|
||||
or
|
||||
log._default_loglevel
|
||||
)
|
||||
loglevel: str = loglevel.upper()
|
||||
|
||||
if (
|
||||
debug_mode
|
||||
|
|
@ -323,7 +322,10 @@ async def open_root_actor(
|
|||
)
|
||||
|
||||
assert loglevel
|
||||
_log = log.get_console_log(loglevel)
|
||||
_log = log.get_console_log(
|
||||
level=loglevel,
|
||||
name='tractor',
|
||||
)
|
||||
assert _log
|
||||
|
||||
# TODO: factor this into `.devx._stackscope`!!
|
||||
|
|
@ -380,10 +382,17 @@ async def open_root_actor(
|
|||
addr,
|
||||
)
|
||||
|
||||
trans_bind_addrs: list[UnwrappedAddress] = []
|
||||
if tpt_bind_addrs is None:
|
||||
tpt_bind_addrs: list[Address] = []
|
||||
else:
|
||||
input_bind_addrs = list(tpt_bind_addrs)
|
||||
tpt_bind_addrs: list[Address] = []
|
||||
for addr in input_bind_addrs:
|
||||
addr: Address = wrap_address(addr)
|
||||
tpt_bind_addrs.append(addr)
|
||||
|
||||
# Create a new local root-actor instance which IS NOT THE
|
||||
# REGISTRAR
|
||||
# ------ NON-REGISTRAR ------
|
||||
# create a new root-actor instance.
|
||||
if ponged_addrs:
|
||||
if ensure_registry:
|
||||
raise RuntimeError(
|
||||
|
|
@ -391,7 +400,7 @@ async def open_root_actor(
|
|||
'registry socket(s) already bound'
|
||||
)
|
||||
|
||||
# we were able to connect to an arbiter
|
||||
# we were able to connect to a registrar
|
||||
logger.info(
|
||||
f'Registry(s) seem(s) to exist @ {ponged_addrs}'
|
||||
)
|
||||
|
|
@ -408,14 +417,26 @@ async def open_root_actor(
|
|||
# a new NON-registrar, ROOT-actor.
|
||||
#
|
||||
# XXX INSTEAD, bind random addrs using the same tpt
|
||||
# proto.
|
||||
for addr in ponged_addrs:
|
||||
trans_bind_addrs.append(
|
||||
addr.get_random(
|
||||
bindspace=addr.bindspace,
|
||||
# proto if not already provided.
|
||||
if not tpt_bind_addrs:
|
||||
for addr in ponged_addrs:
|
||||
tpt_bind_addrs.append(
|
||||
# XXX, these are `Address` NOT `UnwrappedAddress`.
|
||||
#
|
||||
# NOTE, in the case of posix/berkley socket
|
||||
# protos we allocate port=0 such that the system
|
||||
# allocates a random value at bind time; this
|
||||
# happens in the `.ipc.*` stack's backend.
|
||||
addr.get_random(
|
||||
bindspace=addr.bindspace,
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
header: str = '-> Contacting existing registry @ '
|
||||
|
||||
# ------ REGISTRAR ------
|
||||
# create a new "registry providing" root-actor instance.
|
||||
#
|
||||
# Start this local actor as the "registrar", aka a regular
|
||||
# actor who manages the local registry of "mailboxes" of
|
||||
# other process-tree-local sub-actors.
|
||||
|
|
@ -424,7 +445,11 @@ async def open_root_actor(
|
|||
# following init steps are taken:
|
||||
# - the transport layer server is bound to each addr
# pair defined in provided registry_addrs, or the default.
|
||||
trans_bind_addrs = uw_reg_addrs
|
||||
tpt_bind_addrs = list(set(
|
||||
tpt_bind_addrs
|
||||
+
|
||||
[wrap_address(a) for a in uw_reg_addrs]
|
||||
))
|
||||
|
||||
# - it is normally desirable for any registrar to stay up
|
||||
# indefinitely until either all registered (child/sub)
|
||||
|
|
@ -435,8 +460,7 @@ async def open_root_actor(
|
|||
# https://github.com/goodboy/tractor/pull/348
|
||||
# https://github.com/goodboy/tractor/issues/296
|
||||
|
||||
# TODO: rename as `RootActor` or is that even necessary?
|
||||
actor = _runtime.Arbiter(
|
||||
actor = Registrar(
|
||||
name=name or 'registrar',
|
||||
uuid=mk_uuid(),
|
||||
registry_addrs=registry_addrs,
|
||||
|
|
@ -444,19 +468,10 @@ async def open_root_actor(
|
|||
enable_modules=enable_modules,
|
||||
)
|
||||
# XXX, in case the root actor runtime was actually run from
|
||||
# `tractor.to_asyncio.run_as_asyncio_guest()` and NOt
|
||||
# `tractor.to_asyncio.run_as_asyncio_guest()` and NOT
|
||||
# `.trio.run()`.
|
||||
actor._infected_aio = _state._runtime_vars['_is_infected_aio']
|
||||
|
||||
# NOTE, only set the loopback addr for the
|
||||
# process-tree-global "root" mailbox since all sub-actors
|
||||
# should be able to speak to their root actor over that
|
||||
# channel.
|
||||
raddrs: list[Address] = _state._runtime_vars['_root_addrs']
|
||||
raddrs.extend(trans_bind_addrs)
|
||||
# TODO, remove once we have also removed all usage;
|
||||
# eventually all (root-)registry apis should expect > 1 addr.
|
||||
_state._runtime_vars['_root_mailbox'] = raddrs[0]
|
||||
header: str = '-> Opening new registry @ '
|
||||
|
||||
# Start up main task set via core actor-runtime nurseries.
|
||||
try:
|
||||
|
|
@ -468,7 +483,7 @@ async def open_root_actor(
|
|||
report: str = f'Starting actor-runtime for {actor.aid.reprol()!r}\n'
|
||||
if reg_addrs := actor.registry_addrs:
|
||||
report += (
|
||||
'-> Opening new registry @ '
|
||||
header
|
||||
+
|
||||
'\n'.join(
|
||||
f'{addr}' for addr in reg_addrs
|
||||
|
|
@ -494,14 +509,39 @@ async def open_root_actor(
|
|||
# "actor runtime" primitives are SC-compat and thus all
|
||||
# transitively spawned actors/processes must be as
|
||||
# well.
|
||||
await root_tn.start(
|
||||
accept_addrs: list[UnwrappedAddress]
|
||||
reg_addrs: list[UnwrappedAddress]
|
||||
(
|
||||
accept_addrs,
|
||||
reg_addrs,
|
||||
) = await root_tn.start(
|
||||
partial(
|
||||
_runtime.async_main,
|
||||
actor,
|
||||
accept_addrs=trans_bind_addrs,
|
||||
accept_addrs=tpt_bind_addrs,
|
||||
parent_addr=None
|
||||
)
|
||||
)
|
||||
# NOTE, only set a local-host addr (i.e. like
|
||||
# `lo`-loopback for TCP) for the process-tree-global
|
||||
# "root"-process (its tree-wide "mailbox") since all
|
||||
# sub-actors should be able to speak to their root
|
||||
# actor over that channel.
|
||||
#
|
||||
# ?TODO, per-OS non-network-proto alt options?
|
||||
# -[ ] on linux we should be able to always use UDS?
|
||||
#
|
||||
raddrs: list[UnwrappedAddress] = _state._runtime_vars['_root_addrs']
|
||||
raddrs.extend(
|
||||
accept_addrs,
|
||||
)
|
||||
# TODO, remove once we have also removed all usage;
|
||||
# eventually all (root-)registry apis should expect > 1 addr.
|
||||
_state._runtime_vars['_root_mailbox'] = raddrs[0]
|
||||
# if 'chart' in actor.aid.name:
|
||||
# from tractor.devx import mk_pdb
|
||||
# mk_pdb().set_trace()
|
||||
|
||||
try:
|
||||
yield actor
|
||||
except (
|
||||
|
|
@ -583,6 +623,13 @@ async def open_root_actor(
|
|||
):
|
||||
_state._runtime_vars['_debug_mode'] = False
|
||||
|
||||
# !XXX, clear ALL prior contact info state, this is MEGA
|
||||
# important if you are opening the runtime multiple times
|
||||
# from the same parent process (like in our test
|
||||
# harness)!
|
||||
_state._runtime_vars['_root_addrs'].clear()
|
||||
_state._runtime_vars['_root_mailbox'] = None
|
||||
|
||||
_state._current_actor = None
|
||||
_state._last_actor_terminated = actor
|
||||
|
||||
|
|
|
|||
|
|
@ -1,843 +0,0 @@
|
|||
# tractor: structured concurrent "actors".
|
||||
# Copyright 2018-eternity Tyler Goodlet.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
"""
|
||||
Machinery for actor process spawning using multiple backends.
|
||||
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import multiprocessing as mp
|
||||
import sys
|
||||
import platform
|
||||
from typing import (
|
||||
Any,
|
||||
Awaitable,
|
||||
Literal,
|
||||
Callable,
|
||||
TypeVar,
|
||||
TYPE_CHECKING,
|
||||
)
|
||||
|
||||
import trio
|
||||
from trio import TaskStatus
|
||||
|
||||
from .devx import (
|
||||
debug,
|
||||
pformat as _pformat
|
||||
)
|
||||
from tractor._state import (
|
||||
current_actor,
|
||||
is_main_process,
|
||||
is_root_process,
|
||||
debug_mode,
|
||||
_runtime_vars,
|
||||
)
|
||||
from tractor.log import get_logger
|
||||
from tractor._addr import UnwrappedAddress
|
||||
from tractor._portal import Portal
|
||||
from tractor._runtime import Actor
|
||||
from tractor._entry import _mp_main
|
||||
from tractor._exceptions import ActorFailure
|
||||
from tractor.msg import (
|
||||
types as msgtypes,
|
||||
pretty_struct,
|
||||
)
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ipc import (
|
||||
_server,
|
||||
Channel,
|
||||
)
|
||||
from ._supervise import ActorNursery
|
||||
ProcessType = TypeVar('ProcessType', mp.Process, trio.Process)
|
||||
|
||||
|
||||
log = get_logger('tractor')

# placeholder for an mp start context if so using that backend
_ctx: mp.context.BaseContext | None = None

# the selectable process-spawning backends
SpawnMethodKey = Literal[
    'trio',  # supported on all platforms
    'mp_spawn',
    'mp_forkserver',  # posix only
]

# currently selected backend; mutated via `try_set_start_method()`
_spawn_method: SpawnMethodKey = 'trio'


if platform.system() == 'Windows':

    _ctx = mp.get_context("spawn")

    async def proc_waiter(proc: mp.Process) -> None:
        # Block (trio-compatibly) until the proc's sentinel
        # handle is signalled, i.e. the process has exited.
        await trio.lowlevel.WaitForSingleObject(proc.sentinel)
else:
    # *NIX systems use ``trio`` primitives as our default as well

    async def proc_waiter(proc: mp.Process) -> None:
        # On posix the sentinel is an fd which becomes readable
        # when the process exits.
        await trio.lowlevel.wait_readable(proc.sentinel)
|
||||
|
||||
|
||||
def try_set_start_method(
|
||||
key: SpawnMethodKey
|
||||
|
||||
) -> mp.context.BaseContext | None:
|
||||
'''
|
||||
Attempt to set the method for process starting, aka the "actor
|
||||
spawning backend".
|
||||
|
||||
If the desired method is not supported this function will error.
|
||||
On Windows only the ``multiprocessing`` "spawn" method is offered
|
||||
besides the default ``trio`` which uses async wrapping around
|
||||
``subprocess.Popen``.
|
||||
|
||||
'''
|
||||
import multiprocessing as mp
|
||||
global _ctx
|
||||
global _spawn_method
|
||||
|
||||
mp_methods = mp.get_all_start_methods()
|
||||
if 'fork' in mp_methods:
|
||||
# forking is incompatible with ``trio``s global task tree
|
||||
mp_methods.remove('fork')
|
||||
|
||||
match key:
|
||||
case 'mp_forkserver':
|
||||
from . import _forkserver_override
|
||||
_forkserver_override.override_stdlib()
|
||||
_ctx = mp.get_context('forkserver')
|
||||
|
||||
case 'mp_spawn':
|
||||
_ctx = mp.get_context('spawn')
|
||||
|
||||
case 'trio':
|
||||
_ctx = None
|
||||
|
||||
case _:
|
||||
raise ValueError(
|
||||
f'Spawn method `{key}` is invalid!\n'
|
||||
f'Please choose one of {SpawnMethodKey}'
|
||||
)
|
||||
|
||||
_spawn_method = key
|
||||
return _ctx
|
||||
|
||||
|
||||
async def exhaust_portal(

    portal: Portal,
    actor: Actor

) -> Any:
    '''
    Pull final result from portal (assuming it has one).

    If the main task is an async generator do our best to consume
    what's left of it.

    NOTE: this never raises; any error - including a
    `trio.Cancelled` - is *returned* so the caller can decide how
    to record/re-raise it.

    '''
    __tracebackhide__ = True  # hide this frame from user tracebacks
    try:
        log.debug(
            f'Waiting on final result from {actor.uid}'
        )

        # XXX: streams should never be reaped here since they should
        # always be established and shutdown using a context manager api
        final: Any = await portal.wait_for_result()

    except (
        Exception,
        BaseExceptionGroup,
    ) as err:
        # we reraise in the parent task via a ``BaseExceptionGroup``
        return err

    except trio.Cancelled as err:
        # lol, of course we need this too ;P
        # TODO: merge with above?
        # (`trio.Cancelled` derives from `BaseException` so it is
        # NOT caught by the `Exception` clause above.)
        log.warning(
            'Cancelled portal result waiter task:\n'
            f'uid: {portal.channel.aid}\n'
            f'error: {err}\n'
        )
        return err

    else:
        # success path: no exception was raised while waiting.
        log.debug(
            f'Returning final result from portal:\n'
            f'uid: {portal.channel.aid}\n'
            f'result: {final}\n'
        )
        return final
|
||||
|
||||
|
||||
async def cancel_on_completion(

    portal: Portal,
    actor: Actor,
    errors: dict[tuple[str, str], Exception],

) -> None:
    '''
    Cancel actor gracefully once its "main" portal's
    result arrives.

    Should only be called for actors spawned via the
    `Portal.run_in_actor()` API.

    => and really this API will be deprecated and should be
    re-implemented as a `.hilevel.one_shot_task_nursery()`..)

    '''
    # if this call errors we store the exception for later
    # in ``errors`` which will be reraised inside
    # an exception group and we still send out a cancel request
    result: Any|Exception = await exhaust_portal(
        portal,
        actor,
    )
    # NOTE(review): `exhaust_portal()` may also return a
    # `trio.Cancelled` or `BaseExceptionGroup` which are NOT
    # `Exception` subclasses and thus fall through to the
    # "graceful" branch below - confirm this is intended.
    if isinstance(result, Exception):
        errors[actor.uid]: Exception = result
        log.cancel(
            'Cancelling subactor runtime due to error:\n\n'
            f'Portal.cancel_actor() => {portal.channel.uid}\n\n'
            f'error: {result}\n'
        )

    else:
        log.runtime(
            'Cancelling subactor gracefully:\n\n'
            f'Portal.cancel_actor() => {portal.channel.uid}\n\n'
            f'result: {result}\n'
        )

    # cancel the process now that we have a final result
    await portal.cancel_actor()
|
||||
|
||||
|
||||
async def hard_kill(
    proc: trio.Process,

    # seconds to allow a graceful exit before resorting to SIGKILL
    terminate_after: float = 1.6,
    # NOTE: for mucking with `.pause()`-ing inside the runtime
    # whilst also hacking on it XD
    # terminate_after: int = 99999,

) -> None:
    '''
    Un-gracefully terminate an OS level `trio.Process` after timeout.

    Used in 2 main cases:

    - "unknown remote runtime state": a hanging/stalled actor that
      isn't responding after sending a (graceful) runtime cancel
      request via an IPC msg.
    - "cancelled during spawn": a process who's actor runtime was
      cancelled before full startup completed (such that
      cancel-request-handling machinery was never fully
      initialized) and thus a "cancel request msg" is never going
      to be handled.

    '''
    log.cancel(
        'Terminating sub-proc\n'
        f'>x)\n'
        f' |_{proc}\n'
    )
    # NOTE: this timeout used to do nothing since we were shielding
    # the ``.wait()`` inside ``new_proc()`` which will pretty much
    # never release until the process exits, now it acts as
    # a hard-kill time ultimatum.
    with trio.move_on_after(terminate_after) as cs:

        # NOTE: code below was copied verbatim from the now deprecated
        # (in 0.20.0) ``trio._subrocess.Process.aclose()``, orig doc
        # string:
        #
        # Close any pipes we have to the process (both input and output)
        # and wait for it to exit. If cancelled, kills the process and
        # waits for it to finish exiting before propagating the
        # cancellation.
        #
        # This code was originally triggred by ``proc.__aexit__()``
        # but now must be called manually.
        with trio.CancelScope(shield=True):
            if proc.stdin is not None:
                await proc.stdin.aclose()
            if proc.stdout is not None:
                await proc.stdout.aclose()
            if proc.stderr is not None:
                await proc.stderr.aclose()
            try:
                await proc.wait()
            finally:
                # if the wait above was cancelled, make sure the
                # proc actually dies before we let the cancellation
                # propagate.
                if proc.returncode is None:
                    proc.kill()
                    with trio.CancelScope(shield=True):
                        await proc.wait()

    # XXX NOTE XXX: zombie squad dispatch:
    # (should ideally never, but) If we do get here it means
    # graceful termination of a process failed and we need to
    # resort to OS level signalling to interrupt and cancel the
    # (presumably stalled or hung) actor. Since we never allow
    # zombies (as a feature) we ask the OS to do send in the
    # removal swad as the last resort.
    if cs.cancelled_caught:

        # TODO? attempt at intermediary-rent-sub
        # with child in debug lock?
        # |_https://github.com/goodboy/tractor/issues/320
        #
        # if not is_root_process():
        #     log.warning(
        #         'Attempting to acquire debug-REPL-lock before zombie reap!'
        #     )
        #     with trio.CancelScope(shield=True):
        #         async with debug.acquire_debug_lock(
        #             subactor_uid=current_actor().uid,
        #         ) as _ctx:
        #             log.warning(
        #                 'Acquired debug lock, child ready to be killed ??\n'
        #             )

        # TODO: toss in the skynet-logo face as ascii art?
        log.critical(
            # 'Well, the #ZOMBIE_LORD_IS_HERE# to collect\n'
            '#T-800 deployed to collect zombie B0\n'
            f'>x)\n'
            f' |_{proc}\n'
        )
        proc.kill()
|
||||
|
||||
|
||||
async def soft_kill(
    proc: ProcessType,
    # backend-specific awaitable which completes when `proc` exits
    wait_func: Callable[
        [ProcessType],
        Awaitable,
    ],
    portal: Portal,

) -> None:
    '''
    Wait for proc termination but **don't yet** teardown
    std-streams since it will clobber any ongoing pdb REPL
    session.

    This is our "soft"/graceful, and thus itself also cancellable,
    join/reap on an actor-runtime-in-process shutdown; it is
    **not** the same as a "hard kill" via an OS signal (for that
    see `.hard_kill()`).

    '''
    chan: Channel = portal.channel
    peer_aid: msgtypes.Aid = chan.aid
    try:
        log.cancel(
            f'Soft killing sub-actor via portal request\n'
            f'\n'
            f'c)=> {peer_aid.reprol()}@[{chan.maddr}]\n'
            f' |_{proc}\n'
        )
        # wait on sub-proc to signal termination
        await wait_func(proc)

    except trio.Cancelled:
        # shield the debugger check so it can't itself be
        # interrupted by the in-flight cancellation.
        with trio.CancelScope(shield=True):
            await debug.maybe_wait_for_debugger(
                child_in_debug=_runtime_vars.get(
                    '_debug_mode', False
                ),
                header_msg=(
                    'Delaying `soft_kill()` subproc reaper while debugger locked..\n'
                ),
                # TODO: need a diff value then default?
                # poll_steps=9999999,
            )

        # if cancelled during a soft wait, cancel the child
        # actor before entering the hard reap sequence
        # below. This means we try to do a graceful teardown
        # via sending a cancel message before getting out
        # zombie killing tools.
        async with trio.open_nursery() as n:
            n.cancel_scope.shield = True

            async def cancel_on_proc_deth():
                '''
                "Cancel-the-cancel" request: if we detect that the
                underlying sub-process exited prior to
                a `Portal.cancel_actor()` call completing .

                '''
                await wait_func(proc)
                n.cancel_scope.cancel()

            # start a task to wait on the termination of the
            # process by itself waiting on a (caller provided) wait
            # function which should unblock when the target process
            # has terminated.
            n.start_soon(cancel_on_proc_deth)

            # send the actor-runtime a cancel request.
            await portal.cancel_actor()

            if proc.poll() is None:  # type: ignore
                log.warning(
                    'Subactor still alive after cancel request?\n\n'
                    f'uid: {peer_aid}\n'
                    f'|_{proc}\n'
                )
                n.cancel_scope.cancel()
        # always re-raise the original cancellation so the caller's
        # (hard) reap sequence proceeds.
        raise
|
||||
|
||||
|
||||
async def new_proc(
    name: str,
    actor_nursery: ActorNursery,
    subactor: Actor,
    errors: dict[tuple[str, str], Exception],

    # passed through to actor main
    bind_addrs: list[UnwrappedAddress],
    parent_addr: UnwrappedAddress,
    _runtime_vars: dict[str, Any],  # serialized and sent to _child

    *,

    infect_asyncio: bool = False,
    task_status: TaskStatus[Portal] = trio.TASK_STATUS_IGNORED,
    # NOTE: `None` default (instead of a shared mutable `{}`) to
    # avoid the mutable-default-argument pitfall; also fixed the
    # prior mis-annotation `dict[str, any]` (builtin `any`).
    proc_kwargs: dict[str, Any]|None = None,

) -> None:
    '''
    Spawn a new sub-actor process using the currently configured
    spawning backend (see `_spawn_method`) and delegate all
    supervision to that backend's target routine.

    '''
    if proc_kwargs is None:
        proc_kwargs = {}

    # lookup backend spawning target
    target: Callable = _methods[_spawn_method]

    # mark the new actor with the global spawn method
    subactor._spawn_method = _spawn_method

    await target(
        name,
        actor_nursery,
        subactor,
        errors,
        bind_addrs,
        parent_addr,
        _runtime_vars,  # run time vars
        infect_asyncio=infect_asyncio,
        task_status=task_status,
        proc_kwargs=proc_kwargs,
    )
|
||||
|
||||
|
||||
async def trio_proc(
|
||||
name: str,
|
||||
actor_nursery: ActorNursery,
|
||||
subactor: Actor,
|
||||
errors: dict[tuple[str, str], Exception],
|
||||
|
||||
# passed through to actor main
|
||||
bind_addrs: list[UnwrappedAddress],
|
||||
parent_addr: UnwrappedAddress,
|
||||
_runtime_vars: dict[str, Any], # serialized and sent to _child
|
||||
*,
|
||||
infect_asyncio: bool = False,
|
||||
task_status: TaskStatus[Portal] = trio.TASK_STATUS_IGNORED,
|
||||
proc_kwargs: dict[str, any] = {}
|
||||
|
||||
) -> None:
|
||||
'''
|
||||
Create a new ``Process`` using a "spawn method" as (configured using
|
||||
``try_set_start_method()``).
|
||||
|
||||
This routine should be started in a actor runtime task and the logic
|
||||
here is to be considered the core supervision strategy.
|
||||
|
||||
'''
|
||||
spawn_cmd = [
|
||||
sys.executable,
|
||||
"-m",
|
||||
# Hardcode this (instead of using ``_child.__name__`` to avoid a
|
||||
# double import warning: https://stackoverflow.com/a/45070583
|
||||
"tractor._child",
|
||||
# We provide the child's unique identifier on this exec/spawn
|
||||
# line for debugging purposes when viewing the process tree from
|
||||
# the OS; it otherwise can be passed via the parent channel if
|
||||
# we prefer in the future (for privacy).
|
||||
"--uid",
|
||||
# TODO, how to pass this over "wire" encodings like
|
||||
# cmdline args?
|
||||
# -[ ] maybe we can add an `msgtypes.Aid.min_tuple()` ?
|
||||
str(subactor.uid),
|
||||
# Address the child must connect to on startup
|
||||
"--parent_addr",
|
||||
str(parent_addr)
|
||||
]
|
||||
|
||||
if subactor.loglevel:
|
||||
spawn_cmd += [
|
||||
"--loglevel",
|
||||
subactor.loglevel
|
||||
]
|
||||
# Tell child to run in guest mode on top of ``asyncio`` loop
|
||||
if infect_asyncio:
|
||||
spawn_cmd.append("--asyncio")
|
||||
|
||||
cancelled_during_spawn: bool = False
|
||||
proc: trio.Process|None = None
|
||||
ipc_server: _server.Server = actor_nursery._actor.ipc_server
|
||||
try:
|
||||
try:
|
||||
proc: trio.Process = await trio.lowlevel.open_process(spawn_cmd, **proc_kwargs)
|
||||
log.runtime(
|
||||
f'Started new child subproc\n'
|
||||
f'(>\n'
|
||||
f' |_{proc}\n'
|
||||
)
|
||||
|
||||
# wait for actor to spawn and connect back to us
|
||||
# channel should have handshake completed by the
|
||||
# local actor by the time we get a ref to it
|
||||
event, chan = await ipc_server.wait_for_peer(
|
||||
subactor.uid
|
||||
)
|
||||
|
||||
except trio.Cancelled:
|
||||
cancelled_during_spawn = True
|
||||
# we may cancel before the child connects back in which
|
||||
# case avoid clobbering the pdb tty.
|
||||
if debug_mode():
|
||||
with trio.CancelScope(shield=True):
|
||||
# don't clobber an ongoing pdb
|
||||
if is_root_process():
|
||||
await debug.maybe_wait_for_debugger()
|
||||
|
||||
elif proc is not None:
|
||||
async with debug.acquire_debug_lock(subactor.uid):
|
||||
# soft wait on the proc to terminate
|
||||
with trio.move_on_after(0.5):
|
||||
await proc.wait()
|
||||
raise
|
||||
|
||||
# a sub-proc ref **must** exist now
|
||||
assert proc
|
||||
|
||||
portal = Portal(chan)
|
||||
actor_nursery._children[subactor.uid] = (
|
||||
subactor,
|
||||
proc,
|
||||
portal,
|
||||
)
|
||||
|
||||
# send a "spawning specification" which configures the
|
||||
# initial runtime state of the child.
|
||||
sspec = msgtypes.SpawnSpec(
|
||||
_parent_main_data=subactor._parent_main_data,
|
||||
enable_modules=subactor.enable_modules,
|
||||
reg_addrs=subactor.reg_addrs,
|
||||
bind_addrs=bind_addrs,
|
||||
_runtime_vars=_runtime_vars,
|
||||
)
|
||||
log.runtime(
|
||||
f'Sending spawn spec to child\n'
|
||||
f'{{}}=> {chan.aid.reprol()!r}\n'
|
||||
f'\n'
|
||||
f'{pretty_struct.pformat(sspec)}\n'
|
||||
)
|
||||
await chan.send(sspec)
|
||||
|
||||
# track subactor in current nursery
|
||||
curr_actor: Actor = current_actor()
|
||||
curr_actor._actoruid2nursery[subactor.uid] = actor_nursery
|
||||
|
||||
# resume caller at next checkpoint now that child is up
|
||||
task_status.started(portal)
|
||||
|
||||
# wait for ActorNursery.wait() to be called
|
||||
with trio.CancelScope(shield=True):
|
||||
await actor_nursery._join_procs.wait()
|
||||
|
||||
async with trio.open_nursery() as nursery:
|
||||
if portal in actor_nursery._cancel_after_result_on_exit:
|
||||
nursery.start_soon(
|
||||
cancel_on_completion,
|
||||
portal,
|
||||
subactor,
|
||||
errors
|
||||
)
|
||||
|
||||
# This is a "soft" (cancellable) join/reap which
|
||||
# will remote cancel the actor on a ``trio.Cancelled``
|
||||
# condition.
|
||||
await soft_kill(
|
||||
proc,
|
||||
trio.Process.wait, # XXX, uses `pidfd_open()` below.
|
||||
portal
|
||||
)
|
||||
|
||||
# cancel result waiter that may have been spawned in
|
||||
# tandem if not done already
|
||||
log.cancel(
|
||||
'Cancelling portal result reaper task\n'
|
||||
f'c)> {subactor.aid.reprol()!r}\n'
|
||||
)
|
||||
nursery.cancel_scope.cancel()
|
||||
|
||||
finally:
|
||||
# XXX NOTE XXX: The "hard" reap since no actor zombies are
|
||||
# allowed! Do this **after** cancellation/teardown to avoid
|
||||
# killing the process too early.
|
||||
if proc:
|
||||
reap_repr: str = _pformat.nest_from_op(
|
||||
input_op='>x)',
|
||||
text=subactor.pformat(),
|
||||
)
|
||||
log.cancel(
|
||||
f'Hard reap sequence starting for subactor\n'
|
||||
f'{reap_repr}'
|
||||
)
|
||||
|
||||
with trio.CancelScope(shield=True):
|
||||
# don't clobber an ongoing pdb
|
||||
if cancelled_during_spawn:
|
||||
# Try again to avoid TTY clobbering.
|
||||
async with debug.acquire_debug_lock(subactor.uid):
|
||||
with trio.move_on_after(0.5):
|
||||
await proc.wait()
|
||||
|
||||
await debug.maybe_wait_for_debugger(
|
||||
child_in_debug=_runtime_vars.get(
|
||||
'_debug_mode', False
|
||||
),
|
||||
header_msg=(
|
||||
'Delaying subproc reaper while debugger locked..\n'
|
||||
),
|
||||
|
||||
# TODO: need a diff value then default?
|
||||
# poll_steps=9999999,
|
||||
)
|
||||
# TODO: solve the following issue where we need
|
||||
# to do a similar wait like this but in an
|
||||
# "intermediary" parent actor that itself isn't
|
||||
# in debug but has a child that is, and we need
|
||||
# to hold off on relaying SIGINT until that child
|
||||
# is complete.
|
||||
# https://github.com/goodboy/tractor/issues/320
|
||||
# -[ ] we need to handle non-root parent-actors specially
|
||||
# by somehow determining if a child is in debug and then
|
||||
# avoiding cancel/kill of said child by this
|
||||
# (intermediary) parent until such a time as the root says
|
||||
# the pdb lock is released and we are good to tear down
|
||||
# (our children)..
|
||||
#
|
||||
# -[ ] so maybe something like this where we try to
|
||||
# acquire the lock and get notified of who has it,
|
||||
# check that uid against our known children?
|
||||
# this_uid: tuple[str, str] = current_actor().uid
|
||||
# await debug.acquire_debug_lock(this_uid)
|
||||
|
||||
if proc.poll() is None:
|
||||
log.cancel(f"Attempting to hard kill {proc}")
|
||||
await hard_kill(proc)
|
||||
|
||||
log.debug(f"Joined {proc}")
|
||||
else:
|
||||
log.warning('Nursery cancelled before sub-proc started')
|
||||
|
||||
if not cancelled_during_spawn:
|
||||
# pop child entry to indicate we no longer managing this
|
||||
# subactor
|
||||
actor_nursery._children.pop(subactor.uid)
|
||||
|
||||
|
||||
async def mp_proc(
    name: str,
    actor_nursery: ActorNursery,  # type: ignore  # noqa
    subactor: Actor,
    errors: dict[tuple[str, str], Exception],
    # passed through to actor main
    bind_addrs: list[UnwrappedAddress],
    parent_addr: UnwrappedAddress,
    _runtime_vars: dict[str, Any],  # serialized and sent to _child
    *,
    infect_asyncio: bool = False,
    task_status: TaskStatus[Portal] = trio.TASK_STATUS_IGNORED,
    # NOTE: was `dict[str, any] = {}` — `any` is the builtin function,
    # not `typing.Any`, and a mutable default is shared across calls;
    # `None` is safe since this backend never reads it.
    proc_kwargs: dict[str, Any] | None = None,

) -> None:
    '''
    Spawn a sub-actor process using a `multiprocessing` start method
    (as configured on the module-level `_ctx`) and supervise it until
    the enclosing `ActorNursery` block exits.

    Mirrors the `trio_proc` backend: register the child with the
    nursery, start the proc, wait for the IPC handshake, hand a
    `Portal` back via ``task_status``, then soft-join and finally
    hard-reap the process (no zombies allowed).

    Parameters
    ----------
    name: process name passed to ``mp.Process``.
    actor_nursery: nursery tracking/supervising this child.
    subactor: the `Actor` instance to run in the child proc.
    errors: shared map for collecting per-child exceptions.
    bind_addrs: transport addrs the child should listen on.
    parent_addr: our addr, dialed back by the child for the handshake.
    _runtime_vars: runtime state serialized to the child.
    infect_asyncio: run the child with an asyncio-infected loop.
    task_status: `trio` startup handshake; receives the `Portal`.
    proc_kwargs: accepted for backend-signature parity; unused here.

    '''
    # uggh zone: pre-3.8 pythons only ship the semaphore-only tracker;
    # alias it so the rest of this code can use one name.
    try:
        from multiprocessing import semaphore_tracker  # type: ignore
        resource_tracker = semaphore_tracker
        resource_tracker._resource_tracker = resource_tracker._semaphore_tracker  # noqa
    except ImportError:
        # 3.8 introduces a more general version that also tracks shared mems
        from multiprocessing import resource_tracker  # type: ignore

    assert _ctx
    start_method = _ctx.get_start_method()
    if start_method == 'forkserver':

        from multiprocessing import forkserver  # type: ignore
        # XXX do our hackery on the stdlib to avoid multiple
        # forkservers (one at each subproc layer).
        fs = forkserver._forkserver
        curr_actor = current_actor()
        if is_main_process() and not curr_actor._forkserver_info:
            # if we're the "main" process start the forkserver
            # only once and pass its ipc info to downstream
            # children
            # forkserver.set_forkserver_preload(enable_modules)
            forkserver.ensure_running()
            fs_info = (
                fs._forkserver_address,  # type: ignore  # noqa
                fs._forkserver_alive_fd,  # type: ignore  # noqa
                getattr(fs, '_forkserver_pid', None),
                getattr(
                    resource_tracker._resource_tracker, '_pid', None),
                resource_tracker._resource_tracker._fd,
            )
        else:  # request to forkerserver to fork a new child
            assert curr_actor._forkserver_info
            # chained assignment: unpack our parent's forkserver info
            # onto the stdlib singletons AND capture it as `fs_info`.
            fs_info = (
                fs._forkserver_address,  # type: ignore  # noqa
                fs._forkserver_alive_fd,  # type: ignore  # noqa
                fs._forkserver_pid,  # type: ignore  # noqa
                resource_tracker._resource_tracker._pid,
                resource_tracker._resource_tracker._fd,
            ) = curr_actor._forkserver_info
    else:
        # spawn method
        fs_info = (None, None, None, None, None)

    proc: mp.Process = _ctx.Process(  # type: ignore
        target=_mp_main,
        args=(
            subactor,
            bind_addrs,
            fs_info,
            _spawn_method,
            parent_addr,
            infect_asyncio,
        ),
        # daemon=True,
        name=name,
    )
    # `multiprocessing` only (since no async interface):
    # register the process before start in case we get a cancel
    # request before the actor has fully spawned - then we can wait
    # for it to fully come up before sending a cancel request
    actor_nursery._children[subactor.uid] = (subactor, proc, None)

    proc.start()
    if not proc.is_alive():
        raise ActorFailure("Couldn't start sub-actor?")

    log.runtime(f"Started {proc}")

    ipc_server: _server.Server = actor_nursery._actor.ipc_server
    try:
        # wait for actor to spawn and connect back to us
        # channel should have handshake completed by the
        # local actor by the time we get a ref to it
        event, chan = await ipc_server.wait_for_peer(
            subactor.uid,
        )

        # XXX: monkey patch poll API to match the ``subprocess`` API..
        # not sure why they don't expose this but kk.
        proc.poll = lambda: proc.exitcode  # type: ignore

        # TODO: in the case we were cancelled before the sub-proc
        # registered itself back we must be sure to try and clean
        # any process we may have started.

        portal = Portal(chan)
        actor_nursery._children[subactor.uid] = (subactor, proc, portal)

        # unblock parent task
        task_status.started(portal)

        # wait for ``ActorNursery`` block to signal that
        # subprocesses can be waited upon.
        # This is required to ensure synchronization
        # with user code that may want to manually await results
        # from nursery spawned sub-actors. We don't want the
        # containing nurseries here to collect results or error
        # while user code is still doing it's thing. Only after the
        # nursery block closes do we allow subactor results to be
        # awaited and reported upwards to the supervisor.
        with trio.CancelScope(shield=True):
            await actor_nursery._join_procs.wait()

        async with trio.open_nursery() as nursery:
            if portal in actor_nursery._cancel_after_result_on_exit:
                nursery.start_soon(
                    cancel_on_completion,
                    portal,
                    subactor,
                    errors
                )

            # This is a "soft" (cancellable) join/reap which
            # will remote cancel the actor on a ``trio.Cancelled``
            # condition.
            await soft_kill(
                proc,
                proc_waiter,
                portal
            )

            # cancel result waiter that may have been spawned in
            # tandem if not done already
            log.warning(
                "Cancelling existing result waiter task for "
                f"{subactor.uid}")
            nursery.cancel_scope.cancel()

    finally:
        # hard reap sequence
        if proc.is_alive():
            log.cancel(f"Attempting to hard kill {proc}")
            with trio.move_on_after(0.1) as cs:
                # shield so the grace-period wait survives cancellation
                cs.shield = True
                await proc_waiter(proc)

            if cs.cancelled_caught:
                proc.terminate()

        proc.join()
        log.debug(f"Joined {proc}")

        # pop child entry to indicate we are no longer managing subactor
        actor_nursery._children.pop(subactor.uid)

        # TODO: prolly report to ``mypy`` how this causes all sorts of
        # false errors..
        # subactor, proc, portal = actor_nursery._children.pop(subactor.uid)
|
||||
|
||||
# Dispatch table: spawn-method key -> proc-spawning backend coroutine.
_methods: dict[SpawnMethodKey, Callable] = dict(
    trio=trio_proc,
    mp_spawn=mp_proc,
    mp_forkserver=mp_proc,
)
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue