Fix `SharedMemory` under `subint_forkserver`
Implements the resolution described in c99d475d's
`subint_forkserver_mp_shared_memory_issue.md` (now
updated with the resolution post-mortem). Two-part
fix that side-steps `mp.resource_tracker` entirely
rather than try to make it fork-safe — turns out
that's both simpler AND more correct given tractor
already SC-manages allocation lifetimes.
Deats,
- `tractor/ipc/_mp_bs.py::disable_mantracker()`: drop the
`platform.python_version_tuple()[:-1] >= ('3', '13')` branch — patches
now run unconditionally:
* monkey-patch `mp.resource_tracker. _resource_tracker` to a no-op
`ManTracker` subclass (empty `register` / `unregister`
/ `ensure_running`).
* return `partial(SharedMemory, track=False)` for the per-allocation
opt-out.
* belt + suspenders: even if something dodges the wrapper, the
singleton can't talk to the inherited (broken) parent fd.
- `tractor/ipc/_shm.py::open_shm_list()`: drop the 3.13+ conditional
skip of the unlink-callback; install a `try_unlink()` wrapper that
swallows `FileNotFoundError` (sibling-already-cleaned race in
shared-key setups). Without `mp.resource_tracker` doing it for us, we
own the unlink — `actor. lifetime_stack` is the right place since
tractor already controls actor lifecycle.
- `tests/test_shm.py`: uncomment-out `subint_forkserver` from the
module-level skip- list (tests pass now). Inline comment cross-refs
the two `_mp_bs` / `_shm` workarounds.
- `ai/conc-anal/subint_forkserver_mp_shared_memory_ issue.md`: heavy
rewrite — flips status from "open / unresolvable in tractor" to
"resolved, kept as decision record". Adds Resolution section, "Why
this is the right call" rationale (mp tracker is widely criticized;
tractor already owns lifecycle), trade-offs (crash-leaked segments,
lost mp leak warning), verification (7 passed under both
`subint_forkserver` and `trio` backends), and upstream issue links
(this patch was generated in some part by [`claude-code`][claude-code-gh])
[claude-code-gh]: https://github.com/anthropics/claude-code
(cherry picked from commit aa3e230926)
(factored: dropped subint_forkserver conc-anal doc update)
wkt/tooling_enhancements_from_mtf_spawner
parent
da0c457ff7
commit
2e2977b74c
|
|
@ -17,6 +17,10 @@ from tractor.ipc._shm import (
|
||||||
pytestmark = pytest.mark.skipon_spawn_backend(
|
pytestmark = pytest.mark.skipon_spawn_backend(
|
||||||
'subint',
|
'subint',
|
||||||
# 'subint_forkserver',
|
# 'subint_forkserver',
|
||||||
|
# XXX we hack around this stdlib limitation by both,
|
||||||
|
# - setting `ShareMemory(track=False)`
|
||||||
|
# - overriding the `mp.ResourceTracker` nonsense in
|
||||||
|
# `.ipc._mp_bs`.
|
||||||
reason=(
|
reason=(
|
||||||
'subint: GIL-contention hanging class.\n'
|
'subint: GIL-contention hanging class.\n'
|
||||||
'subint_forkserver: `multiprocessing.SharedMemory` '
|
'subint_forkserver: `multiprocessing.SharedMemory` '
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@
|
||||||
Utils to tame mp non-SC madeness
|
Utils to tame mp non-SC madeness
|
||||||
|
|
||||||
'''
|
'''
|
||||||
import platform
|
from functools import partial
|
||||||
|
|
||||||
|
|
||||||
def disable_mantracker():
|
def disable_mantracker():
|
||||||
|
|
@ -27,49 +27,37 @@ def disable_mantracker():
|
||||||
|
|
||||||
'''
|
'''
|
||||||
from multiprocessing.shared_memory import SharedMemory
|
from multiprocessing.shared_memory import SharedMemory
|
||||||
|
from multiprocessing import (
|
||||||
|
resource_tracker as mantracker,
|
||||||
|
)
|
||||||
|
|
||||||
|
# XXX ALWAYS disable the stdlib's "resource tracker"; it prevents
|
||||||
|
# fork backends and never was useful to us since we're SC
|
||||||
|
# lifetime managing all allocations.
|
||||||
|
class ManTracker(mantracker.ResourceTracker):
|
||||||
|
def register(self, name, rtype):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def unregister(self, name, rtype):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def ensure_running(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# "know your land and know your prey"
|
||||||
|
# https://www.dailymotion.com/video/x6ozzco
|
||||||
|
mantracker._resource_tracker = ManTracker()
|
||||||
|
mantracker.register = mantracker._resource_tracker.register
|
||||||
|
mantracker.ensure_running = mantracker._resource_tracker.ensure_running
|
||||||
|
mantracker.unregister = mantracker._resource_tracker.unregister
|
||||||
|
mantracker.getfd = mantracker._resource_tracker.getfd
|
||||||
|
|
||||||
# 3.13+ only.. can pass `track=False` to disable
|
# 3.13+ only.. can pass `track=False` to disable
|
||||||
# all the resource tracker bs.
|
# all the resource tracker bs.
|
||||||
# https://docs.python.org/3/library/multiprocessing.shared_memory.html
|
# https://docs.python.org/3/library/multiprocessing.shared_memory.html
|
||||||
if (_py_313 := (
|
shmT = partial(
|
||||||
platform.python_version_tuple()[:-1]
|
SharedMemory,
|
||||||
>=
|
track=False,
|
||||||
('3', '13')
|
)
|
||||||
)
|
|
||||||
):
|
|
||||||
from functools import partial
|
|
||||||
return partial(
|
|
||||||
SharedMemory,
|
|
||||||
track=False,
|
|
||||||
)
|
|
||||||
|
|
||||||
# !TODO, once we drop 3.12- we can obvi remove all this!
|
|
||||||
else:
|
|
||||||
from multiprocessing import (
|
|
||||||
resource_tracker as mantracker,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Tell the "resource tracker" thing to fuck off.
|
|
||||||
class ManTracker(mantracker.ResourceTracker):
|
|
||||||
def register(self, name, rtype):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def unregister(self, name, rtype):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def ensure_running(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
# "know your land and know your prey"
|
|
||||||
# https://www.dailymotion.com/video/x6ozzco
|
|
||||||
mantracker._resource_tracker = ManTracker()
|
|
||||||
mantracker.register = mantracker._resource_tracker.register
|
|
||||||
mantracker.ensure_running = mantracker._resource_tracker.ensure_running
|
|
||||||
mantracker.unregister = mantracker._resource_tracker.unregister
|
|
||||||
mantracker.getfd = mantracker._resource_tracker.getfd
|
|
||||||
|
|
||||||
# use std type verbatim
|
|
||||||
shmT = SharedMemory
|
|
||||||
|
|
||||||
return shmT
|
return shmT
|
||||||
|
|
|
||||||
|
|
@ -929,15 +929,26 @@ def open_shm_list(
|
||||||
# "close" attached shm on actor teardown
|
# "close" attached shm on actor teardown
|
||||||
try:
|
try:
|
||||||
actor = tractor.current_actor()
|
actor = tractor.current_actor()
|
||||||
|
|
||||||
actor.lifetime_stack.callback(shml.shm.close)
|
actor.lifetime_stack.callback(shml.shm.close)
|
||||||
|
|
||||||
# XXX on 3.13+ we don't need to call this?
|
# >XXX NOTE< on 3.13+ we need to call this AS WELL AS pass
|
||||||
# -> bc we pass `track=False` for `SharedMemeory` orr?
|
# `track=False` for `mp.SharedMemeory` otherwise fork based
|
||||||
if (
|
# backends will error out due to long lived stdlib
|
||||||
platform.python_version_tuple()[:-1] < ('3', '13')
|
# limitations,
|
||||||
):
|
# - https://bugs.python.org/issue38119
|
||||||
actor.lifetime_stack.callback(shml.shm.unlink)
|
# - https://bugs.python.org/issue45209
|
||||||
|
#
|
||||||
|
def try_unlink():
|
||||||
|
try:
|
||||||
|
shml.shm.unlink()
|
||||||
|
except FileNotFoundError as fne:
|
||||||
|
log.debug(
|
||||||
|
f'ShmList already deallocated pre-actor-shutdown.\n'
|
||||||
|
f'{fne!r}\n'
|
||||||
|
)
|
||||||
|
|
||||||
|
actor.lifetime_stack.callback(try_unlink)
|
||||||
|
|
||||||
except RuntimeError:
|
except RuntimeError:
|
||||||
log.warning('tractor runtime not active, skipping teardown steps')
|
log.warning('tractor runtime not active, skipping teardown steps')
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue