From 0ef98ba25c1ec6feb6327f234a350a5ceeddedfc Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Mon, 29 Sep 2025 16:41:08 -0400 Subject: [PATCH 01/44] Factor to a new `.tsp._history` sub-mod Cleaning out the `piker.tsp` pkg-mod to be only the (re)exports needed for `._anal`/`._history` refs-use elsewhere! --- piker/tsp/__init__.py | 1429 +-------------------------------------- piker/tsp/_history.py | 1471 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1477 insertions(+), 1423 deletions(-) create mode 100644 piker/tsp/_history.py diff --git a/piker/tsp/__init__.py b/piker/tsp/__init__.py index 121fcbb7..3c49e71b 100644 --- a/piker/tsp/__init__.py +++ b/piker/tsp/__init__.py @@ -28,1435 +28,18 @@ Historical TSP (time-series processing) lowlevel mgmt machinery and biz logic fo stored offline (in a tsdb). ''' -from __future__ import annotations -from datetime import datetime -from functools import partial -from pathlib import Path -from pprint import pformat -from types import ModuleType -from typing import ( - Callable, - Generator, - TYPE_CHECKING, -) - -import trio -from trio_typing import TaskStatus -import tractor -from pendulum import ( - Interval, - DateTime, - Duration, - duration as mk_duration, - from_timestamp, -) -import numpy as np -import polars as pl - -from piker.brokers import NoData -from piker.accounting import ( - MktPair, -) -from piker.data._util import ( - log, -) -from ..data._sharedmem import ( - maybe_open_shm_array, - ShmArray, -) -from ..data._source import def_iohlcv_fields -from ..data._sampling import ( - open_sample_stream, -) from ._anal import ( - get_null_segs as get_null_segs, - iter_null_segs as iter_null_segs, - Frame as Frame, - Seq as Seq, - # codec-ish - np2pl as np2pl, + # `polars` specific + dedupe as dedupe, + detect_time_gaps as detect_time_gaps, pl2np as pl2np, # `numpy` only slice_from_time as slice_from_time, - - # `polars` specific - dedupe as dedupe, - with_dts as with_dts, - detect_time_gaps as 
detect_time_gaps, - sort_diff as sort_diff, - - # TODO: - detect_price_gaps as detect_price_gaps ) - -# TODO: break up all this shite into submods! -from ..brokers._util import ( - DataUnavailable, +from ._history import ( + iter_dfs_from_shms as iter_dfs_from_shms, + manage_history as manage_history, ) -from ..storage import TimeseriesNotFound - -if TYPE_CHECKING: - from bidict import bidict - from ..service.marketstore import StorageClient - # from .feed import _FeedsBus - - -# `ShmArray` buffer sizing configuration: -_mins_in_day = int(60 * 24) -# how much is probably dependent on lifestyle -# but we reco a buncha times (but only on a -# run-every-other-day kinda week). -_secs_in_day = int(60 * _mins_in_day) -_days_in_week: int = 7 - -_days_worth: int = 3 -_default_hist_size: int = 6 * 365 * _mins_in_day -_hist_buffer_start = int( - _default_hist_size - round(7 * _mins_in_day) -) - -_default_rt_size: int = _days_worth * _secs_in_day -# NOTE: start the append index in rt buffer such that 1 day's worth -# can be appenened before overrun. -_rt_buffer_start = int((_days_worth - 1) * _secs_in_day) - - -def diff_history( - array: np.ndarray, - append_until_dt: datetime | None = None, - prepend_until_dt: datetime | None = None, - -) -> np.ndarray: - - # no diffing with tsdb dt index possible.. - if ( - prepend_until_dt is None - and append_until_dt is None - ): - return array - - times = array['time'] - - if append_until_dt: - return array[times < append_until_dt.timestamp()] - else: - return array[times >= prepend_until_dt.timestamp()] - - -# TODO: can't we just make this a sync func now? -async def shm_push_in_between( - shm: ShmArray, - to_push: np.ndarray, - prepend_index: int, - - update_start_on_prepend: bool = False, - -) -> int: - # XXX: extremely important, there can be no checkpoints - # in the body of this func to avoid entering new ``frames`` - # values while we're pipelining the current ones to - # memory... 
- shm.push( - to_push, - prepend=True, - - # XXX: only update the ._first index if no tsdb - # segment was previously prepended by the - # parent task. - update_first=update_start_on_prepend, - - # XXX: only prepend from a manually calculated shm - # index if there was already a tsdb history - # segment prepended (since then the - # ._first.value is going to be wayyy in the - # past!) - start=( - prepend_index - if not update_start_on_prepend - else None - ), - ) - - -async def maybe_fill_null_segments( - shm: ShmArray, - timeframe: float, - get_hist: Callable, - sampler_stream: tractor.MsgStream, - mkt: MktPair, - - task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED, - -) -> list[Frame]: - - null_segs_detected = trio.Event() - task_status.started(null_segs_detected) - - frame: Frame = shm.array - - null_segs: tuple | None = get_null_segs( - frame, - period=timeframe, - ) - for ( - absi_start, absi_end, - fi_start, fi_end, - start_t, end_t, - start_dt, end_dt, - ) in iter_null_segs( - null_segs=null_segs, - frame=frame, - timeframe=timeframe, - ): - - # XXX NOTE: ?if we get a badly ordered timestamp - # pair, immediately stop backfilling? - if ( - start_dt - and - end_dt < start_dt - ): - await tractor.pause() - break - - ( - array, - next_start_dt, - next_end_dt, - ) = await get_hist( - timeframe, - start_dt=start_dt, - end_dt=end_dt, - ) - - # XXX TODO: pretty sure if i plot tsla, btcusdt.binance - # and mnq.cme.ib this causes a Qt crash XXDDD - - # make sure we don't overrun the buffer start - len_to_push: int = min(absi_end, array.size) - to_push: np.ndarray = array[-len_to_push:] - - await shm_push_in_between( - shm, - to_push, - prepend_index=absi_end, - update_start_on_prepend=False, - ) - # TODO: UI side needs IPC event to update.. - # - make sure the UI actually always handles - # this update! - # - remember that in the display side, only refersh this - # if the respective history is actually "in view". 
- # loop - try: - await sampler_stream.send({ - 'broadcast_all': { - - # XXX NOTE XXX: see the - # `.ui._display.increment_history_view()` if block - # that looks for this info to FORCE a hard viz - # redraw! - 'backfilling': (mkt.fqme, timeframe), - }, - }) - except tractor.ContextCancelled: - # log.exception - await tractor.pause() - raise - - null_segs_detected.set() - # RECHECK for more null-gaps - frame: Frame = shm.array - null_segs: tuple | None = get_null_segs( - frame, - period=timeframe, - ) - if ( - null_segs - and - len(null_segs[-1]) - ): - ( - iabs_slices, - iabs_zero_rows, - zero_t, - ) = null_segs - log.warning( - f'{len(iabs_slices)} NULL TIME SEGMENTS DETECTED!\n' - f'{pformat(iabs_slices)}' - ) - - # TODO: always backfill gaps with the earliest (price) datum's - # value to avoid the y-ranger including zeros and completely - # stretching the y-axis.. - # array: np.ndarray = shm.array - # zeros = array[array['low'] == 0] - ohlc_fields: list[str] = [ - 'open', - 'high', - 'low', - 'close', - ] - - for istart, istop in iabs_slices: - - # get view into buffer for null-segment - gap: np.ndarray = shm._array[istart:istop] - - # copy the oldest OHLC samples forward - cls: float = shm._array[istart]['close'] - - # TODO: how can we mark this range as being a gap tho? - # -[ ] maybe pg finally supports nulls in ndarray to - # show empty space somehow? - # -[ ] we could put a special value in the vlm or - # another col/field to denote? - gap[ohlc_fields] = cls - - start_t: float = shm._array[istart]['time'] - t_diff: float = (istop - istart)*timeframe - - gap['time'] = np.arange( - start=start_t, - stop=start_t + t_diff, - step=timeframe, - ) - - # TODO: reimpl using the new `.ui._remote_ctl` ctx - # ideally using some kinda decent - # tractory-reverse-lookup-connnection from some other - # `Context` type thingy? 
- await sampler_stream.send({ - 'broadcast_all': { - - # XXX NOTE XXX: see the - # `.ui._display.increment_history_view()` if block - # that looks for this info to FORCE a hard viz - # redraw! - 'backfilling': (mkt.fqme, timeframe), - }, - }) - - # TODO: interatively step through any remaining - # time-gaps/null-segments and spawn piecewise backfiller - # tasks in a nursery? - # -[ ] not sure that's going to work so well on the ib - # backend but worth a shot? - # -[ ] mk new history connections to make it properly - # parallel possible no matter the backend? - # -[ ] fill algo: do queries in alternating "latest, then - # earliest, then latest.. etc?" - - -async def start_backfill( - get_hist, - def_frame_duration: Duration, - mod: ModuleType, - mkt: MktPair, - shm: ShmArray, - timeframe: float, - - backfill_from_shm_index: int, - backfill_from_dt: datetime, - - sampler_stream: tractor.MsgStream, - - backfill_until_dt: datetime | None = None, - storage: StorageClient | None = None, - - write_tsdb: bool = True, - - task_status: TaskStatus[tuple] = trio.TASK_STATUS_IGNORED, - -) -> int: - - # let caller unblock and deliver latest history frame - # and use to signal that backfilling the shm gap until - # the tsdb end is complete! - bf_done = trio.Event() - task_status.started(bf_done) - - # based on the sample step size, maybe load a certain amount history - update_start_on_prepend: bool = False - if backfill_until_dt is None: - - # TODO: per-provider default history-durations? - # -[ ] inside the `open_history_client()` config allow - # declaring the history duration limits instead of - # guessing and/or applying the same limits to all? - # - # -[ ] allow declaring (default) per-provider backfill - # limits inside a [storage] sub-section in conf.toml? - # - # NOTE, when no tsdb "last datum" is provided, we just - # load some near-term history by presuming a "decently - # large" 60s duration limit and a much shorter 1s range. 
- periods = { - 1: {'days': 2}, - 60: {'years': 6}, - } - period_duration: int = periods[timeframe] - update_start_on_prepend: bool = True - - # NOTE: manually set the "latest" datetime which we intend to - # backfill history "until" so as to adhere to the history - # settings above when the tsdb is detected as being empty. - backfill_until_dt = backfill_from_dt.subtract(**period_duration) - - # STAGE NOTE: "backward history gap filling": - # - we push to the shm buffer until we have history back - # until the latest entry loaded from the tsdb's table B) - # - after this loop continue to check for other gaps in the - # (tsdb) history and (at least report) maybe fill them - # from new frame queries to the backend? - last_start_dt: datetime = backfill_from_dt - next_prepend_index: int = backfill_from_shm_index - - while last_start_dt > backfill_until_dt: - log.info( - f'Requesting {timeframe}s frame:\n' - f'backfill_until_dt: {backfill_until_dt}\n' - f'last_start_dt: {last_start_dt}\n' - ) - try: - ( - array, - next_start_dt, - next_end_dt, - ) = await get_hist( - timeframe, - end_dt=last_start_dt, - ) - except NoData as _daterr: - orig_last_start_dt: datetime = last_start_dt - gap_report: str = ( - f'EMPTY FRAME for `end_dt: {last_start_dt}`?\n' - f'{mod.name} -> tf@fqme: {timeframe}@{mkt.fqme}\n' - f'last_start_dt: {orig_last_start_dt}\n\n' - f'bf_until: {backfill_until_dt}\n' - ) - # EMPTY FRAME signal with 3 (likely) causes: - # - # 1. range contains legit gap in venue history - # 2. history actually (edge case) **began** at the - # value `last_start_dt` - # 3. some other unknown error (ib blocking the - # history-query bc they don't want you seeing how - # they cucked all the tinas.. like with options - # hist) - # - if def_frame_duration: - # decrement by a duration's (frame) worth of time - # as maybe indicated by the backend to see if we - # can get older data before this possible - # "history gap". 
- last_start_dt: datetime = last_start_dt.subtract( - seconds=def_frame_duration.total_seconds() - ) - gap_report += ( - f'Decrementing `end_dt` and retrying with,\n' - f'def_frame_duration: {def_frame_duration}\n' - f'(new) last_start_dt: {last_start_dt}\n' - ) - log.warning(gap_report) - # skip writing to shm/tsdb and try the next - # duration's worth of prior history. - continue - - else: - # await tractor.pause() - raise DataUnavailable(gap_report) - - # broker says there never was or is no more history to pull - except DataUnavailable as due: - message: str = due.args[0] - log.warning( - f'Provider {mod.name!r} halted backfill due to,\n\n' - - f'{message}\n' - - f'fqme: {mkt.fqme}\n' - f'timeframe: {timeframe}\n' - f'last_start_dt: {last_start_dt}\n' - f'bf_until: {backfill_until_dt}\n' - ) - # UGH: what's a better way? - # TODO: backends are responsible for being correct on - # this right!? - # -[ ] in the `ib` case we could maybe offer some way - # to halt the request loop until the condition is - # resolved or should the backend be entirely in - # charge of solving such faults? yes, right? - return - - time: np.ndarray = array['time'] - assert ( - time[0] - == - next_start_dt.timestamp() - ) - - assert time[-1] == next_end_dt.timestamp() - - expected_dur: Interval = last_start_dt - next_start_dt - - # frame's worth of sample-period-steps, in seconds - frame_size_s: float = len(array) * timeframe - recv_frame_dur: Duration = ( - from_timestamp(array[-1]['time']) - - - from_timestamp(array[0]['time']) - ) - if ( - (lt_frame := (recv_frame_dur < expected_dur)) - or - (null_frame := (frame_size_s == 0)) - # ^XXX, should NEVER hit now! - ): - # XXX: query result includes a start point prior to our - # expected "frame size" and thus is likely some kind of - # history gap (eg. market closed period, outage, etc.) - # so just report it to console for now. 
- if lt_frame: - reason = 'Possible GAP (or first-datum)' - else: - assert null_frame - reason = 'NULL-FRAME' - - missing_dur: Interval = expected_dur.end - recv_frame_dur.end - log.warning( - f'{timeframe}s-series {reason} detected!\n' - f'fqme: {mkt.fqme}\n' - f'last_start_dt: {last_start_dt}\n\n' - f'recv interval: {recv_frame_dur}\n' - f'expected interval: {expected_dur}\n\n' - - f'Missing duration of history of {missing_dur.in_words()!r}\n' - f'{missing_dur}\n' - ) - # await tractor.pause() - - to_push = diff_history( - array, - prepend_until_dt=backfill_until_dt, - ) - ln: int = len(to_push) - if ln: - log.info( - f'{ln} bars for {next_start_dt} -> {last_start_dt}' - ) - - else: - log.warning( - '0 BARS TO PUSH after diff!?\n' - f'{next_start_dt} -> {last_start_dt}' - ) - - # bail gracefully on shm allocation overrun/full - # condition - try: - await shm_push_in_between( - shm, - to_push, - prepend_index=next_prepend_index, - update_start_on_prepend=update_start_on_prepend, - ) - await sampler_stream.send({ - 'broadcast_all': { - 'backfilling': (mkt.fqme, timeframe), - }, - }) - - # decrement next prepend point - next_prepend_index = next_prepend_index - ln - last_start_dt = next_start_dt - - except ValueError as ve: - _ve = ve - log.error( - f'Shm prepend OVERRUN on: {next_start_dt} -> {last_start_dt}?' - ) - - if next_prepend_index < ln: - log.warning( - f'Shm buffer can only hold {next_prepend_index} more rows..\n' - f'Appending those from recent {ln}-sized frame, no more!' - ) - - to_push = to_push[-next_prepend_index + 1:] - await shm_push_in_between( - shm, - to_push, - prepend_index=next_prepend_index, - update_start_on_prepend=update_start_on_prepend, - ) - await sampler_stream.send({ - 'broadcast_all': { - 'backfilling': (mkt.fqme, timeframe), - }, - }) - - # can't push the entire frame? so - # push only the amount that can fit.. 
- break - - log.info( - f'Shm pushed {ln} frame:\n' - f'{next_start_dt} -> {last_start_dt}' - ) - - # FINALLY, maybe write immediately to the tsdb backend for - # long-term storage. - if ( - storage is not None - and - write_tsdb - ): - log.info( - f'Writing {ln} frame to storage:\n' - f'{next_start_dt} -> {last_start_dt}' - ) - - # NOTE, always drop the src asset token for - # non-currency-pair like market types (for now) - # - # THAT IS, for now our table key schema is NOT - # including the dst[/src] source asset token. SO, - # 'tsla.nasdaq.ib' over 'tsla/usd.nasdaq.ib' for - # historical reasons ONLY. - if mkt.dst.atype not in { - 'crypto', - 'crypto_currency', - 'fiat', # a "forex pair" - 'perpetual_future', # stupid "perps" from cex land - }: - col_sym_key: str = mkt.get_fqme( - delim_char='', - without_src=True, - ) - else: - col_sym_key: str = mkt.get_fqme( - delim_char='', - ) - - await storage.write_ohlcv( - col_sym_key, - shm.array, - timeframe, - ) - df: pl.DataFrame = await storage.as_df( - fqme=mkt.fqme, - period=timeframe, - load_from_offline=False, - ) - ( - wdts, - deduped, - diff, - ) = dedupe(df) - # if diff: - # sort_diff(df) - - else: - # finally filled gap - log.info( - f'Finished filling gap to tsdb start @ {backfill_until_dt}!' - ) - - # XXX: extremely important, there can be no checkpoints - # in the block above to avoid entering new ``frames`` - # values while we're pipelining the current ones to - # memory... - # await sampler_stream.send('broadcast_all') - - # short-circuit (for now) - bf_done.set() - - -# NOTE: originally this was used to cope with a tsdb (marketstore) -# which could not delivery very large frames of history over gRPC -# (thanks goolag) due to corruption issues. 
NOW, using apache -# parquet (by default in the local filesys) we don't have this -# requirement since the files can be loaded very quickly in -# entirety to memory via -async def back_load_from_tsdb( - storemod: ModuleType, - storage: StorageClient, - - fqme: str, - - tsdb_history: np.ndarray, - - last_tsdb_dt: datetime, - latest_start_dt: datetime, - latest_end_dt: datetime, - - bf_done: trio.Event, - - timeframe: int, - shm: ShmArray, -): - assert len(tsdb_history) - - # sync to backend history task's query/load completion - # if bf_done: - # await bf_done.wait() - - # TODO: eventually it'd be nice to not require a shm array/buffer - # to accomplish this.. maybe we can do some kind of tsdb direct to - # graphics format eventually in a child-actor? - if storemod.name == 'nativedb': - return - - await tractor.pause() - assert shm._first.value == 0 - - array = shm.array - - # if timeframe == 1: - # times = shm.array['time'] - # assert (times[1] - times[0]) == 1 - - if len(array): - shm_last_dt = from_timestamp( - shm.array[0]['time'] - ) - else: - shm_last_dt = None - - if last_tsdb_dt: - assert shm_last_dt >= last_tsdb_dt - - # do diff against start index of last frame of history and only - # fill in an amount of datums from tsdb allows for most recent - # to be loaded into mem *before* tsdb data. - if ( - last_tsdb_dt - and latest_start_dt - ): - backfilled_size_s: Duration = ( - latest_start_dt - last_tsdb_dt - ).seconds - # if the shm buffer len is not large enough to contain - # all missing data between the most recent backend-queried frame - # and the most recent dt-index in the db we warn that we only - # want to load a portion of the next tsdb query to fill that - # space. - log.info( - f'{backfilled_size_s} seconds worth of {timeframe}s loaded' - ) - - # Load TSDB history into shm buffer (for display) if there is - # remaining buffer space. 
- - time_key: str = 'time' - if getattr(storemod, 'ohlc_key_map', False): - keymap: bidict = storemod.ohlc_key_map - time_key: str = keymap.inverse['time'] - - # if ( - # not len(tsdb_history) - # ): - # return - - tsdb_last_frame_start: datetime = last_tsdb_dt - # load as much from storage into shm possible (depends on - # user's shm size settings). - while shm._first.value > 0: - - tsdb_history = await storage.read_ohlcv( - fqme, - timeframe=timeframe, - end=tsdb_last_frame_start, - ) - - # # empty query - # if not len(tsdb_history): - # break - - next_start = tsdb_history[time_key][0] - if next_start >= tsdb_last_frame_start: - # no earlier data detected - break - - else: - tsdb_last_frame_start = next_start - - # TODO: see if there's faster multi-field reads: - # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields - # re-index with a `time` and index field - prepend_start = shm._first.value - - to_push = tsdb_history[-prepend_start:] - shm.push( - to_push, - - # insert the history pre a "days worth" of samples - # to leave some real-time buffer space at the end. - prepend=True, - # update_first=False, - # start=prepend_start, - field_map=storemod.ohlc_key_map, - ) - - log.info(f'Loaded {to_push.shape} datums from storage') - tsdb_last_frame_start = tsdb_history[time_key][0] - - # manually trigger step update to update charts/fsps - # which need an incremental update. - # NOTE: the way this works is super duper - # un-intuitive right now: - # - the broadcaster fires a msg to the fsp subsystem. - # - fsp subsys then checks for a sample step diff and - # possibly recomputes prepended history. - # - the fsp then sends back to the parent actor - # (usually a chart showing graphics for said fsp) - # which tells the chart to conduct a manual full - # graphics loop cycle. 
- # await sampler_stream.send('broadcast_all') - - -async def push_latest_frame( - # box-type only that should get packed with the datetime - # objects received for the latest history frame - dt_eps: list[DateTime, DateTime], - shm: ShmArray, - get_hist: Callable[ - [int, datetime, datetime], - tuple[np.ndarray, str] - ], - timeframe: float, - config: dict, - - task_status: TaskStatus[ - Exception | list[datetime, datetime] - ] = trio.TASK_STATUS_IGNORED, - -) -> list[datetime, datetime] | None: - # get latest query's worth of history all the way - # back to what is recorded in the tsdb - try: - ( - array, - mr_start_dt, - mr_end_dt, - ) = await get_hist( - timeframe, - end_dt=None, - ) - # so caller can access these ep values - dt_eps.extend([ - mr_start_dt, - mr_end_dt, - ]) - task_status.started(dt_eps) - - # XXX: timeframe not supported for backend (since - # above exception type), terminate immediately since - # there's no backfilling possible. - except DataUnavailable: - task_status.started(None) - - if timeframe > 1: - await tractor.pause() - - # prolly tf not supported - return None - - # NOTE: on the first history, most recent history - # frame we PREPEND from the current shm ._last index - # and thus a gap between the earliest datum loaded here - # and the latest loaded from the tsdb may exist! - log.info(f'Pushing {array.size} to shm!') - shm.push( - array, - prepend=True, # append on first frame - ) - - return dt_eps - - -async def load_tsdb_hist( - storage: StorageClient, - mkt: MktPair, - timeframe: float, - - task_status: TaskStatus[None] = trio.TASK_STATUS_IGNORED, - -) -> tuple[ - np.ndarray, - DateTime, - DateTime, -] | None: - # loads a (large) frame of data from the tsdb depending - # on the db's query size limit; our "nativedb" (using - # parquet) generally can load the entire history into mem - # but if not then below the remaining history can be lazy - # loaded? 
- fqme: str = mkt.fqme - tsdb_entry: tuple[ - np.ndarray, - DateTime, - DateTime, - ] - try: - tsdb_entry: tuple | None = await storage.load( - fqme, - timeframe=timeframe, - ) - return tsdb_entry - - except TimeseriesNotFound: - log.warning( - f'No timeseries yet for {timeframe}@{fqme}' - ) - return None - - -async def tsdb_backfill( - mod: ModuleType, - storemod: ModuleType, - - storage: StorageClient, - mkt: MktPair, - shm: ShmArray, - timeframe: float, - - sampler_stream: tractor.MsgStream, - - task_status: TaskStatus[ - tuple[ShmArray, ShmArray] - ] = trio.TASK_STATUS_IGNORED, - -) -> None: - - if timeframe not in (1, 60): - raise ValueError( - '`piker` only needs to support 1m and 1s sampling ' - 'but ur api is trying to deliver a longer ' - f'timeframe of {timeframe} seconds..\n' - 'So yuh.. dun do dat brudder.' - ) - - get_hist: Callable[ - [int, datetime, datetime], - tuple[np.ndarray, str] - ] - config: dict[str, int] - async with ( - mod.open_history_client( - mkt, - ) as (get_hist, config), - - # NOTE: this sub-nursery splits to tasks for the given - # sampling rate to concurrently load offline tsdb - # timeseries as well as new data from the venue backend! - ): - log.info( - f'`{mod}` history client returned backfill config:\n' - f'{pformat(config)}\n' - ) - - # concurrently load the provider's most-recent-frame AND any - # pre-existing tsdb history already saved in `piker` storage. - dt_eps: list[DateTime, DateTime] = [] - async with ( - tractor.trionics.collapse_eg(), - trio.open_nursery() as tn - ): - tn.start_soon( - push_latest_frame, - dt_eps, - shm, - get_hist, - timeframe, - config, - ) - tsdb_entry: tuple = await load_tsdb_hist( - storage, - mkt, - timeframe, - ) - - # tell parent task to continue - # TODO: really we'd want this the other way with the - # tsdb load happening asap and the since the latest - # frame query will normally be the main source of - # latency? 
- task_status.started() - - # NOTE: iabs to start backfilling from, reverse chronological, - # ONLY AFTER the first history frame has been pushed to - # mem! - backfill_gap_from_shm_index: int = shm._first.value + 1 - - # Prepend any tsdb history into the rt-shm-buffer which - # should NOW be getting filled with the most recent history - # pulled from the data-backend. - if dt_eps: - # well then, unpack the latest (gap) backfilled frame dts - ( - mr_start_dt, - mr_end_dt, - ) = dt_eps - - first_frame_dur_s: Duration = (mr_end_dt - mr_start_dt).seconds - calced_frame_size: Duration = mk_duration( - seconds=first_frame_dur_s, - ) - # NOTE, attempt to use the backend declared default frame - # sizing (as allowed by their time-series query APIs) and - # if not provided try to construct a default from the - # first frame received above. - def_frame_durs: dict[ - int, - Duration, - ]|None = config.get('frame_types', None) - - if def_frame_durs: - def_frame_size: Duration = def_frame_durs[timeframe] - - if def_frame_size != calced_frame_size: - log.warning( - f'Expected frame size {def_frame_size}\n' - f'Rxed frame {calced_frame_size}\n' - ) - # await tractor.pause() - else: - # use what we calced from first frame above. - def_frame_size = calced_frame_size - - # NOTE: when there's no offline data, there's 2 cases: - # - data backend doesn't support timeframe/sample - # period (in which case `dt_eps` should be `None` and - # we shouldn't be here!), or - # - no prior history has been stored (yet) and we need - # todo full backfill of the history now. - if tsdb_entry is None: - # indicate to backfill task to fill the whole - # shm buffer as much as it can! - last_tsdb_dt = None - - # there's existing tsdb history from (offline) storage - # so only backfill the gap between the - # most-recent-frame (mrf) and that latest sample. 
- else: - ( - tsdb_history, - first_tsdb_dt, - last_tsdb_dt, - ) = tsdb_entry - - # if there is a gap to backfill from the first - # history frame until the last datum loaded from the tsdb - # continue that now in the background - async with trio.open_nursery( - strict_exception_groups=False, - ) as tn: - - bf_done = await tn.start( - partial( - start_backfill, - get_hist=get_hist, - def_frame_duration=def_frame_size, - mod=mod, - mkt=mkt, - shm=shm, - timeframe=timeframe, - - backfill_from_shm_index=backfill_gap_from_shm_index, - backfill_from_dt=mr_start_dt, - - sampler_stream=sampler_stream, - backfill_until_dt=last_tsdb_dt, - - storage=storage, - write_tsdb=True, - ) - ) - nulls_detected: trio.Event | None = None - if last_tsdb_dt is not None: - # calc the index from which the tsdb data should be - # prepended, presuming there is a gap between the - # latest frame (loaded/read above) and the latest - # sample loaded from the tsdb. - backfill_diff: Duration = mr_start_dt - last_tsdb_dt - offset_s: float = backfill_diff.in_seconds() - - # XXX EDGE CASEs: the most recent frame overlaps with - # prior tsdb history!! - # - so the latest frame's start time is earlier then - # the tsdb's latest sample. - # - alternatively this may also more generally occur - # when the venue was closed (say over the weeknd) - # causing a timeseries gap, AND the query frames size - # (eg. for ib's 1s we rx 2k datums ~= 33.33m) IS - # GREATER THAN the current venue-market's operating - # session (time) we will receive datums from BEFORE THE - # CLOSURE GAP and thus the `offset_s` value will be - # NEGATIVE! In this case we need to ensure we don't try - # to push datums that have already been recorded in the - # tsdb. In this case we instead only retreive and push - # the series portion missing from the db's data set. 
- # if offset_s < 0: - # non_overlap_diff: Duration = mr_end_dt - last_tsdb_dt - # non_overlap_offset_s: float = backfill_diff.in_seconds() - - offset_samples: int = round(offset_s / timeframe) - - # TODO: see if there's faster multi-field reads: - # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields - # re-index with a `time` and index field - if offset_s > 0: - # NOTE XXX: ONLY when there is an actual gap - # between the earliest sample in the latest history - # frame do we want to NOT stick the latest tsdb - # history adjacent to that latest frame! - prepend_start = shm._first.value - offset_samples + 1 - to_push = tsdb_history[-prepend_start:] - else: - # when there is overlap we want to remove the - # overlapping samples from the tsdb portion (taking - # instead the latest frame's values since THEY - # SHOULD BE THE SAME) and prepend DIRECTLY adjacent - # to the latest frame! - # TODO: assert the overlap segment array contains - # the same values!?! - prepend_start = shm._first.value - to_push = tsdb_history[-(shm._first.value):offset_samples - 1] - - # tsdb history is so far in the past we can't fit it in - # shm buffer space so simply don't load it! - if prepend_start > 0: - shm.push( - to_push, - - # insert the history pre a "days worth" of samples - # to leave some real-time buffer space at the end. - prepend=True, - # update_first=False, - start=prepend_start, - field_map=storemod.ohlc_key_map, - ) - - log.info(f'Loaded {to_push.shape} datums from storage') - - # NOTE: ASYNC-conduct tsdb timestamp gap detection and backfill any - # seemingly missing (null-time) segments.. - # TODO: ideally these can never exist! - # -[ ] somehow it seems sometimes we're writing zero-ed - # segments to tsdbs during teardown? - # -[ ] can we ensure that the backcfiller tasks do this - # work PREVENTAVELY instead? - # -[ ] fill in non-zero epoch time values ALWAYS! 
- # await maybe_fill_null_segments( - nulls_detected: trio.Event = await tn.start(partial( - maybe_fill_null_segments, - - shm=shm, - timeframe=timeframe, - get_hist=get_hist, - sampler_stream=sampler_stream, - mkt=mkt, - )) - - # 2nd nursery END - - # TODO: who would want to? - if nulls_detected: - await nulls_detected.wait() - - await bf_done.wait() - # TODO: maybe start history anal and load missing "history - # gaps" via backend.. - - # if len(hist_shm.array) < 2: - # TODO: there's an edge case here to solve where if the last - # frame before market close (at least on ib) was pushed and - # there was only "1 new" row pushed from the first backfill - # query-iteration, then the sample step sizing calcs will - # break upstream from here since you can't diff on at least - # 2 steps... probably should also add logic to compute from - # the tsdb series and stash that somewhere as meta data on - # the shm buffer?.. no se. - - # backload any further data from tsdb (concurrently per - # timeframe) if not all data was able to be loaded (in memory) - # from the ``StorageClient.load()`` call above. - await trio.sleep_forever() - - # XXX NOTE: this is legacy from when we were using - # marketstore and we needed to continue backloading - # incrementally from the tsdb client.. (bc it couldn't - # handle a single large query with gRPC for some - # reason.. 
classic goolag pos) - # tn.start_soon( - # back_load_from_tsdb, - - # storemod, - # storage, - # fqme, - - # tsdb_history, - # last_tsdb_dt, - # mr_start_dt, - # mr_end_dt, - # bf_done, - - # timeframe, - # shm, - # ) - - -async def manage_history( - mod: ModuleType, - mkt: MktPair, - some_data_ready: trio.Event, - feed_is_live: trio.Event, - timeframe: float = 60, # in seconds - - task_status: TaskStatus[ - tuple[ShmArray, ShmArray] - ] = trio.TASK_STATUS_IGNORED, - -) -> None: - ''' - Load and manage historical data including the loading of any - available series from any connected tsdb as well as conduct - real-time update of both that existing db and the allocated - shared memory buffer. - - Init sequence: - - allocate shm (numpy array) buffers for 60s & 1s sample rates - - configure "zero index" for each buffer: the index where - history will prepended *to* and new live data will be - appened *from*. - - open a ``.storage.StorageClient`` and load any existing tsdb - history as well as (async) start a backfill task which loads - missing (newer) history from the data provider backend: - - tsdb history is loaded first and pushed to shm ASAP. - - the backfill task loads the most recent history before - unblocking its parent task, so that the `ShmArray._last` is - up to date to allow the OHLC sampler to begin writing new - samples as the correct buffer index once the provider feed - engages. - - ''' - # TODO: is there a way to make each shm file key - # actor-tree-discovery-addr unique so we avoid collisions - # when doing tests which also allocate shms for certain instruments - # that may be in use on the system by some other running daemons? 
- # from tractor._state import _runtime_vars - # port = _runtime_vars['_root_mailbox'][1] - - uid: tuple = tractor.current_actor().uid - name, uuid = uid - service: str = name.rstrip(f'.{mod.name}') - fqme: str = mkt.get_fqme(delim_char='') - - # (maybe) allocate shm array for this broker/symbol which will - # be used for fast near-term history capture and processing. - hist_shm, opened = maybe_open_shm_array( - size=_default_hist_size, - append_start_index=_hist_buffer_start, - - key=f'piker.{service}[{uuid[:16]}].{fqme}.hist', - - # use any broker defined ohlc dtype: - dtype=getattr(mod, '_ohlc_dtype', def_iohlcv_fields), - - # we expect the sub-actor to write - readonly=False, - ) - hist_zero_index = hist_shm.index - 1 - - # TODO: history validation - if not opened: - raise RuntimeError( - "Persistent shm for sym was already open?!" - ) - - rt_shm, opened = maybe_open_shm_array( - size=_default_rt_size, - append_start_index=_rt_buffer_start, - key=f'piker.{service}[{uuid[:16]}].{fqme}.rt', - - # use any broker defined ohlc dtype: - dtype=getattr(mod, '_ohlc_dtype', def_iohlcv_fields), - - # we expect the sub-actor to write - readonly=False, - ) - - # (for now) set the rt (hft) shm array with space to prepend - # only a few days worth of 1s history. - days: int = 2 - start_index: int = days*_secs_in_day - rt_shm._first.value = start_index - rt_shm._last.value = start_index - rt_zero_index = rt_shm.index - 1 - - if not opened: - raise RuntimeError( - "Persistent shm for sym was already open?!" - ) - - open_history_client = getattr( - mod, - 'open_history_client', - ) - assert open_history_client - - # TODO: maybe it should be a subpkg of `.data`? 
- from piker import storage - - async with ( - storage.open_storage_client() as (storemod, client), - - # NOTE: this nursery spawns a task per "timeframe" (aka - # sampling period) data set since normally differently - # sampled timeseries can be loaded / process independently - # ;) - tractor.trionics.collapse_eg(), - trio.open_nursery() as tn, - ): - log.info( - f'Connecting to storage backend `{storemod.name}`:\n' - f'location: {client.address}\n' - f'db cardinality: {client.cardinality}\n' - # TODO: show backend config, eg: - # - network settings - # - storage size with compression - # - number of loaded time series? - ) - - # NOTE: this call ONLY UNBLOCKS once the latest-most frame - # (i.e. history just before the live feed latest datum) of - # history has been loaded and written to the shm buffer: - # - the backfiller task can write in reverse chronological - # to the shm and tsdb - # - the tsdb data can be loaded immediately and the - # backfiller can do a single append from it's end datum and - # then prepends backward to that from the current time - # step. - tf2mem: dict = { - 1: rt_shm, - 60: hist_shm, - } - async with open_sample_stream( - period_s=1., - shms_by_period={ - 1.: rt_shm.token, - 60.: hist_shm.token, - }, - - # NOTE: we want to only open a stream for doing - # broadcasts on backfill operations, not receive the - # sample index-stream (since there's no code in this - # data feed layer that needs to consume it). 
- open_index_stream=True, - sub_for_broadcasts=False, - - ) as sample_stream: - # register 1s and 1m buffers with the global - # incrementer task - log.info(f'Connected to sampler stream: {sample_stream}') - - for timeframe in [60, 1]: - await tn.start(partial( - tsdb_backfill, - mod=mod, - storemod=storemod, - storage=client, - mkt=mkt, - shm=tf2mem[timeframe], - timeframe=timeframe, - sampler_stream=sample_stream, - )) - - # indicate to caller that feed can be delivered to - # remote requesting client since we've loaded history - # data that can be used. - some_data_ready.set() - - # wait for a live feed before starting the sampler. - await feed_is_live.wait() - - # yield back after client connect with filled shm - task_status.started(( - hist_zero_index, - hist_shm, - rt_zero_index, - rt_shm, - )) - - # history retreival loop depending on user interaction - # and thus a small RPC-prot for remotely controllinlg - # what data is loaded for viewing. - await trio.sleep_forever() - - -def iter_dfs_from_shms( - fqme: str -) -> Generator[ - tuple[Path, ShmArray, pl.DataFrame], - None, - None, -]: - # shm buffer size table based on known sample rates - sizes: dict[str, int] = { - 'hist': _default_hist_size, - 'rt': _default_rt_size, - } - - # load all detected shm buffer files which have the - # passed FQME pattern in the file name. - shmfiles: list[Path] = [] - shmdir = Path('/dev/shm/') - - for shmfile in shmdir.glob(f'*{fqme}*'): - filename: str = shmfile.name - - # skip index files - if ( - '_first' in filename - or '_last' in filename - ): - continue - - assert shmfile.is_file() - log.debug(f'Found matching shm buffer file: {filename}') - shmfiles.append(shmfile) - - for shmfile in shmfiles: - - # lookup array buffer size based on file suffix - # being either .rt or .hist - key: str = shmfile.name.rsplit('.')[-1] - - # skip FSP buffers for now.. 
- if key not in sizes: - continue - - size: int = sizes[key] - - # attach to any shm buffer, load array into polars df, - # write to local parquet file. - shm, opened = maybe_open_shm_array( - key=shmfile.name, - size=size, - dtype=def_iohlcv_fields, - readonly=True, - ) - assert not opened - ohlcv: np.ndarray = shm.array - df: pl.DataFrame = np2pl(ohlcv) - - yield ( - shmfile, - shm, - df, - ) diff --git a/piker/tsp/_history.py b/piker/tsp/_history.py new file mode 100644 index 00000000..a4ee04c2 --- /dev/null +++ b/piker/tsp/_history.py @@ -0,0 +1,1471 @@ +# piker: trading gear for hackers +# Copyright (C) Tyler Goodlet (in stewardship for pikers) + +# This program is free software: you can redistribute it and/or +# modify it under the terms of the GNU Affero General Public +# License as published by the Free Software Foundation, either +# version 3 of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Affero General Public License for more details. + +# You should have received a copy of the GNU Affero General Public +# License along with this program. If not, see +# . + +''' +Historical TSP (time-series processing) lowlevel mgmt machinery and biz logic for, + +- hi-level biz logics using the `.storage` subpkg APIs for (I/O) + orchestration and mgmt of tsdb data sets. +- core data-provider history backfilling middleware (as task-funcs) via + (what will eventually be `datad`, but are rn is the) `.brokers` backend + APIs. +- various data set cleaning, repairing and issue-detection/analysis + routines to ensure consistent series whether in shm or when + stored offline (in a tsdb). 
+ +''' +from __future__ import annotations +from datetime import datetime +from functools import partial +from pathlib import Path +from pprint import pformat +from types import ModuleType +from typing import ( + Callable, + Generator, + TYPE_CHECKING, +) + +import trio +from trio_typing import TaskStatus +import tractor +from pendulum import ( + Interval, + DateTime, + Duration, + duration as mk_duration, + from_timestamp, +) +import numpy as np +import polars as pl + +from piker.brokers import NoData +from piker.accounting import ( + MktPair, +) +from piker.data._util import ( + log, +) +from ..data._sharedmem import ( + maybe_open_shm_array, + ShmArray, +) +from ..data._source import def_iohlcv_fields +from ..data._sampling import ( + open_sample_stream, +) + + +from piker.brokers._util import ( + DataUnavailable, +) +from piker.storage import TimeseriesNotFound +from ._anal import ( + + dedupe, + get_null_segs, + iter_null_segs, + Frame, + + # codec-ish + np2pl as np2pl, + + # `polars` specific + # with_dts, + # sort_diff, + + # TODO, use this to correct conc-issues during backfill? + # detect_price_gaps, +) + +if TYPE_CHECKING: + from bidict import bidict + from ..service.marketstore import StorageClient + # from .feed import _FeedsBus + + +# `ShmArray` buffer sizing configuration: +_mins_in_day = int(60 * 24) +# how much is probably dependent on lifestyle +# but we reco a buncha times (but only on a +# run-every-other-day kinda week). +_secs_in_day = int(60 * _mins_in_day) +_days_in_week: int = 7 + +_days_worth: int = 3 +_default_hist_size: int = 6 * 365 * _mins_in_day +_hist_buffer_start = int( + _default_hist_size - round(7 * _mins_in_day) +) + +_default_rt_size: int = _days_worth * _secs_in_day +# NOTE: start the append index in rt buffer such that 1 day's worth +# can be appenened before overrun. 
+_rt_buffer_start = int((_days_worth - 1) * _secs_in_day) + + +def diff_history( + array: np.ndarray, + append_until_dt: datetime | None = None, + prepend_until_dt: datetime | None = None, + +) -> np.ndarray: + + # no diffing with tsdb dt index possible.. + if ( + prepend_until_dt is None + and append_until_dt is None + ): + return array + + times = array['time'] + + if append_until_dt: + return array[times < append_until_dt.timestamp()] + else: + return array[times >= prepend_until_dt.timestamp()] + + +# TODO: can't we just make this a sync func now? +async def shm_push_in_between( + shm: ShmArray, + to_push: np.ndarray, + prepend_index: int, + + update_start_on_prepend: bool = False, + +) -> int: + # XXX: extremely important, there can be no checkpoints + # in the body of this func to avoid entering new ``frames`` + # values while we're pipelining the current ones to + # memory... + shm.push( + to_push, + prepend=True, + + # XXX: only update the ._first index if no tsdb + # segment was previously prepended by the + # parent task. + update_first=update_start_on_prepend, + + # XXX: only prepend from a manually calculated shm + # index if there was already a tsdb history + # segment prepended (since then the + # ._first.value is going to be wayyy in the + # past!) 
+ start=( + prepend_index + if not update_start_on_prepend + else None + ), + ) + + +async def maybe_fill_null_segments( + shm: ShmArray, + timeframe: float, + get_hist: Callable, + sampler_stream: tractor.MsgStream, + mkt: MktPair, + + task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED, + +) -> list[Frame]: + + null_segs_detected = trio.Event() + task_status.started(null_segs_detected) + + frame: Frame = shm.array + + null_segs: tuple | None = get_null_segs( + frame, + period=timeframe, + ) + for ( + absi_start, absi_end, + fi_start, fi_end, + start_t, end_t, + start_dt, end_dt, + ) in iter_null_segs( + null_segs=null_segs, + frame=frame, + timeframe=timeframe, + ): + + # XXX NOTE: ?if we get a badly ordered timestamp + # pair, immediately stop backfilling? + if ( + start_dt + and + end_dt < start_dt + ): + await tractor.pause() + break + + ( + array, + next_start_dt, + next_end_dt, + ) = await get_hist( + timeframe, + start_dt=start_dt, + end_dt=end_dt, + ) + + # XXX TODO: pretty sure if i plot tsla, btcusdt.binance + # and mnq.cme.ib this causes a Qt crash XXDDD + + # make sure we don't overrun the buffer start + len_to_push: int = min(absi_end, array.size) + to_push: np.ndarray = array[-len_to_push:] + + await shm_push_in_between( + shm, + to_push, + prepend_index=absi_end, + update_start_on_prepend=False, + ) + # TODO: UI side needs IPC event to update.. + # - make sure the UI actually always handles + # this update! + # - remember that in the display side, only refersh this + # if the respective history is actually "in view". + # loop + try: + await sampler_stream.send({ + 'broadcast_all': { + + # XXX NOTE XXX: see the + # `.ui._display.increment_history_view()` if block + # that looks for this info to FORCE a hard viz + # redraw! 
+ 'backfilling': (mkt.fqme, timeframe), + }, + }) + except tractor.ContextCancelled: + # log.exception + await tractor.pause() + raise + + null_segs_detected.set() + # RECHECK for more null-gaps + frame: Frame = shm.array + null_segs: tuple | None = get_null_segs( + frame, + period=timeframe, + ) + if ( + null_segs + and + len(null_segs[-1]) + ): + ( + iabs_slices, + iabs_zero_rows, + zero_t, + ) = null_segs + log.warning( + f'{len(iabs_slices)} NULL TIME SEGMENTS DETECTED!\n' + f'{pformat(iabs_slices)}' + ) + + # TODO: always backfill gaps with the earliest (price) datum's + # value to avoid the y-ranger including zeros and completely + # stretching the y-axis.. + # array: np.ndarray = shm.array + # zeros = array[array['low'] == 0] + ohlc_fields: list[str] = [ + 'open', + 'high', + 'low', + 'close', + ] + + for istart, istop in iabs_slices: + + # get view into buffer for null-segment + gap: np.ndarray = shm._array[istart:istop] + + # copy the oldest OHLC samples forward + cls: float = shm._array[istart]['close'] + + # TODO: how can we mark this range as being a gap tho? + # -[ ] maybe pg finally supports nulls in ndarray to + # show empty space somehow? + # -[ ] we could put a special value in the vlm or + # another col/field to denote? + gap[ohlc_fields] = cls + + start_t: float = shm._array[istart]['time'] + t_diff: float = (istop - istart)*timeframe + + gap['time'] = np.arange( + start=start_t, + stop=start_t + t_diff, + step=timeframe, + ) + + # TODO: reimpl using the new `.ui._remote_ctl` ctx + # ideally using some kinda decent + # tractory-reverse-lookup-connnection from some other + # `Context` type thingy? + await sampler_stream.send({ + 'broadcast_all': { + + # XXX NOTE XXX: see the + # `.ui._display.increment_history_view()` if block + # that looks for this info to FORCE a hard viz + # redraw! 
+ 'backfilling': (mkt.fqme, timeframe), + }, + }) + + # TODO: interatively step through any remaining + # time-gaps/null-segments and spawn piecewise backfiller + # tasks in a nursery? + # -[ ] not sure that's going to work so well on the ib + # backend but worth a shot? + # -[ ] mk new history connections to make it properly + # parallel possible no matter the backend? + # -[ ] fill algo: do queries in alternating "latest, then + # earliest, then latest.. etc?" + + +async def start_backfill( + get_hist, + def_frame_duration: Duration, + mod: ModuleType, + mkt: MktPair, + shm: ShmArray, + timeframe: float, + + backfill_from_shm_index: int, + backfill_from_dt: datetime, + + sampler_stream: tractor.MsgStream, + + backfill_until_dt: datetime | None = None, + storage: StorageClient | None = None, + + write_tsdb: bool = True, + + task_status: TaskStatus[tuple] = trio.TASK_STATUS_IGNORED, + +) -> int: + + # let caller unblock and deliver latest history frame + # and use to signal that backfilling the shm gap until + # the tsdb end is complete! + bf_done = trio.Event() + task_status.started(bf_done) + + # based on the sample step size, maybe load a certain amount history + update_start_on_prepend: bool = False + if backfill_until_dt is None: + + # TODO: per-provider default history-durations? + # -[ ] inside the `open_history_client()` config allow + # declaring the history duration limits instead of + # guessing and/or applying the same limits to all? + # + # -[ ] allow declaring (default) per-provider backfill + # limits inside a [storage] sub-section in conf.toml? + # + # NOTE, when no tsdb "last datum" is provided, we just + # load some near-term history by presuming a "decently + # large" 60s duration limit and a much shorter 1s range. 
+ periods = { + 1: {'days': 2}, + 60: {'years': 6}, + } + period_duration: int = periods[timeframe] + update_start_on_prepend: bool = True + + # NOTE: manually set the "latest" datetime which we intend to + # backfill history "until" so as to adhere to the history + # settings above when the tsdb is detected as being empty. + backfill_until_dt = backfill_from_dt.subtract(**period_duration) + + # STAGE NOTE: "backward history gap filling": + # - we push to the shm buffer until we have history back + # until the latest entry loaded from the tsdb's table B) + # - after this loop continue to check for other gaps in the + # (tsdb) history and (at least report) maybe fill them + # from new frame queries to the backend? + last_start_dt: datetime = backfill_from_dt + next_prepend_index: int = backfill_from_shm_index + + while last_start_dt > backfill_until_dt: + log.info( + f'Requesting {timeframe}s frame:\n' + f'backfill_until_dt: {backfill_until_dt}\n' + f'last_start_dt: {last_start_dt}\n' + ) + try: + ( + array, + next_start_dt, + next_end_dt, + ) = await get_hist( + timeframe, + end_dt=last_start_dt, + ) + except NoData as _daterr: + orig_last_start_dt: datetime = last_start_dt + gap_report: str = ( + f'EMPTY FRAME for `end_dt: {last_start_dt}`?\n' + f'{mod.name} -> tf@fqme: {timeframe}@{mkt.fqme}\n' + f'last_start_dt: {orig_last_start_dt}\n\n' + f'bf_until: {backfill_until_dt}\n' + ) + # EMPTY FRAME signal with 3 (likely) causes: + # + # 1. range contains legit gap in venue history + # 2. history actually (edge case) **began** at the + # value `last_start_dt` + # 3. some other unknown error (ib blocking the + # history-query bc they don't want you seeing how + # they cucked all the tinas.. like with options + # hist) + # + if def_frame_duration: + # decrement by a duration's (frame) worth of time + # as maybe indicated by the backend to see if we + # can get older data before this possible + # "history gap". 
+ last_start_dt: datetime = last_start_dt.subtract( + seconds=def_frame_duration.total_seconds() + ) + gap_report += ( + f'Decrementing `end_dt` and retrying with,\n' + f'def_frame_duration: {def_frame_duration}\n' + f'(new) last_start_dt: {last_start_dt}\n' + ) + log.warning(gap_report) + # skip writing to shm/tsdb and try the next + # duration's worth of prior history. + continue + + else: + # await tractor.pause() + raise DataUnavailable(gap_report) + + # broker says there never was or is no more history to pull + except DataUnavailable as due: + message: str = due.args[0] + log.warning( + f'Provider {mod.name!r} halted backfill due to,\n\n' + + f'{message}\n' + + f'fqme: {mkt.fqme}\n' + f'timeframe: {timeframe}\n' + f'last_start_dt: {last_start_dt}\n' + f'bf_until: {backfill_until_dt}\n' + ) + # UGH: what's a better way? + # TODO: backends are responsible for being correct on + # this right!? + # -[ ] in the `ib` case we could maybe offer some way + # to halt the request loop until the condition is + # resolved or should the backend be entirely in + # charge of solving such faults? yes, right? + return + + time: np.ndarray = array['time'] + assert ( + time[0] + == + next_start_dt.timestamp() + ) + + assert time[-1] == next_end_dt.timestamp() + + expected_dur: Interval = last_start_dt - next_start_dt + + # frame's worth of sample-period-steps, in seconds + frame_size_s: float = len(array) * timeframe + recv_frame_dur: Duration = ( + from_timestamp(array[-1]['time']) + - + from_timestamp(array[0]['time']) + ) + if ( + (lt_frame := (recv_frame_dur < expected_dur)) + or + (null_frame := (frame_size_s == 0)) + # ^XXX, should NEVER hit now! + ): + # XXX: query result includes a start point prior to our + # expected "frame size" and thus is likely some kind of + # history gap (eg. market closed period, outage, etc.) + # so just report it to console for now. 
+ if lt_frame: + reason = 'Possible GAP (or first-datum)' + else: + assert null_frame + reason = 'NULL-FRAME' + + missing_dur: Interval = expected_dur.end - recv_frame_dur.end + log.warning( + f'{timeframe}s-series {reason} detected!\n' + f'fqme: {mkt.fqme}\n' + f'last_start_dt: {last_start_dt}\n\n' + f'recv interval: {recv_frame_dur}\n' + f'expected interval: {expected_dur}\n\n' + + f'Missing duration of history of {missing_dur.in_words()!r}\n' + f'{missing_dur}\n' + ) + # await tractor.pause() + + to_push = diff_history( + array, + prepend_until_dt=backfill_until_dt, + ) + ln: int = len(to_push) + if ln: + log.info( + f'{ln} bars for {next_start_dt} -> {last_start_dt}' + ) + + else: + log.warning( + '0 BARS TO PUSH after diff!?\n' + f'{next_start_dt} -> {last_start_dt}' + ) + + # bail gracefully on shm allocation overrun/full + # condition + try: + await shm_push_in_between( + shm, + to_push, + prepend_index=next_prepend_index, + update_start_on_prepend=update_start_on_prepend, + ) + await sampler_stream.send({ + 'broadcast_all': { + 'backfilling': (mkt.fqme, timeframe), + }, + }) + + # decrement next prepend point + next_prepend_index = next_prepend_index - ln + last_start_dt = next_start_dt + + except ValueError as ve: + _ve = ve + log.error( + f'Shm prepend OVERRUN on: {next_start_dt} -> {last_start_dt}?' + ) + + if next_prepend_index < ln: + log.warning( + f'Shm buffer can only hold {next_prepend_index} more rows..\n' + f'Appending those from recent {ln}-sized frame, no more!' + ) + + to_push = to_push[-next_prepend_index + 1:] + await shm_push_in_between( + shm, + to_push, + prepend_index=next_prepend_index, + update_start_on_prepend=update_start_on_prepend, + ) + await sampler_stream.send({ + 'broadcast_all': { + 'backfilling': (mkt.fqme, timeframe), + }, + }) + + # can't push the entire frame? so + # push only the amount that can fit.. 
+ break + + log.info( + f'Shm pushed {ln} frame:\n' + f'{next_start_dt} -> {last_start_dt}' + ) + + # FINALLY, maybe write immediately to the tsdb backend for + # long-term storage. + if ( + storage is not None + and + write_tsdb + ): + log.info( + f'Writing {ln} frame to storage:\n' + f'{next_start_dt} -> {last_start_dt}' + ) + + # NOTE, always drop the src asset token for + # non-currency-pair like market types (for now) + # + # THAT IS, for now our table key schema is NOT + # including the dst[/src] source asset token. SO, + # 'tsla.nasdaq.ib' over 'tsla/usd.nasdaq.ib' for + # historical reasons ONLY. + if mkt.dst.atype not in { + 'crypto', + 'crypto_currency', + 'fiat', # a "forex pair" + 'perpetual_future', # stupid "perps" from cex land + }: + col_sym_key: str = mkt.get_fqme( + delim_char='', + without_src=True, + ) + else: + col_sym_key: str = mkt.get_fqme( + delim_char='', + ) + + await storage.write_ohlcv( + col_sym_key, + shm.array, + timeframe, + ) + df: pl.DataFrame = await storage.as_df( + fqme=mkt.fqme, + period=timeframe, + load_from_offline=False, + ) + ( + wdts, + deduped, + diff, + ) = dedupe(df) + # if diff: + # sort_diff(df) + + else: + # finally filled gap + log.info( + f'Finished filling gap to tsdb start @ {backfill_until_dt}!' + ) + + # XXX: extremely important, there can be no checkpoints + # in the block above to avoid entering new ``frames`` + # values while we're pipelining the current ones to + # memory... + # await sampler_stream.send('broadcast_all') + + # short-circuit (for now) + bf_done.set() + + +# NOTE: originally this was used to cope with a tsdb (marketstore) +# which could not delivery very large frames of history over gRPC +# (thanks goolag) due to corruption issues. +# +# NOW, using apache parquet (by default in the local filesys) we +# don't have this requirement since the files can be loaded very +# quickly in entirety to memory via `polars.read_parquet()`. 
+# +async def back_load_from_tsdb( + storemod: ModuleType, + storage: StorageClient, + + fqme: str, + + tsdb_history: np.ndarray, + + last_tsdb_dt: datetime, + latest_start_dt: datetime, + latest_end_dt: datetime, + + bf_done: trio.Event, + + timeframe: int, + shm: ShmArray, +): + assert len(tsdb_history) + + # sync to backend history task's query/load completion + # if bf_done: + # await bf_done.wait() + + # TODO: eventually it'd be nice to not require a shm array/buffer + # to accomplish this.. maybe we can do some kind of tsdb direct to + # graphics format eventually in a child-actor? + if storemod.name == 'nativedb': + return + + await tractor.pause() + assert shm._first.value == 0 + + array = shm.array + + # if timeframe == 1: + # times = shm.array['time'] + # assert (times[1] - times[0]) == 1 + + if len(array): + shm_last_dt = from_timestamp( + shm.array[0]['time'] + ) + else: + shm_last_dt = None + + if last_tsdb_dt: + assert shm_last_dt >= last_tsdb_dt + + # do diff against start index of last frame of history and only + # fill in an amount of datums from tsdb allows for most recent + # to be loaded into mem *before* tsdb data. + if ( + last_tsdb_dt + and latest_start_dt + ): + backfilled_size_s: Duration = ( + latest_start_dt - last_tsdb_dt + ).seconds + # if the shm buffer len is not large enough to contain + # all missing data between the most recent backend-queried frame + # and the most recent dt-index in the db we warn that we only + # want to load a portion of the next tsdb query to fill that + # space. + log.info( + f'{backfilled_size_s} seconds worth of {timeframe}s loaded' + ) + + # Load TSDB history into shm buffer (for display) if there is + # remaining buffer space. 
+ + time_key: str = 'time' + if getattr(storemod, 'ohlc_key_map', False): + keymap: bidict = storemod.ohlc_key_map + time_key: str = keymap.inverse['time'] + + # if ( + # not len(tsdb_history) + # ): + # return + + tsdb_last_frame_start: datetime = last_tsdb_dt + # load as much from storage into shm possible (depends on + # user's shm size settings). + while shm._first.value > 0: + + tsdb_history = await storage.read_ohlcv( + fqme, + timeframe=timeframe, + end=tsdb_last_frame_start, + ) + + # # empty query + # if not len(tsdb_history): + # break + + next_start = tsdb_history[time_key][0] + if next_start >= tsdb_last_frame_start: + # no earlier data detected + break + + else: + tsdb_last_frame_start = next_start + + # TODO: see if there's faster multi-field reads: + # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields + # re-index with a `time` and index field + prepend_start = shm._first.value + + to_push = tsdb_history[-prepend_start:] + shm.push( + to_push, + + # insert the history pre a "days worth" of samples + # to leave some real-time buffer space at the end. + prepend=True, + # update_first=False, + # start=prepend_start, + field_map=storemod.ohlc_key_map, + ) + + log.info(f'Loaded {to_push.shape} datums from storage') + tsdb_last_frame_start = tsdb_history[time_key][0] + + # manually trigger step update to update charts/fsps + # which need an incremental update. + # NOTE: the way this works is super duper + # un-intuitive right now: + # - the broadcaster fires a msg to the fsp subsystem. + # - fsp subsys then checks for a sample step diff and + # possibly recomputes prepended history. + # - the fsp then sends back to the parent actor + # (usually a chart showing graphics for said fsp) + # which tells the chart to conduct a manual full + # graphics loop cycle. 
+ # await sampler_stream.send('broadcast_all') + + +async def push_latest_frame( + # box-type only that should get packed with the datetime + # objects received for the latest history frame + dt_eps: list[DateTime, DateTime], + shm: ShmArray, + get_hist: Callable[ + [int, datetime, datetime], + tuple[np.ndarray, str] + ], + timeframe: float, + config: dict, + + task_status: TaskStatus[ + Exception | list[datetime, datetime] + ] = trio.TASK_STATUS_IGNORED, + +) -> list[datetime, datetime] | None: + # get latest query's worth of history all the way + # back to what is recorded in the tsdb + try: + ( + array, + mr_start_dt, + mr_end_dt, + ) = await get_hist( + timeframe, + end_dt=None, + ) + # so caller can access these ep values + dt_eps.extend([ + mr_start_dt, + mr_end_dt, + ]) + task_status.started(dt_eps) + + # XXX: timeframe not supported for backend (since + # above exception type), terminate immediately since + # there's no backfilling possible. + except DataUnavailable: + task_status.started(None) + + if timeframe > 1: + await tractor.pause() + + # prolly tf not supported + return None + + # NOTE: on the first history, most recent history + # frame we PREPEND from the current shm ._last index + # and thus a gap between the earliest datum loaded here + # and the latest loaded from the tsdb may exist! + log.info(f'Pushing {array.size} to shm!') + shm.push( + array, + prepend=True, # append on first frame + ) + + return dt_eps + + +async def load_tsdb_hist( + storage: StorageClient, + mkt: MktPair, + timeframe: float, + + task_status: TaskStatus[None] = trio.TASK_STATUS_IGNORED, + +) -> tuple[ + np.ndarray, + DateTime, + DateTime, +] | None: + # loads a (large) frame of data from the tsdb depending + # on the db's query size limit; our "nativedb" (using + # parquet) generally can load the entire history into mem + # but if not then below the remaining history can be lazy + # loaded? 
+ fqme: str = mkt.fqme + tsdb_entry: tuple[ + np.ndarray, + DateTime, + DateTime, + ] + try: + tsdb_entry: tuple | None = await storage.load( + fqme, + timeframe=timeframe, + ) + return tsdb_entry + + except TimeseriesNotFound: + log.warning( + f'No timeseries yet for {timeframe}@{fqme}' + ) + return None + + +async def tsdb_backfill( + mod: ModuleType, + storemod: ModuleType, + + storage: StorageClient, + mkt: MktPair, + shm: ShmArray, + timeframe: float, + + sampler_stream: tractor.MsgStream, + + task_status: TaskStatus[ + tuple[ShmArray, ShmArray] + ] = trio.TASK_STATUS_IGNORED, + +) -> None: + + if timeframe not in (1, 60): + raise ValueError( + '`piker` only needs to support 1m and 1s sampling ' + 'but ur api is trying to deliver a longer ' + f'timeframe of {timeframe} seconds..\n' + 'So yuh.. dun do dat brudder.' + ) + + get_hist: Callable[ + [int, datetime, datetime], + tuple[np.ndarray, str] + ] + config: dict[str, int] + async with ( + mod.open_history_client( + mkt, + ) as (get_hist, config), + + # NOTE: this sub-nursery splits to tasks for the given + # sampling rate to concurrently load offline tsdb + # timeseries as well as new data from the venue backend! + ): + log.info( + f'`{mod}` history client returned backfill config:\n' + f'{pformat(config)}\n' + ) + + # concurrently load the provider's most-recent-frame AND any + # pre-existing tsdb history already saved in `piker` storage. + dt_eps: list[DateTime, DateTime] = [] + async with ( + tractor.trionics.collapse_eg(), + trio.open_nursery() as tn + ): + tn.start_soon( + push_latest_frame, + dt_eps, + shm, + get_hist, + timeframe, + config, + ) + tsdb_entry: tuple = await load_tsdb_hist( + storage, + mkt, + timeframe, + ) + + # tell parent task to continue + # TODO: really we'd want this the other way with the + # tsdb load happening asap and the since the latest + # frame query will normally be the main source of + # latency? 
+ task_status.started() + + # NOTE: iabs to start backfilling from, reverse chronological, + # ONLY AFTER the first history frame has been pushed to + # mem! + backfill_gap_from_shm_index: int = shm._first.value + 1 + + # Prepend any tsdb history into the rt-shm-buffer which + # should NOW be getting filled with the most recent history + # pulled from the data-backend. + if dt_eps: + # well then, unpack the latest (gap) backfilled frame dts + ( + mr_start_dt, + mr_end_dt, + ) = dt_eps + + first_frame_dur_s: Duration = (mr_end_dt - mr_start_dt).seconds + calced_frame_size: Duration = mk_duration( + seconds=first_frame_dur_s, + ) + # NOTE, attempt to use the backend declared default frame + # sizing (as allowed by their time-series query APIs) and + # if not provided try to construct a default from the + # first frame received above. + def_frame_durs: dict[ + int, + Duration, + ]|None = config.get('frame_types', None) + + if def_frame_durs: + def_frame_size: Duration = def_frame_durs[timeframe] + + if def_frame_size != calced_frame_size: + log.warning( + f'Expected frame size {def_frame_size}\n' + f'Rxed frame {calced_frame_size}\n' + ) + # await tractor.pause() + else: + # use what we calced from first frame above. + def_frame_size = calced_frame_size + + # NOTE: when there's no offline data, there's 2 cases: + # - data backend doesn't support timeframe/sample + # period (in which case `dt_eps` should be `None` and + # we shouldn't be here!), or + # - no prior history has been stored (yet) and we need + # todo full backfill of the history now. + if tsdb_entry is None: + # indicate to backfill task to fill the whole + # shm buffer as much as it can! + last_tsdb_dt = None + + # there's existing tsdb history from (offline) storage + # so only backfill the gap between the + # most-recent-frame (mrf) and that latest sample. 
+ else: + ( + tsdb_history, + first_tsdb_dt, + last_tsdb_dt, + ) = tsdb_entry + + # if there is a gap to backfill from the first + # history frame until the last datum loaded from the tsdb + # continue that now in the background + async with trio.open_nursery( + strict_exception_groups=False, + ) as tn: + + bf_done = await tn.start( + partial( + start_backfill, + get_hist=get_hist, + def_frame_duration=def_frame_size, + mod=mod, + mkt=mkt, + shm=shm, + timeframe=timeframe, + + backfill_from_shm_index=backfill_gap_from_shm_index, + backfill_from_dt=mr_start_dt, + + sampler_stream=sampler_stream, + backfill_until_dt=last_tsdb_dt, + + storage=storage, + write_tsdb=True, + ) + ) + nulls_detected: trio.Event | None = None + if last_tsdb_dt is not None: + # calc the index from which the tsdb data should be + # prepended, presuming there is a gap between the + # latest frame (loaded/read above) and the latest + # sample loaded from the tsdb. + backfill_diff: Duration = mr_start_dt - last_tsdb_dt + offset_s: float = backfill_diff.in_seconds() + + # XXX EDGE CASEs: the most recent frame overlaps with + # prior tsdb history!! + # - so the latest frame's start time is earlier then + # the tsdb's latest sample. + # - alternatively this may also more generally occur + # when the venue was closed (say over the weeknd) + # causing a timeseries gap, AND the query frames size + # (eg. for ib's 1s we rx 2k datums ~= 33.33m) IS + # GREATER THAN the current venue-market's operating + # session (time) we will receive datums from BEFORE THE + # CLOSURE GAP and thus the `offset_s` value will be + # NEGATIVE! In this case we need to ensure we don't try + # to push datums that have already been recorded in the + # tsdb. In this case we instead only retreive and push + # the series portion missing from the db's data set. 
+ # if offset_s < 0: + # non_overlap_diff: Duration = mr_end_dt - last_tsdb_dt + # non_overlap_offset_s: float = backfill_diff.in_seconds() + + offset_samples: int = round(offset_s / timeframe) + + # TODO: see if there's faster multi-field reads: + # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields + # re-index with a `time` and index field + if offset_s > 0: + # NOTE XXX: ONLY when there is an actual gap + # between the earliest sample in the latest history + # frame do we want to NOT stick the latest tsdb + # history adjacent to that latest frame! + prepend_start = shm._first.value - offset_samples + 1 + to_push = tsdb_history[-prepend_start:] + else: + # when there is overlap we want to remove the + # overlapping samples from the tsdb portion (taking + # instead the latest frame's values since THEY + # SHOULD BE THE SAME) and prepend DIRECTLY adjacent + # to the latest frame! + # TODO: assert the overlap segment array contains + # the same values!?! + prepend_start = shm._first.value + to_push = tsdb_history[-(shm._first.value):offset_samples - 1] + + # tsdb history is so far in the past we can't fit it in + # shm buffer space so simply don't load it! + if prepend_start > 0: + shm.push( + to_push, + + # insert the history pre a "days worth" of samples + # to leave some real-time buffer space at the end. + prepend=True, + # update_first=False, + start=prepend_start, + field_map=storemod.ohlc_key_map, + ) + + log.info(f'Loaded {to_push.shape} datums from storage') + + # NOTE: ASYNC-conduct tsdb timestamp gap detection and backfill any + # seemingly missing (null-time) segments.. + # TODO: ideally these can never exist! + # -[ ] somehow it seems sometimes we're writing zero-ed + # segments to tsdbs during teardown? + # -[ ] can we ensure that the backcfiller tasks do this + # work PREVENTAVELY instead? + # -[ ] fill in non-zero epoch time values ALWAYS! 
+ # await maybe_fill_null_segments( + nulls_detected: trio.Event = await tn.start(partial( + maybe_fill_null_segments, + + shm=shm, + timeframe=timeframe, + get_hist=get_hist, + sampler_stream=sampler_stream, + mkt=mkt, + )) + + # 2nd nursery END + + # TODO: who would want to? + if nulls_detected: + await nulls_detected.wait() + + await bf_done.wait() + # TODO: maybe start history anal and load missing "history + # gaps" via backend.. + + # if len(hist_shm.array) < 2: + # TODO: there's an edge case here to solve where if the last + # frame before market close (at least on ib) was pushed and + # there was only "1 new" row pushed from the first backfill + # query-iteration, then the sample step sizing calcs will + # break upstream from here since you can't diff on at least + # 2 steps... probably should also add logic to compute from + # the tsdb series and stash that somewhere as meta data on + # the shm buffer?.. no se. + + # backload any further data from tsdb (concurrently per + # timeframe) if not all data was able to be loaded (in memory) + # from the ``StorageClient.load()`` call above. + await trio.sleep_forever() + + # XXX NOTE: this is legacy from when we were using + # marketstore and we needed to continue backloading + # incrementally from the tsdb client.. (bc it couldn't + # handle a single large query with gRPC for some + # reason.. classic goolag pos) + # tn.start_soon( + # back_load_from_tsdb, + + # storemod, + # storage, + # fqme, + + # tsdb_history, + # last_tsdb_dt, + # mr_start_dt, + # mr_end_dt, + # bf_done, + + # timeframe, + # shm, + # ) + + +async def manage_history( + mod: ModuleType, + mkt: MktPair, + some_data_ready: trio.Event, + feed_is_live: trio.Event, + timeframe: float = 60, # in seconds + + task_status: TaskStatus[ + tuple[ShmArray, ShmArray] + ] = trio.TASK_STATUS_IGNORED, + +) -> None: + ''' + Load historical series data from offline-storage (tsdb) and any + missing new datums from data provider(s). 
+ + This is the primary "backfilling service" `trio.Task` entrypoint + and conducts, + + - time-series retreival for offline-data previously stored in + any (connected) tsdb, + + - queries for missing new datums (compared with the latest found + from ^) onward to the present by pulling from available + `datad`-provider backends. + + - real-time update of both the existing tsdb-records and the + allocated shared-memory-buffer as required by downstream + `piker.data`-layer consumer-wares. + + Init sequence: + ------------- + - allocate shm (numpy array) buffers for 60s & 1s sample rates + - configure "zero index" for each buffer: the index where + history will prepended *to* and new live data will be + appened *from*. + - open a ``.storage.StorageClient`` and load any existing tsdb + history as well as (async) start a backfill task which loads + missing (newer) history from the data provider backend: + - tsdb history is loaded first and pushed to shm ASAP. + - the backfill task loads the most recent history before + unblocking its parent task, so that the `ShmArray._last` is + up to date to allow the OHLC sampler to begin writing new + samples as the correct buffer index once the provider feed + engages. + + ''' + # TODO: is there a way to make each shm file key + # actor-tree-discovery-addr unique so we avoid collisions + # when doing tests which also allocate shms for certain instruments + # that may be in use on the system by some other running daemons? + # from tractor._state import _runtime_vars + # port = _runtime_vars['_root_mailbox'][1] + + uid: tuple = tractor.current_actor().uid + name, uuid = uid + service: str = name.rstrip(f'.{mod.name}') + fqme: str = mkt.get_fqme(delim_char='') + + # (maybe) allocate shm array for this broker/symbol which will + # be used for fast near-term history capture and processing. 
+ hist_shm, opened = maybe_open_shm_array( + size=_default_hist_size, + append_start_index=_hist_buffer_start, + + key=f'piker.{service}[{uuid[:16]}].{fqme}.hist', + + # use any broker defined ohlc dtype: + dtype=getattr(mod, '_ohlc_dtype', def_iohlcv_fields), + + # we expect the sub-actor to write + readonly=False, + ) + hist_zero_index = hist_shm.index - 1 + + # TODO: history validation + if not opened: + raise RuntimeError( + "Persistent shm for sym was already open?!" + ) + + rt_shm, opened = maybe_open_shm_array( + size=_default_rt_size, + append_start_index=_rt_buffer_start, + key=f'piker.{service}[{uuid[:16]}].{fqme}.rt', + + # use any broker defined ohlc dtype: + dtype=getattr(mod, '_ohlc_dtype', def_iohlcv_fields), + + # we expect the sub-actor to write + readonly=False, + ) + + # (for now) set the rt (hft) shm array with space to prepend + # only a few days worth of 1s history. + days: int = 2 + start_index: int = days*_secs_in_day + rt_shm._first.value = start_index + rt_shm._last.value = start_index + rt_zero_index = rt_shm.index - 1 + + if not opened: + raise RuntimeError( + "Persistent shm for sym was already open?!" + ) + + open_history_client = getattr( + mod, + 'open_history_client', + ) + assert open_history_client + + # TODO: maybe it should be a subpkg of `.data`? + from piker import storage + + async with ( + storage.open_storage_client() as (storemod, client), + + # NOTE: this nursery spawns a task per "timeframe" (aka + # sampling period) data set since normally differently + # sampled timeseries can be loaded / process independently + # ;) + tractor.trionics.collapse_eg(), + trio.open_nursery() as tn, + ): + log.info( + f'Connecting to storage backend `{storemod.name}`:\n' + f'location: {client.address}\n' + f'db cardinality: {client.cardinality}\n' + # TODO: show backend config, eg: + # - network settings + # - storage size with compression + # - number of loaded time series? 
+ ) + + # NOTE: this call ONLY UNBLOCKS once the latest-most frame + # (i.e. history just before the live feed latest datum) of + # history has been loaded and written to the shm buffer: + # - the backfiller task can write in reverse chronological + # to the shm and tsdb + # - the tsdb data can be loaded immediately and the + # backfiller can do a single append from it's end datum and + # then prepends backward to that from the current time + # step. + tf2mem: dict = { + 1: rt_shm, + 60: hist_shm, + } + async with open_sample_stream( + period_s=1., + shms_by_period={ + 1.: rt_shm.token, + 60.: hist_shm.token, + }, + + # NOTE: we want to only open a stream for doing + # broadcasts on backfill operations, not receive the + # sample index-stream (since there's no code in this + # data feed layer that needs to consume it). + open_index_stream=True, + sub_for_broadcasts=False, + + ) as sample_stream: + # register 1s and 1m buffers with the global + # incrementer task + log.info(f'Connected to sampler stream: {sample_stream}') + + for timeframe in [60, 1]: + await tn.start(partial( + tsdb_backfill, + mod=mod, + storemod=storemod, + storage=client, + mkt=mkt, + shm=tf2mem[timeframe], + timeframe=timeframe, + sampler_stream=sample_stream, + )) + + # indicate to caller that feed can be delivered to + # remote requesting client since we've loaded history + # data that can be used. + some_data_ready.set() + + # wait for a live feed before starting the sampler. + await feed_is_live.wait() + + # yield back after client connect with filled shm + task_status.started(( + hist_zero_index, + hist_shm, + rt_zero_index, + rt_shm, + )) + + # history retreival loop depending on user interaction + # and thus a small RPC-prot for remotely controllinlg + # what data is loaded for viewing. 
+ await trio.sleep_forever() + + +def iter_dfs_from_shms( + fqme: str +) -> Generator[ + tuple[Path, ShmArray, pl.DataFrame], + None, + None, +]: + # shm buffer size table based on known sample rates + sizes: dict[str, int] = { + 'hist': _default_hist_size, + 'rt': _default_rt_size, + } + + # load all detected shm buffer files which have the + # passed FQME pattern in the file name. + shmfiles: list[Path] = [] + shmdir = Path('/dev/shm/') + + for shmfile in shmdir.glob(f'*{fqme}*'): + filename: str = shmfile.name + + # skip index files + if ( + '_first' in filename + or '_last' in filename + ): + continue + + assert shmfile.is_file() + log.debug(f'Found matching shm buffer file: {filename}') + shmfiles.append(shmfile) + + for shmfile in shmfiles: + + # lookup array buffer size based on file suffix + # being either .rt or .hist + key: str = shmfile.name.rsplit('.')[-1] + + # skip FSP buffers for now.. + if key not in sizes: + continue + + size: int = sizes[key] + + # attach to any shm buffer, load array into polars df, + # write to local parquet file. 
+ shm, opened = maybe_open_shm_array( + key=shmfile.name, + size=size, + dtype=def_iohlcv_fields, + readonly=True, + ) + assert not opened + ohlcv: np.ndarray = shm.array + df: pl.DataFrame = np2pl(ohlcv) + + yield ( + shmfile, + shm, + df, + ) From b5e4c83341dd85afa646e4f8825f0a7d612ceccf Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 2 Oct 2025 14:14:28 -0400 Subject: [PATCH 02/44] Woops, keep `np2pl` exposed from `.tsp` --- piker/tsp/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/piker/tsp/__init__.py b/piker/tsp/__init__.py index 3c49e71b..1df0a554 100644 --- a/piker/tsp/__init__.py +++ b/piker/tsp/__init__.py @@ -35,6 +35,7 @@ from ._anal import ( dedupe as dedupe, detect_time_gaps as detect_time_gaps, pl2np as pl2np, + np2pl as np2pl, # `numpy` only slice_from_time as slice_from_time, From b0953ecbee2d80fb7ee735129cc0acc71eccb986 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 2 Oct 2025 19:53:08 -0400 Subject: [PATCH 03/44] `.tsp._history`: drop `feed_is_live` syncing, another seg flag The `await feed_is_live.wait()` is more or less pointless and would only cause slower startup afaig (as-far-as-i-grok) so i'm masking it here. This also removes the final `strict_exception_groups=False` use from the non-tests code base, flipping to the `tractor.trionics` collapser once and for all! 
--- piker/tsp/_history.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/piker/tsp/_history.py b/piker/tsp/_history.py index a4ee04c2..b6b15e72 100644 --- a/piker/tsp/_history.py +++ b/piker/tsp/_history.py @@ -886,7 +886,7 @@ async def load_tsdb_hist( np.ndarray, DateTime, DateTime, -] | None: +]|None: # loads a (large) frame of data from the tsdb depending # on the db's query size limit; our "nativedb" (using # parquet) generally can load the entire history into mem @@ -899,7 +899,7 @@ async def load_tsdb_hist( DateTime, ] try: - tsdb_entry: tuple | None = await storage.load( + tsdb_entry: tuple|None = await storage.load( fqme, timeframe=timeframe, ) @@ -1046,12 +1046,15 @@ async def tsdb_backfill( last_tsdb_dt, ) = tsdb_entry + # await tractor.pause() + # if there is a gap to backfill from the first # history frame until the last datum loaded from the tsdb # continue that now in the background - async with trio.open_nursery( - strict_exception_groups=False, - ) as tn: + async with ( + tractor.trionics.collapse_eg(), + trio.open_nursery() as tn, + ): bf_done = await tn.start( partial( @@ -1322,8 +1325,14 @@ async def manage_history( # TODO: maybe it should be a subpkg of `.data`? from piker import storage + storemod: ModuleType + client: StorageClient + tn: trio.Nursery async with ( - storage.open_storage_client() as (storemod, client), + storage.open_storage_client() as ( + storemod, + client, + ), # NOTE: this nursery spawns a task per "timeframe" (aka # sampling period) data set since normally differently @@ -1392,7 +1401,7 @@ async def manage_history( some_data_ready.set() # wait for a live feed before starting the sampler. 
- await feed_is_live.wait() + # await feed_is_live.wait() # yield back after client connect with filled shm task_status.started(( From f4d9090d6d679611de68372adf9ad67c704b6928 Mon Sep 17 00:00:00 2001 From: Tyler Goodlet Date: Thu, 9 Oct 2025 20:00:05 -0400 Subject: [PATCH 04/44] `.storage.__init__`: code styling updates --- piker/storage/__init__.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/piker/storage/__init__.py b/piker/storage/__init__.py index f32f40b6..361eaadc 100644 --- a/piker/storage/__init__.py +++ b/piker/storage/__init__.py @@ -43,7 +43,6 @@ from typing import ( import numpy as np - from .. import config from ..service import ( check_for_service, @@ -152,7 +151,10 @@ class StorageConnectionError(ConnectionError): ''' -def get_storagemod(name: str) -> ModuleType: +def get_storagemod( + name: str, + +) -> ModuleType: mod: ModuleType = import_module( '.' + name, 'piker.storage', @@ -165,9 +167,12 @@ def get_storagemod(name: str) -> ModuleType: @acm async def open_storage_client( - backend: str | None = None, + backend: str|None = None, -) -> tuple[ModuleType, StorageClient]: +) -> tuple[ + ModuleType, + StorageClient, +]: ''' Load the ``StorageClient`` for named backend. 
@@ -267,7 +272,10 @@ async def open_tsdb_client( from ..data.feed import maybe_open_feed async with ( - open_storage_client() as (_, storage), + open_storage_client() as ( + _, + storage, + ), maybe_open_feed( [fqme], @@ -275,7 +283,7 @@ async def open_tsdb_client( ) as feed, ): - profiler(f'opened feed for {fqme}') + profiler(f'opened feed for {fqme!r}') # to_append = feed.hist_shm.array # to_prepend = None From 8af8ac4f7b3d63eea5ede06a0d7be66869acaaf0 Mon Sep 17 00:00:00 2001 From: goodboy Date: Sun, 18 Jan 2026 14:19:51 -0500 Subject: [PATCH 05/44] Fix polars 1.36.0 duration API Polars tightened type safety for `.dt` accessor methods requiring `total_*` methods for duration types vs datetime component accessors like `day()` which now only work on datetime dtypes. `detect_time_gaps()` in `.tsp._anal` was calling `.dt.day()` on `dt_diff` column (a duration from `.diff()`) which throws `InvalidOperationError` on modern polars. Changes: - use f-string to add pluralization to map time unit strings to `total_s` form for the new duration API. - Handle singular/plural forms: 'day' -> 'days' -> 'total_days' - Ensure trailing 's' before applying 'total_' prefix Also updates inline comments explaining the polars type distinction between datetime components vs duration totals. Fixes `piker store ldshm` crashes on datasets with time gaps. (this patch was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- piker/tsp/_anal.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/piker/tsp/_anal.py b/piker/tsp/_anal.py index 42c3aa6c..26c3740e 100644 --- a/piker/tsp/_anal.py +++ b/piker/tsp/_anal.py @@ -578,11 +578,22 @@ def detect_time_gaps( # NOTE: this flag is to indicate that on this (sampling) time # scale we expect to only be filtering against larger venue # closures-scale time gaps. 
+ # + # Map to total_ method since `dt_diff` is a duration type, + # not datetime - modern polars requires `total_*` methods + # for duration types (e.g. `total_days()` not `day()`) + # Ensure plural form for polars API (e.g. 'day' -> 'days') + unit_plural: str = ( + gap_dt_unit + if gap_dt_unit.endswith('s') + else f'{gap_dt_unit}s' + ) + duration_method: str = f'total_{unit_plural}' return step_gaps.filter( # Second by an arbitrary dt-unit step size getattr( pl.col('dt_diff').dt, - gap_dt_unit, + duration_method, )().abs() > gap_thresh ) From 6c28b1cbbcf924761aa1eb6c44ad79fc99e4cfe2 Mon Sep 17 00:00:00 2001 From: goodboy Date: Sun, 18 Jan 2026 18:18:34 -0500 Subject: [PATCH 06/44] Add `pexpect`-based `pdbp`-REPL offline helper Add a new `snippets/claude_debug_helper.py` to provide a programmatic interface to `tractor.pause()` debugger sessions for incremental data inspection matching the interactive UX but able to be run by `claude` "offline" since it can't seem to feed stdin (so it claims) to the `pdb` instance due to lack of ability to allocate a tty internally. The script-wrapper is based on `tractor`'s `tests/devx/` suite's use of `pexpect` patterns for driving `pdbp` prompts and thus enables automated-offline execution of REPL-inspection commands **without** using incremental-realtime output capture (like a human would use it). 
Features: - `run_pdb_commands()`: batch command execution - `InteractivePdbSession`: context manager for step-by-step REPL interaction - `expect()` wrapper: timeout handling with buffer display - Proper stdin/stdout handling via `pexpect.spawn()` Example usage: ```python from claude_debug_helper import InteractivePdbSession with InteractivePdbSession( cmd='piker store ldshm zecusdt.usdtm.perp.binance' ) as session: session.run('deduped.shape') session.run('step_gaps.shape') ``` (this patch was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- snippets/claude_debug_helper.py | 256 ++++++++++++++++++++++++++++++++ 1 file changed, 256 insertions(+) create mode 100755 snippets/claude_debug_helper.py diff --git a/snippets/claude_debug_helper.py b/snippets/claude_debug_helper.py new file mode 100755 index 00000000..97467d8a --- /dev/null +++ b/snippets/claude_debug_helper.py @@ -0,0 +1,256 @@ +#!/usr/bin/env python +''' +Programmatic debugging helper for `pdbp` REPL human-like +interaction but built to allow `claude` to interact with +crashes and `tractor.pause()` breakpoints along side a human dev. + +Originally written by `clauded` during a backfiller inspection +session with @goodboy trying to resolve duplicate/gappy ohlcv ts +issues discovered while testing the new `nativedb` tsdb. + +Allows `claude` to run `pdb` commands and capture output in an "offline" +manner but generating similar output as if it was interacting with +the debug REPL. + +The use of `pexpect` is heavily based on tractor's REPL UX test +suite(s), namely various `tests/devx/test_debugger.py` patterns. + +''' +import sys +import os +import time + +import pexpect +from pexpect.exceptions import ( + TIMEOUT, + EOF, +) + + +PROMPT: str = r'\(Pdb\+\)' + + +def expect( + child: pexpect.spawn, + patt: str, + **kwargs, +) -> None: + ''' + Expect wrapper that prints last console data before failing.
+ + ''' + try: + child.expect( + patt, + **kwargs, + ) + except TIMEOUT: + before: str = ( + str(child.before.decode()) + if isinstance(child.before, bytes) + else str(child.before) + ) + print( + f'TIMEOUT waiting for pattern: {patt}\n' + f'Last seen output:\n{before}' + ) + raise + + +def run_pdb_commands( + commands: list[str], + initial_cmd: str = 'piker store ldshm xmrusdt.usdtm.perp.binance', + timeout: int = 30, + print_output: bool = True, +) -> dict[str, str]: + ''' + Spawn piker process, wait for pdb prompt, execute commands. + + Returns dict mapping command -> output. + + ''' + results: dict[str, str] = {} + + # Disable colored output for easier parsing + os.environ['PYTHON_COLORS'] = '0' + + # Spawn the process + if print_output: + print(f'Spawning: {initial_cmd}') + + child: pexpect.spawn = pexpect.spawn( + initial_cmd, + timeout=timeout, + encoding='utf-8', + echo=False, + ) + + # Wait for pdb prompt + try: + expect(child, PROMPT, timeout=timeout) + if print_output: + print('Reached pdb prompt!') + + # Execute each command + for cmd in commands: + if print_output: + print(f'\n>>> {cmd}') + + child.sendline(cmd) + time.sleep(0.1) + + # Wait for next prompt + expect(child, PROMPT, timeout=timeout) + + # Capture output (everything before the prompt) + output: str = ( + str(child.before.decode()) + if isinstance(child.before, bytes) + else str(child.before) + ) + results[cmd] = output + + if print_output: + print(output) + + # Quit debugger gracefully + child.sendline('quit') + try: + child.expect(EOF, timeout=5) + except (TIMEOUT, EOF): + pass + + except TIMEOUT as e: + print(f'Timeout: {e}') + if child.before: + before: str = ( + str(child.before.decode()) + if isinstance(child.before, bytes) + else str(child.before) + ) + print(f'Buffer:\n{before}') + results['_error'] = str(e) + + finally: + if child.isalive(): + child.close(force=True) + + return results + + +class InteractivePdbSession: + ''' + Interactive pdb session manager for incremental 
debugging. + + ''' + def __init__( + self, + cmd: str = 'piker store ldshm xmrusdt.usdtm.perp.binance', + timeout: int = 30, + ): + self.cmd: str = cmd + self.timeout: int = timeout + self.child: pexpect.spawn|None = None + self.history: list[tuple[str, str]] = [] + + def start(self) -> None: + ''' + Start the piker process and wait for first prompt. + + ''' + os.environ['PYTHON_COLORS'] = '0' + + print(f'Starting: {self.cmd}') + self.child = pexpect.spawn( + self.cmd, + timeout=self.timeout, + encoding='utf-8', + echo=False, + ) + + # Wait for initial prompt + expect(self.child, PROMPT, timeout=self.timeout) + print('Ready at pdb prompt!') + + def run( + self, + cmd: str, + print_output: bool = True, + ) -> str: + ''' + Execute a single pdb command and return output. + + ''' + if not self.child or not self.child.isalive(): + raise RuntimeError('Session not started or dead') + + if print_output: + print(f'\n>>> {cmd}') + + self.child.sendline(cmd) + time.sleep(0.1) + + # Wait for next prompt + expect(self.child, PROMPT, timeout=self.timeout) + + output: str = ( + str(self.child.before.decode()) + if isinstance(self.child.before, bytes) + else str(self.child.before) + ) + self.history.append((cmd, output)) + + if print_output: + print(output) + + return output + + def quit(self) -> None: + ''' + Exit the debugger and cleanup. 
+ + ''' + if self.child and self.child.isalive(): + self.child.sendline('quit') + try: + self.child.expect(EOF, timeout=5) + except (TIMEOUT, EOF): + pass + self.child.close(force=True) + + def __enter__(self): + self.start() + return self + + def __exit__(self, *args): + self.quit() + + +if __name__ == '__main__': + # Example inspection commands + inspect_cmds: list[str] = [ + 'locals().keys()', + 'type(deduped)', + 'deduped.shape', + ( + 'step_gaps.shape ' + 'if "step_gaps" in locals() ' + 'else "N/A"' + ), + ( + 'venue_gaps.shape ' + 'if "venue_gaps" in locals() ' + 'else "N/A"' + ), + ] + + # Allow commands from CLI args + if len(sys.argv) > 1: + inspect_cmds = sys.argv[1:] + + # Interactive session example + with InteractivePdbSession() as session: + for cmd in inspect_cmds: + session.run(cmd) + + print('\n=== Session Complete ===') From d5af471192b52a0131555bba9d061a663777fe85 Mon Sep 17 00:00:00 2001 From: goodboy Date: Sun, 18 Jan 2026 21:00:17 -0500 Subject: [PATCH 07/44] Add vlm-based "smart" OHLCV de-duping & bar validation Using `claude`, add a `.tsp._dedupe_smart` module that attempts "smarter" de-duplication of bars by trying to distinguish between erroneous bars partially written during concurrent backfill race conditions vs. **actual** data quality issues from historical providers. Problem: -------- Concurrent writes (live updates vs. backfilling) can create duplicate timestamped ohlcv bars with different values. Some potential scenarios include, - a market live feed is cancelled during live update resulting in the "last" datum being partially updated with all the ticks for the time step. - when the feed is rebooted during charting, the backfiller will not finalize this bar since rn it presumes it should only fill data for time steps not already in the tsdb storage. Our current naive `.unique()` approach obvi keeps the incomplete bar and a "smarter" approach is to compare the provider's final vlm amount vs.
the maybe-cancelled tsdb's bar; a higher vlm value from the provider likely indicates the cancelled-during-live-write and **not** a datum discrepancy from said data provider. Analysis (with `claude`) of `zecusdt` data revealed: - 1000 duplicate timestamps - 999 identical bars (pure duplicates from 2022 backfill overlap) - 1 volume-monotonic conflict (live partial vs backfill complete) A soln from `claude` -> `tsp._dedupe_smart.dedupe_ohlcv_smart()` which: - sorts by vlm **before** deduplication and keep the most complete bar based on vlm monotonicity as well as the following OHLCV validation assumptions: * volume should always increase * high should be non-decreasing, * low should be non-increasing * open should be identical - Separates valid race conditions from provider data quality issues and reports and returns both dfs. Change summary by `claude`: - `.tsp._dedupe_smart`: new module with validation logic - `.tsp.__init__`: expose `dedupe_ohlcv_smart()` - `.storage.cli`: integrate smart dedupe, add logging for: * duplicate counts (identical vs monotonic races) * data quality violations (non-monotonic, invalid OHLC ranges) * warnings for provider data issues - Remove `assert not diff` (duplicates are valid now) Verified on `zecusdt`: correctly keeps index 3143645 (volume=287.777) over 3143644 (volume=140.299) for conflicting 2026-01-16 18:54 UTC bar. `claude`'s Summary of reasoning ------------------------------- - volume monotonicity is critical: a bar's volume only increases during its time window. - a backfilled bar should always have volume >= live updated. 
def _check_ohlcv_validity(row: dict) -> dict[str, bool]:
    '''
    Classify one group of same-timestamp bars.

    `row` maps 'volumes', 'highs', 'lows', 'opens' and 'closes' to
    the per-group value lists (ordered by shm 'index').

    For a valid live-update -> backfill race:
    - volume should be monotonically non-decreasing
    - high should be monotonically non-decreasing
    - low should be monotonically non-increasing
    - open should be identical (fixed at bar start)

    Returns a dict of violation flags plus an 'identical_bars' flag
    which is set (with every violation flag left unset) when all bars
    in the group carry the same OHLCV values.

    '''
    vols = row['volumes']
    highs = row['highs']
    lows = row['lows']
    opens = row['opens']
    closes = row['closes']

    flags: dict[str, bool] = {
        'volume_non_monotonic': False,
        'high_decreased': False,
        'low_increased': False,
        'open_mismatch': False,
        'identical_bars': False,
    }

    # pure duplicate: every field (including the close) matches
    if all(
        len(set(vals)) == 1
        for vals in (vols, highs, lows, opens, closes)
    ):
        flags['identical_bars'] = True
        return flags

    # volume and high may only grow (or stay) as a bar completes
    flags['volume_non_monotonic'] = any(
        nxt < prev for prev, nxt in zip(vols, vols[1:])
    )
    flags['high_decreased'] = any(
        nxt < prev for prev, nxt in zip(highs, highs[1:])
    )
    # low may only shrink (or stay)
    flags['low_increased'] = any(
        nxt > prev for prev, nxt in zip(lows, lows[1:])
    )
    # open is fixed at bar start
    flags['open_mismatch'] = len(set(opens)) > 1
    return flags


def dedupe_ohlcv_smart(
    src_df: pl.DataFrame,
    time_col: str = 'time',
    volume_col: str = 'volume',
    sort: bool = True,

) -> tuple[
    pl.DataFrame,  # with dts
    pl.DataFrame,  # deduped (keeping higher volume bars)
    int,  # count of dupes removed
    pl.DataFrame|None,  # valid race conditions
    pl.DataFrame|None,  # data quality violations
]:
    '''
    Smart OHLCV deduplication keeping most complete bars.

    For duplicate timestamps, keeps bar with highest volume under
    the assumption that higher volume indicates more complete/final
    data from backfill vs partial live updates.

    The input is expected to carry 'index', 'open', 'high', 'low'
    and 'close' columns alongside `time_col` and `volume_col`; the
    'dt' column is read from the output of `with_dts()`.

    Parameters
    ----------
    src_df: the source OHLCV frame.
    time_col: name of the (epoch) timestamp column to dedupe on.
    volume_col: name of the volume column used both for picking the
        bar to keep and for the monotonicity analysis.
    sort: when set, the deduped frame is re-sorted by `time_col`;
        otherwise its row order is whatever `.unique()` yields.

    Returns
    -------
    Tuple of:
    - wdts: original dataframe with datetime columns added
    - deduped: deduplicated frame keeping highest-volume bars
    - diff: number of duplicate rows removed
    - valid_races: duplicates meeting expected race condition pattern
      (volume monotonic, OHLC ranges valid), or `None` when empty
    - data_quality_issues: duplicates violating expected relationships
      indicating provider data problems, or `None` when empty

    '''
    wdts: pl.DataFrame = with_dts(src_df)

    # Find duplicate timestamps
    dupes: pl.DataFrame = wdts.filter(
        pl.col(time_col).is_duplicated()
    )
    if dupes.is_empty():
        # No duplicates, return as-is
        return (wdts, wdts, 0, None, None)

    # Collect each duplicate group's fields as lists, ordered by the
    # shm 'index' so list position reflects write order.
    dupe_analysis: pl.DataFrame = (
        dupes
        .sort([time_col, 'index'])
        .group_by(time_col, maintain_order=True)
        .agg([
            pl.col('index').alias('indices'),
            # honour the caller's volume column name
            pl.col(volume_col).alias('volumes'),
            pl.col('high').alias('highs'),
            pl.col('low').alias('lows'),
            pl.col('open').alias('opens'),
            pl.col('close').alias('closes'),
            pl.col('dt').first().alias('dt'),
            pl.len().alias('count'),
        ])
    )

    # Apply validation per group and unnest the flag struct into
    # plain boolean columns.
    dupe_analysis = dupe_analysis.with_columns([
        pl.struct(['volumes', 'highs', 'lows', 'opens', 'closes'])
        .map_elements(
            _check_ohlcv_validity,
            return_dtype=pl.Struct([
                pl.Field('volume_non_monotonic', pl.Boolean),
                pl.Field('high_decreased', pl.Boolean),
                pl.Field('low_increased', pl.Boolean),
                pl.Field('open_mismatch', pl.Boolean),
                pl.Field('identical_bars', pl.Boolean),
            ])
        )
        .alias('validity')
    ]).unnest('validity')

    # NOTE: identical groups never carry a violation flag, so this
    # single predicate cleanly partitions the groups.
    any_violation: pl.Expr = (
        pl.col('volume_non_monotonic')
        | pl.col('high_decreased')
        | pl.col('low_increased')
        | pl.col('open_mismatch')
    )

    # Valid if no violations (includes pure-identical bars)
    valid_races: pl.DataFrame|None = dupe_analysis.filter(
        ~any_violation
    )
    if valid_races.is_empty():
        valid_races = None

    data_quality_issues: pl.DataFrame|None = dupe_analysis.filter(
        any_violation
    )
    if data_quality_issues.is_empty():
        data_quality_issues = None

    # Deduplicate: keep highest volume bar for each timestamp; the
    # volume-ascending sort puts that bar last within its group.
    deduped: pl.DataFrame = (
        wdts
        .sort([time_col, volume_col])
        .unique(
            subset=[time_col],
            keep='last',
            maintain_order=False,
        )
    )

    # Re-sort by time
    if sort:
        deduped = deduped.sort(by=time_col)

    diff: int = wdts.height - deduped.height

    return (
        wdts,
        deduped,
        diff,
        valid_races,
        data_quality_issues,
    )
Particularly we now ignore, - any `dt`/`prev_dt` values which are UNIX-epoch timestamped (val of 0). - any row-is-first-row in the df; there is no previous. - any missing previous datum by 'index', in which case we lookup the `wdts` prior row and use that instead. * this would indicate a missing sample for the time-step but we can still detect a "gap" by looking at the prior row, by df-abs-index `i`, and use its timestamp to determine the period/size of missing samples (which need to likely still be retrieved). * in this case i'm leaving in a pause-point for introspecting these rarer cases when `--pdb` is passed via CLI. Relatedly in the `piker store` CLI ep, - add `--pdb` flag to `piker store`, pass it verbatim as `debug_mode`. - when `times` has only a single row, don't calc a `period_s` median. - only trace `null_segs` when in debug mode. - always markup/dedupe gaps for `period_s==60` --- piker/storage/cli.py | 77 ++++++++++++++++++++++++++++++-------------- 1 file changed, 53 insertions(+), 24 deletions(-) diff --git a/piker/storage/cli.py b/piker/storage/cli.py index 5c087898..e97f4023 100644 --- a/piker/storage/cli.py +++ b/piker/storage/cli.py @@ -242,6 +242,7 @@ def anal( trio.run(main) +# TODO, move to `.tsp._annotate` async def markup_gaps( fqme: str, timeframe: float, @@ -288,18 +289,38 @@ async def markup_gaps( ) # XXX: probably a gap in the (newly sorted or de-duplicated) # dt-df, so we might need to re-index first.. 
+ dt: pl.Series = row['dt'] + dt_prev: pl.Series = row['dt_prev'] if prev_r.is_empty(): - await tractor.pause() + + # XXX, filter out any special ignore cases, + # - UNIX-epoch stamped datums + # - first row + if ( + dt_prev.dt.epoch()[0] == 0 + or + dt.dt.epoch()[0] == 0 + ): + log.warning('Skipping row with UNIX epoch timestamp ??') + continue + + if wdts[0]['index'][0] == iend: # first row + log.warning('Skipping first-row (has no previous obvi) !!') + continue + + # XXX, if the previous-row by shm-index is missing, + # meaning there is a missing sample (set), get the prior + # row by df index and attempt to use it? + i_wdts: pl.DataFrame = wdts.with_row_index(name='i') + i_row: int = i_wdts.filter(pl.col('index') == iend)['i'][0] + prev_row_by_i = wdts[i_row] + prev_r: pl.DataFrame = prev_row_by_i + + # debug any missing pre-row + if tractor._state.is_debug_mode(): + await tractor.pause() istart: int = prev_r['index'][0] - # dt_start_t: float = dt_prev.timestamp() - - # start_t: float = prev_r['time'] - # assert ( - # dt_start_t - # == - # start_t - # ) # TODO: implement px-col width measure # and ensure at least as many px-cols @@ -358,6 +379,7 @@ def ldshm( fqme: str, write_parquet: bool = True, reload_parquet_to_shm: bool = True, + pdb: bool = False, # --pdb passed? ) -> None: ''' @@ -377,7 +399,7 @@ def ldshm( open_piker_runtime( 'polars_boi', enable_modules=['piker.data._sharedmem'], - debug_mode=True, + debug_mode=pdb, ), open_storage_client() as ( mod, @@ -397,17 +419,19 @@ def ldshm( times: np.ndarray = shm.array['time'] d1: float = float(times[-1] - times[-2]) - d2: float = float(times[-2] - times[-3]) - med: float = np.median(np.diff(times)) - if ( - d1 < 1. - and d2 < 1. - and med < 1. 
- ): - raise ValueError( - f'Something is wrong with time period for {shm}:\n{times}' - ) - + d2: float = 0 + # XXX, take a median sample rate if sufficient data + if times.size > 2: + d2: float = float(times[-2] - times[-3]) + med: float = np.median(np.diff(times)) + if ( + d1 < 1. + and d2 < 1. + and med < 1. + ): + raise ValueError( + f'Something is wrong with time period for {shm}:\n{times}' + ) period_s: float = float(max(d1, d2, med)) null_segs: tuple = tsp.get_null_segs( @@ -417,7 +441,9 @@ def ldshm( # TODO: call null-seg fixer somehow? if null_segs: - await tractor.pause() + + if tractor._state.is_debug_mode(): + await tractor.pause() # async with ( # trio.open_nursery() as tn, # mod.open_history_client( @@ -498,8 +524,11 @@ def ldshm( if ( not venue_gaps.is_empty() or ( - period_s < 60 - and not step_gaps.is_empty() + not step_gaps.is_empty() + # XXX, i presume i put this bc i was guarding + # for ib venue gaps? + # and + # period_s < 60 ) ): # write repaired ts to parquet-file? From 582f9be02f771c8bf9c8d5cc7da4fed100d97ede Mon Sep 17 00:00:00 2001 From: goodboy Date: Wed, 21 Jan 2026 22:31:30 -0500 Subject: [PATCH 10/44] Enable tracing back insert backfills Namely insertion writes which over-fill the shm buffer past the latest tsdb sample via `.tsp._history.shm_push_in_between()`. Deats, - check earliest `to_push` timestamp and enter pause point if it's earlier then the tsdb's `backfill_until_dt` stamp. 
- requires actually passing the `backfill_until_dt: datetime` thru, * `get_null_segs()` * `maybe_fill_null_segments()` * `shm_push_in_between()` (obvi XD) --- piker/tsp/_history.py | 58 +++++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 16 deletions(-) diff --git a/piker/tsp/_history.py b/piker/tsp/_history.py index b6b15e72..361b0e23 100644 --- a/piker/tsp/_history.py +++ b/piker/tsp/_history.py @@ -75,7 +75,6 @@ from piker.brokers._util import ( ) from piker.storage import TimeseriesNotFound from ._anal import ( - dedupe, get_null_segs, iter_null_segs, @@ -120,15 +119,16 @@ _rt_buffer_start = int((_days_worth - 1) * _secs_in_day) def diff_history( array: np.ndarray, - append_until_dt: datetime | None = None, - prepend_until_dt: datetime | None = None, + append_until_dt: datetime|None = None, + prepend_until_dt: datetime|None = None, ) -> np.ndarray: # no diffing with tsdb dt index possible.. if ( prepend_until_dt is None - and append_until_dt is None + and + append_until_dt is None ): return array @@ -140,15 +140,26 @@ def diff_history( return array[times >= prepend_until_dt.timestamp()] -# TODO: can't we just make this a sync func now? async def shm_push_in_between( shm: ShmArray, to_push: np.ndarray, prepend_index: int, + backfill_until_dt: datetime, update_start_on_prepend: bool = False, ) -> int: + + # XXX, try to catch bad inserts by peeking at the first/last + # times and ensure we don't violate order. 
+ f_times: np.ndarray = to_push['time'] + f_start: float = f_times[0] + f_start_dt = from_timestamp(f_start) + if ( + f_start_dt < backfill_until_dt + ): + await tractor.pause() + # XXX: extremely important, there can be no checkpoints # in the body of this func to avoid entering new ``frames`` # values while we're pipelining the current ones to @@ -181,6 +192,7 @@ async def maybe_fill_null_segments( get_hist: Callable, sampler_stream: tractor.MsgStream, mkt: MktPair, + backfill_until_dt: datetime, task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED, @@ -191,7 +203,11 @@ async def maybe_fill_null_segments( frame: Frame = shm.array - null_segs: tuple | None = get_null_segs( + # TODO, put in parent task/daemon root! + import greenback + await greenback.ensure_portal() + + null_segs: tuple|None = get_null_segs( frame, period=timeframe, ) @@ -237,6 +253,7 @@ async def maybe_fill_null_segments( shm, to_push, prepend_index=absi_end, + backfill_until_dt=backfill_until_dt, update_start_on_prepend=False, ) # TODO: UI side needs IPC event to update.. 
@@ -352,15 +369,12 @@ async def start_backfill( mkt: MktPair, shm: ShmArray, timeframe: float, - backfill_from_shm_index: int, backfill_from_dt: datetime, - sampler_stream: tractor.MsgStream, - backfill_until_dt: datetime | None = None, - storage: StorageClient | None = None, - + backfill_until_dt: datetime|None = None, + storage: StorageClient|None = None, write_tsdb: bool = True, task_status: TaskStatus[tuple] = trio.TASK_STATUS_IGNORED, @@ -495,7 +509,14 @@ async def start_backfill( assert time[-1] == next_end_dt.timestamp() - expected_dur: Interval = last_start_dt - next_start_dt + expected_dur: Interval = ( + last_start_dt.subtract( + seconds=timeframe + # ^XXX, always "up to" the bar *before* + ) + - + next_start_dt + ) # frame's worth of sample-period-steps, in seconds frame_size_s: float = len(array) * timeframe @@ -556,6 +577,7 @@ async def start_backfill( shm, to_push, prepend_index=next_prepend_index, + backfill_until_dt=backfill_until_dt, update_start_on_prepend=update_start_on_prepend, ) await sampler_stream.send({ @@ -585,6 +607,7 @@ async def start_backfill( shm, to_push, prepend_index=next_prepend_index, + backfill_until_dt=backfill_until_dt, update_start_on_prepend=update_start_on_prepend, ) await sampler_stream.send({ @@ -899,7 +922,7 @@ async def load_tsdb_hist( DateTime, ] try: - tsdb_entry: tuple|None = await storage.load( + tsdb_entry: tuple|None = await storage.load( fqme, timeframe=timeframe, ) @@ -1056,7 +1079,7 @@ async def tsdb_backfill( trio.open_nursery() as tn, ): - bf_done = await tn.start( + bf_done: trio.Event = await tn.start( partial( start_backfill, get_hist=get_hist, @@ -1076,8 +1099,10 @@ async def tsdb_backfill( write_tsdb=True, ) ) - nulls_detected: trio.Event | None = None + nulls_detected: trio.Event|None = None + if last_tsdb_dt is not None: + # calc the index from which the tsdb data should be # prepended, presuming there is a gap between the # latest frame (loaded/read above) and the latest @@ -1148,7 +1173,7 @@ async 
def tsdb_backfill( # TODO: ideally these can never exist! # -[ ] somehow it seems sometimes we're writing zero-ed # segments to tsdbs during teardown? - # -[ ] can we ensure that the backcfiller tasks do this + # -[ ] can we ensure that the backfiller tasks do this # work PREVENTAVELY instead? # -[ ] fill in non-zero epoch time values ALWAYS! # await maybe_fill_null_segments( @@ -1160,6 +1185,7 @@ async def tsdb_backfill( get_hist=get_hist, sampler_stream=sampler_stream, mkt=mkt, + backfill_until_dt=last_tsdb_dt, )) # 2nd nursery END From 9257af02b962a97d88a9cae1d5a2c0db9e14f9f7 Mon Sep 17 00:00:00 2001 From: goodboy Date: Wed, 21 Jan 2026 23:52:12 -0500 Subject: [PATCH 11/44] Mv `markup_gaps()` to new `.tsp._annotate` mod --- piker/storage/cli.py | 141 +--------------------------------- piker/tsp/__init__.py | 3 + piker/tsp/_annotate.py | 166 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 170 insertions(+), 140 deletions(-) create mode 100644 piker/tsp/_annotate.py diff --git a/piker/storage/cli.py b/piker/storage/cli.py index e97f4023..90d5baed 100644 --- a/piker/storage/cli.py +++ b/piker/storage/cli.py @@ -19,16 +19,10 @@ Storage middle-ware CLIs. """ from __future__ import annotations -# from datetime import datetime -# from contextlib import ( -# AsyncExitStack, -# ) from pathlib import Path -from math import copysign import time from types import ModuleType from typing import ( - Any, TYPE_CHECKING, ) @@ -47,7 +41,6 @@ from piker.data import ( ShmArray, ) from piker import tsp -from piker.data._formatters import BGM from . import log from . import ( __tsdbs__, @@ -242,138 +235,6 @@ def anal( trio.run(main) -# TODO, move to `.tsp._annotate` -async def markup_gaps( - fqme: str, - timeframe: float, - actl: AnnotCtl, - wdts: pl.DataFrame, - gaps: pl.DataFrame, - -) -> dict[int, dict]: - ''' - Remote annotate time-gaps in a dt-fielded ts (normally OHLC) - with rectangles. 
- - ''' - aids: dict[int] = {} - for i in range(gaps.height): - - row: pl.DataFrame = gaps[i] - - # the gap's RIGHT-most bar's OPEN value - # at that time (sample) step. - iend: int = row['index'][0] - # dt: datetime = row['dt'][0] - # dt_prev: datetime = row['dt_prev'][0] - # dt_end_t: float = dt.timestamp() - - - # TODO: can we eventually remove this - # once we figure out why the epoch cols - # don't match? - # TODO: FIX HOW/WHY these aren't matching - # and are instead off by 4hours (EST - # vs. UTC?!?!) - # end_t: float = row['time'] - # assert ( - # dt.timestamp() - # == - # end_t - # ) - - # the gap's LEFT-most bar's CLOSE value - # at that time (sample) step. - prev_r: pl.DataFrame = wdts.filter( - pl.col('index') == iend - 1 - ) - # XXX: probably a gap in the (newly sorted or de-duplicated) - # dt-df, so we might need to re-index first.. - dt: pl.Series = row['dt'] - dt_prev: pl.Series = row['dt_prev'] - if prev_r.is_empty(): - - # XXX, filter out any special ignore cases, - # - UNIX-epoch stamped datums - # - first row - if ( - dt_prev.dt.epoch()[0] == 0 - or - dt.dt.epoch()[0] == 0 - ): - log.warning('Skipping row with UNIX epoch timestamp ??') - continue - - if wdts[0]['index'][0] == iend: # first row - log.warning('Skipping first-row (has no previous obvi) !!') - continue - - # XXX, if the previous-row by shm-index is missing, - # meaning there is a missing sample (set), get the prior - # row by df index and attempt to use it? - i_wdts: pl.DataFrame = wdts.with_row_index(name='i') - i_row: int = i_wdts.filter(pl.col('index') == iend)['i'][0] - prev_row_by_i = wdts[i_row] - prev_r: pl.DataFrame = prev_row_by_i - - # debug any missing pre-row - if tractor._state.is_debug_mode(): - await tractor.pause() - - istart: int = prev_r['index'][0] - - # TODO: implement px-col width measure - # and ensure at least as many px-cols - # shown per rect as configured by user. 
- # gap_w: float = abs((iend - istart)) - # if gap_w < 6: - # margin: float = 6 - # iend += margin - # istart -= margin - - rect_gap: float = BGM*3/8 - opn: float = row['open'][0] - ro: tuple[float, float] = ( - # dt_end_t, - iend + rect_gap + 1, - opn, - ) - cls: float = prev_r['close'][0] - lc: tuple[float, float] = ( - # dt_start_t, - istart - rect_gap, # + 1 , - cls, - ) - - color: str = 'dad_blue' - diff: float = cls - opn - sgn: float = copysign(1, diff) - color: str = { - -1: 'buy_green', - 1: 'sell_red', - }[sgn] - - rect_kwargs: dict[str, Any] = dict( - fqme=fqme, - timeframe=timeframe, - start_pos=lc, - end_pos=ro, - color=color, - ) - - aid: int = await actl.add_rect(**rect_kwargs) - assert aid - aids[aid] = rect_kwargs - - # tell chart to redraw all its - # graphics view layers Bo - await actl.redraw( - fqme=fqme, - timeframe=timeframe, - ) - return aids - - @store.command() def ldshm( fqme: str, @@ -577,7 +438,7 @@ def ldshm( do_markup_gaps: bool = True if do_markup_gaps: new_df: pl.DataFrame = tsp.np2pl(new) - aids: dict = await markup_gaps( + aids: dict = await tsp._annotate.markup_gaps( fqme, period_s, actl, diff --git a/piker/tsp/__init__.py b/piker/tsp/__init__.py index 81274ed8..baa28c82 100644 --- a/piker/tsp/__init__.py +++ b/piker/tsp/__init__.py @@ -47,3 +47,6 @@ from ._history import ( iter_dfs_from_shms as iter_dfs_from_shms, manage_history as manage_history, ) +from ._annotate import ( + markup_gaps as markup_gaps, +) diff --git a/piker/tsp/_annotate.py b/piker/tsp/_annotate.py new file mode 100644 index 00000000..797c38cf --- /dev/null +++ b/piker/tsp/_annotate.py @@ -0,0 +1,166 @@ +# piker: trading gear for hackers +# Copyright (C) 2018-present Tyler Goodlet (in stewardship of pikers) + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later 
async def markup_gaps(
    fqme: str,
    timeframe: float,
    actl: AnnotCtl,
    wdts: pl.DataFrame,
    gaps: pl.DataFrame,

) -> dict[int, dict]:
    '''
    Remote annotate time-gaps in a dt-fielded ts (normally OHLC)
    with rectangles.

    For each row of `gaps` a rect is requested (via
    `actl.add_rect()`) spanning from the close of the bar preceding
    the gap to the open of the gap's right-most bar; a single redraw
    is requested once all rects are submitted.

    Rows are skipped when their previous-by-'index' row is missing
    from `wdts` AND either,
    - the `dt`/`dt_prev` stamp is the UNIX epoch (value 0),
    - the row is the first in `wdts` (there is no previous).

    Returns a table of annotation-id -> the kwargs used to create
    that rect.

    '''
    aids: dict[int, dict] = {}
    for i in range(gaps.height):
        row: pl.DataFrame = gaps[i]

        # the gap's RIGHT-most bar's OPEN value
        # at that time (sample) step.
        iend: int = row['index'][0]

        # TODO: FIX HOW/WHY the `dt.timestamp()` and 'time' epoch
        # cols aren't matching and are instead off by 4hours (EST
        # vs. UTC?!?!)

        # the gap's LEFT-most bar's CLOSE value
        # at that time (sample) step.
        prev_r: pl.DataFrame = wdts.filter(
            pl.col('index') == iend - 1
        )
        # XXX: probably a gap in the (newly sorted or de-duplicated)
        # dt-df, so we might need to re-index first..
        if prev_r.is_empty():
            dt: pl.Series = row['dt']
            dt_prev: pl.Series = row['dt_prev']

            # XXX, filter out any special ignore cases,
            # - UNIX-epoch stamped datums
            # - first row
            if (
                dt_prev.dt.epoch()[0] == 0
                or
                dt.dt.epoch()[0] == 0
            ):
                log.warning('Skipping row with UNIX epoch timestamp ??')
                continue

            if wdts[0]['index'][0] == iend:  # first row
                log.warning('Skipping first-row (has no previous obvi) !!')
                continue

            # XXX, if the previous-row by shm-index is missing,
            # meaning there is a missing sample (set), get the prior
            # row by df (positional) index and use it instead.
            i_wdts: pl.DataFrame = wdts.with_row_index(name='i')
            i_row: int = i_wdts.filter(pl.col('index') == iend)['i'][0]
            # NOTE: `i_row` is the gap row's own position, the prior
            # datum is one before it; `i_row > 0` is ensured by the
            # first-row guard above.
            prev_r = wdts[i_row - 1]

            # debug any missing pre-row
            if tractor._state.is_debug_mode():
                await tractor.pause()

        istart: int = prev_r['index'][0]

        # TODO: implement px-col width measure
        # and ensure at least as many px-cols
        # shown per rect as configured by user.

        rect_gap: float = BGM*3/8
        opn: float = row['open'][0]
        ro: tuple[float, float] = (
            iend + rect_gap + 1,
            opn,
        )
        cls: float = prev_r['close'][0]
        lc: tuple[float, float] = (
            istart - rect_gap,
            cls,
        )

        # color by gap direction: prior close above the next open
        # (or equal) -> red, otherwise green.
        diff: float = cls - opn
        sgn: float = copysign(1, diff)
        color: str = {
            -1: 'buy_green',
            1: 'sell_red',
        }[sgn]

        rect_kwargs: dict[str, Any] = dict(
            fqme=fqme,
            timeframe=timeframe,
            start_pos=lc,
            end_pos=ro,
            color=color,
        )

        aid: int = await actl.add_rect(**rect_kwargs)
        assert aid
        aids[aid] = rect_kwargs

    # tell chart to redraw all its
    # graphics view layers Bo
    await actl.redraw(
        fqme=fqme,
        timeframe=timeframe,
    )
    return aids
from __future__ import annotations from collections import defaultdict from typing import ( + Literal, Sequence, TYPE_CHECKING, ) @@ -71,9 +72,18 @@ log = get_logger(__name__) class ArrowEditor(Struct): + ''' + Annotate a chart-view with arrows most often used for indicating, + - order txns/clears, + - positions directions, + - general points-of-interest like nooz events. + ''' godw: GodWidget = None # type: ignore # noqa - _arrows: dict[str, list[pg.ArrowItem]] = {} + _arrows: dict[ + str, + list[pg.ArrowItem] + ] = {} def add( self, @@ -81,8 +91,14 @@ class ArrowEditor(Struct): uid: str, x: float, y: float, - color: str = 'default', - pointing: str | None = None, + color: str|None = None, + pointing: Literal[ + 'up', + 'down', + None, + ] = None, + alpha: int = 255, + zval: float = 1e9, ) -> pg.ArrowItem: ''' @@ -98,6 +114,11 @@ class ArrowEditor(Struct): # scale arrow sizing to dpi-aware font size = _font.font.pixelSize() * 0.8 + color = color or 'default' + color = QColor(hcolor(color)) + color.setAlpha(alpha) + pen = fn.mkPen(color, width=1) + brush = fn.mkBrush(color) arrow = pg.ArrowItem( angle=angle, baseAngle=0, @@ -105,22 +126,58 @@ class ArrowEditor(Struct): headWidth=size/2, tailLen=None, pxMode=True, - # coloring - pen=pg.mkPen(hcolor('papas_special')), - brush=pg.mkBrush(hcolor(color)), + pen=pen, + brush=brush, ) + arrow.setZValue(zval) arrow.setPos(x, y) - self._arrows.setdefault(uid, []).append(arrow) + plot.addItem(arrow) # render to view - # render to view - plot.addItem(arrow) + # register for removal + arrow._uid = uid + self._arrows.setdefault( + uid, [] + ).append(arrow) return arrow - def remove(self, arrow) -> bool: + def remove( + self, + arrow: pg.ArrowItem, + ) -> None: + ''' + Remove a *single arrow* from all chart views to which it was + added. 
def remove_all(self) -> set[pg.ArrowItem]:
    '''
    Remove all arrows added by this editor from all
    chart-views.

    Returns the set of arrow items which were handed to
    `.remove()`.

    '''
    removed: set[pg.ArrowItem] = set()
    # NOTE: `.remove()` drops the arrow from its per-uid list in
    # place, so loop over snapshots; iterating the live list
    # while it shrinks would skip every other arrow.
    for arrows in list(self._arrows.values()):
        for arrow in list(arrows):
            self.remove(arrow)
            removed.add(arrow)

    return removed
* `pg.SelectRect` keeps with calling `.delete()`. - global-ize an `_editors` table to enable the prior. - add an explicit RTE for races on the chart-actor's `_dss` init. --- piker/tsp/_annotate.py | 36 ++++++--- piker/ui/_remote_ctl.py | 172 +++++++++++++++++++++++++++++++++++----- 2 files changed, 177 insertions(+), 31 deletions(-) diff --git a/piker/tsp/_annotate.py b/piker/tsp/_annotate.py index 797c38cf..70344b66 100644 --- a/piker/tsp/_annotate.py +++ b/piker/tsp/_annotate.py @@ -50,7 +50,6 @@ async def markup_gaps( ''' aids: dict[int] = {} for i in range(gaps.height): - row: pl.DataFrame = gaps[i] # the gap's RIGHT-most bar's OPEN value @@ -113,7 +112,6 @@ async def markup_gaps( await tractor.pause() istart: int = prev_r['index'][0] - # TODO: implement px-col width measure # and ensure at least as many px-cols # shown per rect as configured by user. @@ -125,25 +123,25 @@ async def markup_gaps( rect_gap: float = BGM*3/8 opn: float = row['open'][0] + cls: float = prev_r['close'][0] ro: tuple[float, float] = ( - # dt_end_t, iend + rect_gap + 1, opn, ) - cls: float = prev_r['close'][0] lc: tuple[float, float] = ( - # dt_start_t, istart - rect_gap, # + 1 , cls, ) - color: str = 'dad_blue' diff: float = cls - opn sgn: float = copysign(1, diff) - color: str = { - -1: 'buy_green', - 1: 'sell_red', - }[sgn] + + color: str = 'dad_blue' + # TODO? mks more sense to have up/down coloring? 
+ # color: str = { + # -1: 'lilypad_green', # up-gap + # 1: 'wine', # down-gap + # }[sgn] rect_kwargs: dict[str, Any] = dict( fqme=fqme, @@ -153,9 +151,27 @@ async def markup_gaps( color=color, ) + # add up/down rects aid: int = await actl.add_rect(**rect_kwargs) assert aid aids[aid] = rect_kwargs + direction: str = ( + 'down' if sgn == 1 + else 'up' + ) + arrow_kwargs: dict[str, Any] = dict( + fqme=fqme, + timeframe=timeframe, + x=iend, + y=cls, + color=color, + alpha=160, + pointing=direction, + ) + + aid: int = await actl.add_arrow( + **arrow_kwargs + ) # tell chart to redraw all its # graphics view layers Bo diff --git a/piker/ui/_remote_ctl.py b/piker/ui/_remote_ctl.py index 05e145e7..42f8a9b7 100644 --- a/piker/ui/_remote_ctl.py +++ b/piker/ui/_remote_ctl.py @@ -27,10 +27,12 @@ from contextlib import ( from functools import partial from pprint import pformat from typing import ( - # Any, AsyncContextManager, + Literal, ) +from uuid import uuid4 +import pyqtgraph as pg import tractor import trio from tractor import trionics @@ -49,11 +51,13 @@ from piker.ui.qt import ( ) from ._display import DisplayState from ._interaction import ChartView -from ._editors import SelectRect +from ._editors import ( + SelectRect, + ArrowEditor, +) from ._chart import ChartPlotWidget from ._dataviz import Viz - log = get_logger(__name__) # NOTE: this is UPDATED by the `._display.graphics_update_loop()` @@ -83,8 +87,34 @@ _ctxs: IpcCtxTable = {} # the "annotations server" which actually renders to a Qt canvas). # type AnnotsTable = dict[int, QGraphicsItem] AnnotsTable = dict[int, QGraphicsItem] +EditorsTable = dict[int, ArrowEditor] _annots: AnnotsTable = {} +_editors: EditorsTable = {} + +def rm_annot( + annot: ArrowEditor|SelectRect +) -> bool: + global _editors + match annot: + case pg.ArrowItem(): + editor = _editors[annot._uid] + editor.remove(annot) + # ^TODO? only remove each arrow or all? 
+ # if editor._arrows: + # editor.remove_all() + # else: + # log.warning( + # f'Annot already removed!\n' + # f'{annot!r}\n' + # ) + return True + + case SelectRect(): + annot.delete() + return True + + return False async def serve_rc_annots( @@ -95,6 +125,12 @@ async def serve_rc_annots( annots: AnnotsTable, ) -> None: + ''' + A small viz(ualization) server for remote ctl of chart + annotations. + + ''' + global _editors async for msg in annot_req_stream: match msg: case { @@ -104,7 +140,6 @@ async def serve_rc_annots( 'meth': str(meth), 'kwargs': dict(kwargs), }: - ds: DisplayState = _dss[fqme] chart: ChartPlotWidget = { 60: ds.hist_chart, @@ -136,15 +171,67 @@ async def serve_rc_annots( aids.add(aid) await annot_req_stream.send(aid) + case { + 'cmd': 'ArrowEditor', + 'fqme': fqme, + 'timeframe': timeframe, + 'meth': 'add'|'remove' as meth, + 'kwargs': { + 'x': float(x), + 'y': float(y), + 'pointing': pointing, + 'color': color, + 'aid': str()|None as aid, + 'alpha': int(alpha), + }, + # ?TODO? split based on method fn-sigs? + # 'pointing', + }: + ds: DisplayState = _dss[fqme] + chart: ChartPlotWidget = { + 60: ds.hist_chart, + 1: ds.chart, + }[timeframe] + cv: ChartView = chart.cv + godw = chart.linked.godwidget + + arrows = ArrowEditor(godw=godw) + # `.add/.remove()` API + if meth != 'add': + # await tractor.pause() + raise ValueError( + f'Invalid arrow-edit request ?\n' + f'{msg!r}\n' + ) + + aid: str = str(uuid4()) + arrow: pg.ArrowItem = arrows.add( + plot=chart.plotItem, + uid=aid, + x=x, + y=y, + pointing=pointing, + color=color, + alpha=alpha, + ) + annots[aid] = arrow + _editors[aid] = arrows + aids: set[int] = ctxs[ipc_key][1] + aids.add(aid) + await annot_req_stream.send(aid) + + # TODO, use `pg.TextItem` to put a humaized + # time label beside the arrows + case { 'cmd': 'remove', - 'aid': int(aid), + 'aid': int(aid)|str(aid), }: # NOTE: this is normally entered on # a client's annotation de-alloc normally # prior to detach or modify. 
annot: QGraphicsItem = annots[aid] - annot.delete() + assert rm_annot(annot) # respond to client indicating annot # was indeed deleted. @@ -188,6 +275,12 @@ async def remote_annotate( ) -> None: global _dss, _ctxs + if not _dss: + raise RuntimeError( + 'Race condition on chart-init state ??\n' + 'Anoter actor is trying to annoate this chart ' + 'before it has fully spawned.\n' + ) assert _dss _ctxs[ctx.cid] = (ctx, set()) @@ -212,7 +305,7 @@ async def remote_annotate( assert _ctx is ctx for aid in aids: annot: QGraphicsItem = _annots[aid] - annot.delete() + assert rm_annot(annot) class AnnotCtl(Struct): @@ -334,20 +427,55 @@ class AnnotCtl(Struct): 'timeframe': timeframe, }) - # TODO: do we even need this? - # async def modify( - # self, - # aid: int, # annotation id - # meth: str, # far end graphics object method to invoke - # params: dict[str, Any], # far end `meth(**kwargs)` - # ) -> bool: - # ''' - # Modify an existing (remote) annotation's graphics - # paramters, thus changing it's appearance / state in real - # time. + async def add_arrow( + self, + fqme: str, + timeframe: float, + x: float, + y: float, + pointing: Literal[ + 'up', + 'down', + ], + # TODO: a `Literal['view', 'scene']` for this? + # domain: str = 'view', # or 'scene' + color: str = 'dad_blue', + alpha: int = 116, - # ''' - # raise NotImplementedError + from_acm: bool = False, + + ) -> int: + ''' + Add a `SelectRect` annotation to the target view, return + the instances `id(obj)` from the remote UI actor. 
+ + ''' + ipc: MsgStream = self._get_ipc(fqme) + await ipc.send({ + 'fqme': fqme, + 'cmd': 'ArrowEditor', + 'timeframe': timeframe, + # 'meth': str(meth), + 'meth': 'add', + 'kwargs': { + 'x': float(x), + 'y': float(y), + 'color': color, + 'pointing': pointing, # up|down + 'alpha': alpha, + 'aid': None, + }, + }) + aid: int = await ipc.receive() + self._ipcs[aid] = ipc + if not from_acm: + self._annot_stack.push_async_callback( + partial( + self.remove, + aid, + ) + ) + return aid @acm @@ -374,7 +502,9 @@ async def open_annot_ctl( # TODO: print the current discoverable actor UID set # here as well? if not maybe_portals: - raise RuntimeError('No chart UI actors found in service domain?') + raise RuntimeError( + 'No chart actors found in service domain?' + ) for portal in maybe_portals: ctx_mngrs.append( From 11e95d9cbf9fc8b1c37a4089b651e9bee35de71b Mon Sep 17 00:00:00 2001 From: goodboy Date: Tue, 27 Jan 2026 13:17:28 -0500 Subject: [PATCH 14/44] Catch too-early ib hist frames For now by REPLing them and raising an RTE inside `.ib.feed` as well as tracing any such cases that make it (from other providers) up to the `.tsp._history` layer during null-segment backfilling. --- piker/brokers/ib/feed.py | 32 ++++++++++++++++++++++++++++---- piker/tsp/_history.py | 7 +++++++ 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/piker/brokers/ib/feed.py b/piker/brokers/ib/feed.py index 51305ced..c4f91533 100644 --- a/piker/brokers/ib/feed.py +++ b/piker/brokers/ib/feed.py @@ -178,8 +178,8 @@ async def open_history_client( async def get_hist( timeframe: float, - end_dt: datetime | None = None, - start_dt: datetime | None = None, + end_dt: datetime|None = None, + start_dt: datetime|None = None, ) -> tuple[np.ndarray, str]: @@ -262,7 +262,29 @@ async def open_history_client( vlm = bars_array['volume'] vlm[vlm < 0] = 0 - return bars_array, first_dt, last_dt + # XXX, if a start-limit was passed ensure we only + # return history that far back! 
+ if ( + start_dt + and + last_dt < start_dt + ): + bars_array = bars_array[ + bars_array['time'] >= start_dt.timestamp() + ] + # TODO! rm this once we're more confident it never hits! + breakpoint() + raise RuntimeError( + f'OHLC-bars array start is gt `start_dt` limit !!\n' + f'start_dt: {start_dt}\n' + f'last_dt: {last_dt}\n' + ) + + return ( + bars_array, + first_dt, + last_dt, + ) # TODO: it seems like we can do async queries for ohlc # but getting the order right still isn't working and I'm not @@ -397,7 +419,7 @@ async def get_bars( # blank to start which tells ib to look up the latest datum end_dt: str = '', - start_dt: str | None = '', + start_dt: str|None = '', # TODO: make this more dynamic based on measured frame rx latency? # how long before we trigger a feed reset (seconds) @@ -451,6 +473,8 @@ async def get_bars( dt_duration, ) = await proxy.bars( fqme=fqme, + # XXX TODO! lol we're not using this.. + # start_dt=start_dt, end_dt=end_dt, sample_period_s=timeframe, diff --git a/piker/tsp/_history.py b/piker/tsp/_history.py index 361b0e23..e11f967d 100644 --- a/piker/tsp/_history.py +++ b/piker/tsp/_history.py @@ -242,6 +242,13 @@ async def maybe_fill_null_segments( end_dt=end_dt, ) + if ( + from_timestamp( + array['time'][0] + ) < backfill_until_dt + ): + await tractor.pause() + # XXX TODO: pretty sure if i plot tsla, btcusdt.binance # and mnq.cme.ib this causes a Qt crash XXDDD From 4ef5a5beb885c3bad882536ecca983dab90ad80c Mon Sep 17 00:00:00 2001 From: goodboy Date: Tue, 27 Jan 2026 13:33:13 -0500 Subject: [PATCH 15/44] Space gap rect-annots "between" start-end bars --- piker/tsp/_annotate.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/piker/tsp/_annotate.py b/piker/tsp/_annotate.py index 70344b66..b42b317a 100644 --- a/piker/tsp/_annotate.py +++ b/piker/tsp/_annotate.py @@ -121,21 +121,24 @@ async def markup_gaps( # iend += margin # istart -= margin - rect_gap: float = BGM*3/8 opn: float = row['open'][0] cls: 
float = prev_r['close'][0] - ro: tuple[float, float] = ( - iend + rect_gap + 1, - opn, - ) + + # BGM=0.16 is the normal diff from overlap between bars, SO + # just go slightly "in" from that "between them". + from_idx: int = BGM - .06 # = .10 + lc: tuple[float, float] = ( - istart - rect_gap, # + 1 , + istart + 1 - from_idx, cls, ) + ro: tuple[float, float] = ( + iend + from_idx, + opn, + ) diff: float = cls - opn sgn: float = copysign(1, diff) - color: str = 'dad_blue' # TODO? mks more sense to have up/down coloring? # color: str = { From e6c7834a011655dc4cc7f0c251f39a2937ee3a93 Mon Sep 17 00:00:00 2001 From: goodboy Date: Tue, 27 Jan 2026 13:33:46 -0500 Subject: [PATCH 16/44] Add break for single bar null segments --- piker/tsp/_anal.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/piker/tsp/_anal.py b/piker/tsp/_anal.py index 26c3740e..bcf20a26 100644 --- a/piker/tsp/_anal.py +++ b/piker/tsp/_anal.py @@ -275,6 +275,10 @@ def get_null_segs( # diff of abs index steps between each zeroed row absi_zdiff: np.ndarray = np.diff(absi_zeros) + if zero_t.size < 2: + breakpoint() + return None + # scan for all frame-indices where the # zeroed-row-abs-index-step-diff is greater then the # expected increment of 1. 
@@ -434,8 +438,8 @@ def get_null_segs( def iter_null_segs( timeframe: float, - frame: Frame | None = None, - null_segs: tuple | None = None, + frame: Frame|None = None, + null_segs: tuple|None = None, ) -> Generator[ tuple[ @@ -487,7 +491,8 @@ def iter_null_segs( start_dt = None if ( absi_start is not None - and start_t != 0 + and + start_t != 0 ): fi_start: int = absi_start - absi_first start_row: Seq = frame[fi_start] @@ -501,8 +506,8 @@ def iter_null_segs( yield ( absi_start, absi_end, # abs indices fi_start, fi_end, # relative "frame" indices - start_t, end_t, - start_dt, end_dt, + start_t, end_t, # epoch times + start_dt, end_dt, # dts ) From 33ec37a83f8c354da63ff9bd0b24cf248e794af5 Mon Sep 17 00:00:00 2001 From: goodboy Date: Tue, 27 Jan 2026 14:16:51 -0500 Subject: [PATCH 17/44] Add `pexpect`, `xonsh`@github:main to deps The former bc `claude` needs it for its new "offline" REPL simulator script `snippets/claude_debug_helper.py` and pin to `xonsh` git mainline to get the fancy new next cmd/suggestion prompt feats (which @goodboy is using from `modden` already). Bump lock file to match. Ah right, and for now while hackin pin to a local `tractor` Bp --- pyproject.toml | 14 +++++++-- uv.lock | 85 +++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 84 insertions(+), 15 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d8b28257..dcd489d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -116,7 +116,6 @@ uis = [ dev = [ # https://docs.astral.sh/uv/concepts/projects/dependencies/#development-dependencies "cython >=3.0.0, <4.0.0", - # nested deps-groups # https://docs.astral.sh/uv/concepts/projects/dependencies/#nesting-groups {include-group = 'uis'}, @@ -134,6 +133,10 @@ repl = [ "prompt-toolkit ==3.0.40", "pyperclip>=1.9.0", + # for @claude's `snippets/claude_debug_helper.py` it uses to do + # "offline" debug/crash REPL-in alongside a dev. + "pexpect>=4.9.0", + # ?TODO, new stuff to consider.. 
# "visidata" # console numerics # "xxh" # for remote `xonsh`-ing @@ -191,10 +194,15 @@ pyqtgraph = { git = "https://github.com/pikers/pyqtgraph.git" } tomlkit = { git = "https://github.com/pikers/tomlkit.git", branch ="piker_pin" } pyvnc = { git = "https://github.com/regulad/pyvnc.git" } +# to get fancy next-cmd/suggestion feats prior to 0.22.2 B) +# https://github.com/xonsh/xonsh/pull/6037 +# https://github.com/xonsh/xonsh/pull/6048 +xonsh = { git = 'https://github.com/xonsh/xonsh.git', branch = 'main' } + # XXX since, we're like, always hacking new shite all-the-time. Bp -tractor = { git = "https://github.com/goodboy/tractor.git", branch ="piker_pin" } +# tractor = { git = "https://github.com/goodboy/tractor.git", branch ="piker_pin" } # tractor = { git = "https://pikers.dev/goodboy/tractor", branch = "piker_pin" } # tractor = { git = "https://pikers.dev/goodboy/tractor", branch = "main" } # ------ goodboy ------ # hackin dev-envs, usually there's something new he's hackin in.. -# tractor = { path = "../tractor", editable = true } +tractor = { path = "../tractor", editable = true } diff --git a/uv.lock b/uv.lock index e0f3d7fd..1d96ab57 100644 --- a/uv.lock +++ b/uv.lock @@ -1000,6 +1000,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6e/23/e98758924d1b3aac11a626268eabf7f3cf177e7837c28d47bf84c64532d0/pendulum-3.1.0-py3-none-any.whl", hash = "sha256:f9178c2a8e291758ade1e8dd6371b1d26d08371b4c7730a6e9a3ef8b16ebae0f", size = 111799, upload-time = "2025-04-19T14:02:34.739Z" }, ] +[[package]] +name = "pexpect" +version = "4.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ptyprocess" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/92/cc564bf6381ff43ce1f4d06852fc19a2f11d180f23dc32d9588bee2f149d/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f", size = 166450, upload-time = "2023-11-25T09:07:26.339Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772, upload-time = "2023-11-25T06:56:14.81Z" }, +] + [[package]] name = "piker" version = "0.1.0a0.dev0" @@ -1047,6 +1059,7 @@ dev = [ { name = "greenback" }, { name = "i3ipc" }, { name = "pdbp" }, + { name = "pexpect" }, { name = "prompt-toolkit" }, { name = "pyperclip" }, { name = "pyqt6" }, @@ -1062,6 +1075,7 @@ lint = [ repl = [ { name = "greenback" }, { name = "pdbp" }, + { name = "pexpect" }, { name = "prompt-toolkit" }, { name = "pyperclip" }, { name = "xonsh" }, @@ -1099,7 +1113,7 @@ requires-dist = [ { name = "tomli", specifier = ">=2.0.1,<3.0.0" }, { name = "tomli-w", specifier = ">=1.0.0,<2.0.0" }, { name = "tomlkit", git = "https://github.com/pikers/tomlkit.git?branch=piker_pin" }, - { name = "tractor", git = "https://github.com/goodboy/tractor.git?branch=piker_pin" }, + { name = "tractor", editable = "../tractor" }, { name = "trio", specifier = ">=0.27" }, { name = "trio-typing", specifier = ">=0.10.0" }, { name = "trio-util", specifier = ">=0.7.0,<0.8.0" }, @@ -1116,6 +1130,7 @@ dev = [ { name = "greenback", specifier = ">=1.1.1,<2.0.0" }, { name = "i3ipc", specifier = ">=2.2.1" }, { name = "pdbp", specifier = ">=1.8.2,<2.0.0" }, + { name = "pexpect", specifier = ">=4.9.0" }, { name = "prompt-toolkit", specifier = "==3.0.40" }, { name = "pyperclip", specifier = ">=1.9.0" }, { name = "pyqt6", specifier = ">=6.7.0,<7.0.0" }, @@ -1123,15 +1138,16 @@ dev = [ { name = "pytest" }, { name = "qdarkstyle", specifier = ">=3.0.2,<4.0.0" }, { name = "rapidfuzz", specifier = ">=3.2.0,<4.0.0" }, - { name = "xonsh" }, + { name = "xonsh", git = "https://github.com/xonsh/xonsh.git?branch=main" }, ] lint = [{ name = "ruff", specifier = ">=0.9.6" }] repl = [ { name = "greenback", specifier = ">=1.1.1,<2.0.0" }, { name = "pdbp", specifier 
= ">=1.8.2,<2.0.0" }, + { name = "pexpect", specifier = ">=4.9.0" }, { name = "prompt-toolkit", specifier = "==3.0.40" }, { name = "pyperclip", specifier = ">=1.9.0" }, - { name = "xonsh" }, + { name = "xonsh", git = "https://github.com/xonsh/xonsh.git?branch=main" }, ] testing = [{ name = "pytest" }] uis = [ @@ -1297,6 +1313,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305, upload-time = "2025-10-08T19:49:00.792Z" }, ] +[[package]] +name = "ptyprocess" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/20/e5/16ff212c1e452235a90aeb09066144d0c5a6a8c0834397e03f5224495c4e/ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220", size = 70762, upload-time = "2020-12-28T15:15:30.155Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", size = 13993, upload-time = "2020-12-28T15:15:28.35Z" }, +] + [[package]] name = "pyarrow" version = "22.0.0" @@ -1843,7 +1868,7 @@ source = { git = "https://github.com/pikers/tomlkit.git?branch=piker_pin#8e0239a [[package]] name = "tractor" version = "0.1.0a6.dev0" -source = { git = "https://github.com/goodboy/tractor.git?branch=piker_pin#e232d9dd06f41b8dca997f0647f2083d27cc34f2" } +source = { editable = "../tractor" } dependencies = [ { name = "bidict" }, { name = "cffi" }, @@ -1856,6 +1881,48 @@ dependencies = [ { name = "wrapt" }, ] +[package.metadata] +requires-dist = [ + { name = "bidict", specifier = ">=0.23.1" }, + { name = "cffi", specifier = ">=1.17.1" }, + { name = "colorlog", specifier = 
">=6.8.2,<7" }, + { name = "msgspec", specifier = ">=0.19.0" }, + { name = "pdbp", specifier = ">=1.8.2,<2" }, + { name = "platformdirs", specifier = ">=4.4.0" }, + { name = "tricycle", specifier = ">=0.4.1,<0.5" }, + { name = "trio", specifier = ">0.27" }, + { name = "wrapt", specifier = ">=1.16.0,<2" }, +] + +[package.metadata.requires-dev] +dev = [ + { name = "greenback", specifier = ">=1.2.1,<2" }, + { name = "pexpect", specifier = ">=4.9.0,<5" }, + { name = "prompt-toolkit", specifier = ">=3.0.50" }, + { name = "psutil", specifier = ">=7.0.0" }, + { name = "pyperclip", specifier = ">=1.9.0" }, + { name = "pytest", specifier = ">=8.3.5" }, + { name = "stackscope", specifier = ">=0.2.2,<0.3" }, + { name = "typing-extensions", specifier = ">=4.14.1" }, + { name = "xonsh", specifier = ">=0.19.2" }, +] +devx = [ + { name = "greenback", specifier = ">=1.2.1,<2" }, + { name = "stackscope", specifier = ">=0.2.2,<0.3" }, + { name = "typing-extensions", specifier = ">=4.14.1" }, +] +lint = [{ name = "ruff", specifier = ">=0.9.6" }] +repl = [ + { name = "prompt-toolkit", specifier = ">=3.0.50" }, + { name = "psutil", specifier = ">=7.0.0" }, + { name = "pyperclip", specifier = ">=1.9.0" }, + { name = "xonsh", specifier = ">=0.19.2" }, +] +testing = [ + { name = "pexpect", specifier = ">=4.9.0,<5" }, + { name = "pytest", specifier = ">=8.3.5" }, +] + [[package]] name = "tricycle" version = "0.4.1" @@ -2095,14 +2162,8 @@ wheels = [ [[package]] name = "xonsh" -version = "0.20.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/56/af/7e2ba3885da44cbe03c7ff46f90ea917ba10d91dc74d68604001ea28055f/xonsh-0.20.0.tar.gz", hash = "sha256:d44a50ee9f288ff96bd0456f0a38988ef6d4985637140ea793beeef5ec5d2d38", size = 811907, upload-time = "2025-11-24T07:50:50.847Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e8/db/1c5c057c0b2a89b8919477726558685720ae0849ea1a98a3803e93550824/xonsh-0.20.0-py311-none-any.whl", 
hash = "sha256:65d27ba31d558f79010d6c652751449fd3ed4df1f1eda78040a6427fa0a0f03e", size = 646312, upload-time = "2025-11-24T07:50:49.488Z" }, - { url = "https://files.pythonhosted.org/packages/d2/a2/d6f7534f31489a4b8b54bd2a2496248f86f7c21a6a6ce9bfdcdd389fe4e7/xonsh-0.20.0-py312-none-any.whl", hash = "sha256:3148900e67b9c2796bef6f2eda003b0a64d4c6f50a0db23324f786d9e1af9353", size = 646323, upload-time = "2025-11-24T07:50:43.028Z" }, - { url = "https://files.pythonhosted.org/packages/bd/48/bcb1e4d329c3d522bc29b066b0b6ee86938ec392376a29c36fac0ad1c586/xonsh-0.20.0-py313-none-any.whl", hash = "sha256:c83daaf6eb2960180fc5a507459dbdf6c0d6d63e1733c43f4e43db77255c7278", size = 646830, upload-time = "2025-11-24T07:50:45.078Z" }, -] +version = "0.22.1" +source = { git = "https://github.com/xonsh/xonsh.git?branch=main#336658ff0919f8d7bb96d581136d37d470a8fe99" } [[package]] name = "yapic-json" From b1d6c595ecdc3b6b8589e95c55b612c7d5777db4 Mon Sep 17 00:00:00 2001 From: goodboy Date: Tue, 27 Jan 2026 16:20:23 -0500 Subject: [PATCH 18/44] Expose more `pg.ArrowItem` params thru annot-ctl API --- piker/tsp/_annotate.py | 9 +++++++-- piker/tsp/_history.py | 4 ++-- piker/ui/_editors.py | 23 +++++++++++++++++++---- piker/ui/_remote_ctl.py | 20 ++++++++++++++++++++ 4 files changed, 48 insertions(+), 8 deletions(-) diff --git a/piker/tsp/_annotate.py b/piker/tsp/_annotate.py index b42b317a..f333d4a4 100644 --- a/piker/tsp/_annotate.py +++ b/piker/tsp/_annotate.py @@ -127,7 +127,6 @@ async def markup_gaps( # BGM=0.16 is the normal diff from overlap between bars, SO # just go slightly "in" from that "between them". from_idx: int = BGM - .06 # = .10 - lc: tuple[float, float] = ( istart + 1 - from_idx, cls, @@ -162,14 +161,20 @@ async def markup_gaps( 'down' if sgn == 1 else 'up' ) + # TODO! mk this a `msgspec.Struct` which we deserialize + # on the server side! 
arrow_kwargs: dict[str, Any] = dict( fqme=fqme, timeframe=timeframe, x=iend, y=cls, color=color, - alpha=160, + alpha=169, pointing=direction, + # TODO: expose these as params to markup_gaps()? + headLen=10, + headWidth=2.222, + pxMode=True, ) aid: int = await actl.add_arrow( diff --git a/piker/tsp/_history.py b/piker/tsp/_history.py index e11f967d..54cbb3b4 100644 --- a/piker/tsp/_history.py +++ b/piker/tsp/_history.py @@ -243,8 +243,8 @@ async def maybe_fill_null_segments( ) if ( - from_timestamp( - array['time'][0] + frame_start_dt := ( + from_timestamp(array['time'][0]) ) < backfill_until_dt ): await tractor.pause() diff --git a/piker/ui/_editors.py b/piker/ui/_editors.py index 686f9750..f8d3e68c 100644 --- a/piker/ui/_editors.py +++ b/piker/ui/_editors.py @@ -99,6 +99,11 @@ class ArrowEditor(Struct): ] = None, alpha: int = 255, zval: float = 1e9, + headLen: float|None = None, + headWidth: float|None = None, + tailLen: float|None = None, + tailWidth: float|None = None, + pxMode: bool = True, ) -> pg.ArrowItem: ''' @@ -114,6 +119,15 @@ class ArrowEditor(Struct): # scale arrow sizing to dpi-aware font size = _font.font.pixelSize() * 0.8 + # allow caller override of head dimensions + if headLen is None: + headLen = size + if headWidth is None: + headWidth = size/2 + # tail params default to None (no tail) + if tailWidth is None: + tailWidth = 3 + color = color or 'default' color = QColor(hcolor(color)) color.setAlpha(alpha) @@ -122,10 +136,11 @@ class ArrowEditor(Struct): arrow = pg.ArrowItem( angle=angle, baseAngle=0, - headLen=size, - headWidth=size/2, - tailLen=None, - pxMode=True, + headLen=headLen, + headWidth=headWidth, + tailLen=tailLen, + tailWidth=tailWidth, + pxMode=pxMode, # coloring pen=pen, brush=brush, diff --git a/piker/ui/_remote_ctl.py b/piker/ui/_remote_ctl.py index 42f8a9b7..c1153e2b 100644 --- a/piker/ui/_remote_ctl.py +++ b/piker/ui/_remote_ctl.py @@ -183,6 +183,11 @@ async def serve_rc_annots( 'color': color, 'aid': str()|None as aid, 
'alpha': int(alpha), + 'headLen': int()|float()|None as headLen, + 'headWidth': int()|float()|None as headWidth, + 'tailLen': int()|float()|None as tailLen, + 'tailWidth': int()|float()|None as tailWidth, + 'pxMode': bool(pxMode), }, # ?TODO? split based on method fn-sigs? # 'pointing', @@ -213,6 +218,11 @@ async def serve_rc_annots( pointing=pointing, color=color, alpha=alpha, + headLen=headLen, + headWidth=headWidth, + tailLen=tailLen, + tailWidth=tailWidth, + pxMode=pxMode, ) annots[aid] = arrow _editors[aid] = arrows @@ -441,6 +451,11 @@ class AnnotCtl(Struct): # domain: str = 'view', # or 'scene' color: str = 'dad_blue', alpha: int = 116, + headLen: float|None = None, + headWidth: float|None = None, + tailLen: float|None = None, + tailWidth: float|None = None, + pxMode: bool = True, from_acm: bool = False, @@ -464,6 +479,11 @@ class AnnotCtl(Struct): 'pointing': pointing, # up|down 'alpha': alpha, 'aid': None, + 'headLen': headLen, + 'headWidth': headWidth, + 'tailLen': tailLen, + 'tailWidth': tailWidth, + 'pxMode': pxMode, }, }) aid: int = await ipc.receive() From 7ddcf5893eb840e6564a46e974888a0b2de087da Mon Sep 17 00:00:00 2001 From: goodboy Date: Tue, 27 Jan 2026 16:21:19 -0500 Subject: [PATCH 19/44] Lul, woops compare against first-dt in `.ib.feed` bars frame.. 
--- piker/brokers/ib/feed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/piker/brokers/ib/feed.py b/piker/brokers/ib/feed.py index c4f91533..4bb3c44d 100644 --- a/piker/brokers/ib/feed.py +++ b/piker/brokers/ib/feed.py @@ -267,7 +267,7 @@ async def open_history_client( if ( start_dt and - last_dt < start_dt + first_dt < start_dt ): bars_array = bars_array[ bars_array['time'] >= start_dt.timestamp() From e3d7077f180cf44ee3afec860c66a23340835c5a Mon Sep 17 00:00:00 2001 From: goodboy Date: Tue, 27 Jan 2026 17:36:33 -0500 Subject: [PATCH 20/44] ib._util: ignore timeout-errs when crash-handling `pyvnc` connects --- piker/brokers/ib/_util.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/piker/brokers/ib/_util.py b/piker/brokers/ib/_util.py index 00b2d233..ef4ea15a 100644 --- a/piker/brokers/ib/_util.py +++ b/piker/brokers/ib/_util.py @@ -250,7 +250,9 @@ async def vnc_click_hack( 'connection': 'r' }[reset_type] - with tractor.devx.open_crash_handler(): + with tractor.devx.open_crash_handler( + ignore={TimeoutError,}, + ): client = await AsyncVNCClient.connect( VNCConfig( host=host, From 40cbc8546d0cb13143c188ca5b3fc49f7572dfbd Mon Sep 17 00:00:00 2001 From: goodboy Date: Tue, 27 Jan 2026 17:37:25 -0500 Subject: [PATCH 21/44] .ib.feed: trim bars frame to `start_dt` --- piker/brokers/ib/feed.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/piker/brokers/ib/feed.py b/piker/brokers/ib/feed.py index 4bb3c44d..14803144 100644 --- a/piker/brokers/ib/feed.py +++ b/piker/brokers/ib/feed.py @@ -269,16 +269,25 @@ async def open_history_client( and first_dt < start_dt ): - bars_array = bars_array[ + trimmed_bars = bars_array[ bars_array['time'] >= start_dt.timestamp() ] - # TODO! rm this once we're more confident it never hits! 
- breakpoint() - raise RuntimeError( - f'OHLC-bars array start is gt `start_dt` limit !!\n' - f'start_dt: {start_dt}\n' - f'last_dt: {last_dt}\n' - ) + if ( + trimmed_first_dt := from_timestamp(trimmed_bars['time'][0]) + != + start_dt + ): + # TODO! rm this once we're more confident it never hits! + breakpoint() + raise RuntimeError( + f'OHLC-bars array start is gt `start_dt` limit !!\n' + f'start_dt: {start_dt}\n' + f'first_dt: {first_dt}\n' + f'trimmed_first_dt: {trimmed_first_dt}\n' + ) + + # XXX, overwrite with start_dt-limited frame + bars_array = trimmed_bars return ( bars_array, From fe11f79f2107c60af4dac7fe589fea69ba404650 Mon Sep 17 00:00:00 2001 From: goodboy Date: Tue, 27 Jan 2026 20:51:21 -0500 Subject: [PATCH 22/44] Add humanized duration labels to gap annotations Introduce `humanize_duration()` helper in `.tsp._annotate` to convert seconds to short human-readable format (d/h/m/s). Extend annot-ctl API with `add_text()` method for placing `pg.TextItem` labels on charts. Also, - add duration labels on RHS of gap arrows in `markup_gaps()` - handle text item removal in `rm_annot()` match block - expose `TextItem` cmd in `serve_rc_annots()` IPC handler - use `hcolor()` for named-to-hex color conversion - set anchor positioning for up vs down gaps (this patch was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- piker/tsp/_annotate.py | 66 ++++++++++++++++++++++++++++++ piker/ui/_remote_ctl.py | 90 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 153 insertions(+), 3 deletions(-) diff --git a/piker/tsp/_annotate.py b/piker/tsp/_annotate.py index f333d4a4..fa9f04c3 100644 --- a/piker/tsp/_annotate.py +++ b/piker/tsp/_annotate.py @@ -35,6 +35,51 @@ if TYPE_CHECKING: from piker.ui._remote_ctl import AnnotCtl +def humanize_duration( + seconds: float, +) -> str: + ''' + Convert duration in seconds to short human-readable form. 
+ + Uses smallest appropriate time unit: + - d: days + - h: hours + - m: minutes + - s: seconds + + Examples: + - 86400 -> "1d" + - 28800 -> "8h" + - 180 -> "3m" + - 45 -> "45s" + + ''' + abs_secs: float = abs(seconds) + + if abs_secs >= 86400: + days: float = abs_secs / 86400 + if days >= 10: + return f'{int(days)}d' + return f'{days:.1f}d' + + elif abs_secs >= 3600: + hours: float = abs_secs / 3600 + if hours >= 10: + return f'{int(hours)}h' + return f'{hours:.1f}h' + + elif abs_secs >= 60: + mins: float = abs_secs / 60 + if mins >= 10: + return f'{int(mins)}m' + return f'{mins:.1f}m' + + else: + if abs_secs >= 10: + return f'{int(abs_secs)}s' + return f'{abs_secs:.1f}s' + + async def markup_gaps( fqme: str, timeframe: float, @@ -124,6 +169,10 @@ async def markup_gaps( opn: float = row['open'][0] cls: float = prev_r['close'][0] + # get gap duration for humanized label + gap_dur_s: float = row['s_diff'][0] + gap_label: str = humanize_duration(gap_dur_s) + # BGM=0.16 is the normal diff from overlap between bars, SO # just go slightly "in" from that "between them". from_idx: int = BGM - .06 # = .10 @@ -181,6 +230,23 @@ async def markup_gaps( **arrow_kwargs ) + # add duration label to RHS of arrow + if sgn == -1: # up-gap + anchor = (0, 0) # XXX, i dun get dese dims.. XD + else: # down-gap + anchor = (0, 1) # XXX y, x? 
+ + text_aid: int = await actl.add_text( + fqme=fqme, + timeframe=timeframe, + text=gap_label, + x=iend + 1, + y=cls, + color=color, + anchor=anchor, + ) + aids[text_aid] = {'text': gap_label} + # tell chart to redraw all its # graphics view layers Bo await actl.redraw( diff --git a/piker/ui/_remote_ctl.py b/piker/ui/_remote_ctl.py index c1153e2b..ccea90e1 100644 --- a/piker/ui/_remote_ctl.py +++ b/piker/ui/_remote_ctl.py @@ -48,6 +48,7 @@ from piker.service import find_service from piker.brokers import SymbolNotFound from piker.ui.qt import ( QGraphicsItem, + QColor, ) from ._display import DisplayState from ._interaction import ChartView @@ -57,6 +58,7 @@ from ._editors import ( ) from ._chart import ChartPlotWidget from ._dataviz import Viz +from ._style import hcolor log = get_logger(__name__) @@ -93,7 +95,7 @@ _annots: AnnotsTable = {} _editors: EditorsTable = {} def rm_annot( - annot: ArrowEditor|SelectRect + annot: ArrowEditor|SelectRect|pg.TextItem ) -> bool: global _editors match annot: @@ -114,6 +116,12 @@ def rm_annot( annot.delete() return True + case pg.TextItem(): + scene = annot.scene() + if scene: + scene.removeItem(annot) + return True + return False @@ -230,8 +238,41 @@ async def serve_rc_annots( aids.add(aid) await annot_req_stream.send(aid) - # TODO, use `pg.TextItem` to put a humaized - # time label beside the arrows + case { + 'cmd': 'TextItem', + 'fqme': fqme, + 'timeframe': timeframe, + 'kwargs': { + 'text': str(text), + 'x': int()|float() as x, + 'y': int()|float() as y, + 'color': color, + 'anchor': list(anchor), + }, + }: + ds: DisplayState = _dss[fqme] + chart: ChartPlotWidget = { + 60: ds.hist_chart, + 1: ds.chart, + }[timeframe] + + # convert named color to hex + color_hex: str = hcolor(color) + + # create text item + text_item: pg.TextItem = pg.TextItem( + text=text, + color=color_hex, + anchor=anchor, + ) + text_item.setPos(x, y) + chart.plotItem.addItem(text_item) + + aid: str = str(uuid4()) + annots[aid] = text_item + aids: 
set[int] = ctxs[ipc_key][1] + aids.add(aid) + await annot_req_stream.send(aid) case { 'cmd': 'remove', @@ -497,6 +538,49 @@ class AnnotCtl(Struct): ) return aid + async def add_text( + self, + fqme: str, + timeframe: float, + text: str, + x: float, + y: float, + color: str|tuple = 'dad_blue', + anchor: tuple[float, float] = (0, 1), + + from_acm: bool = False, + + ) -> int: + ''' + Add a `pg.TextItem` annotation to the target view. + + anchor: (x, y) where (0,0) is upper-left, (1,1) is lower-right + + ''' + ipc: MsgStream = self._get_ipc(fqme) + await ipc.send({ + 'fqme': fqme, + 'cmd': 'TextItem', + 'timeframe': timeframe, + 'kwargs': { + 'text': text, + 'x': float(x), + 'y': float(y), + 'color': color, + 'anchor': tuple(anchor), + }, + }) + aid: int = await ipc.receive() + self._ipcs[aid] = ipc + if not from_acm: + self._annot_stack.push_async_callback( + partial( + self.remove, + aid, + ) + ) + return aid + @acm async def open_annot_ctl( From 4e7e4a7a1b40a2db435c762d241749231444b5ee Mon Sep 17 00:00:00 2001 From: goodboy Date: Tue, 27 Jan 2026 20:53:10 -0500 Subject: [PATCH 23/44] Add `font_size` param to `AnnotCtl.add_text()` API Expose font sizing control for `pg.TextItem` annotations thru the annot-ctl API. Default to `_font.font.pixelSize() - 3` when no size provided. 
Also, - thread `font_size` param thru IPC handler in `serve_rc_annots()` - apply font via `QFont.setPixelSize()` on text item creation - add `?TODO` note in `markup_gaps()` re using `conf.toml` value - update `add_text()` docstring with font_size param desc (this patch was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- piker/tsp/_annotate.py | 10 ++++++++++ piker/ui/_remote_ctl.py | 21 ++++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/piker/tsp/_annotate.py b/piker/tsp/_annotate.py index fa9f04c3..c2c6e9a1 100644 --- a/piker/tsp/_annotate.py +++ b/piker/tsp/_annotate.py @@ -30,6 +30,7 @@ import tractor from piker.data._formatters import BGM from piker.storage import log +# from piker.ui._style import _font if TYPE_CHECKING: from piker.ui._remote_ctl import AnnotCtl @@ -236,6 +237,14 @@ async def markup_gaps( else: # down-gap anchor = (0, 1) # XXX y, x? + # ?TODO? why returning -1 !? + # [ ] use conf.toml value instead! 
+ # + # font_size: int = _font.font.pixelSize() - 10 + # await tractor.pause() + # assert isinstance(font_size, int) + font_size = None + text_aid: int = await actl.add_text( fqme=fqme, timeframe=timeframe, @@ -244,6 +253,7 @@ async def markup_gaps( y=cls, color=color, anchor=anchor, + font_size=font_size, ) aids[text_aid] = {'text': gap_label} diff --git a/piker/ui/_remote_ctl.py b/piker/ui/_remote_ctl.py index ccea90e1..0fb2f2b7 100644 --- a/piker/ui/_remote_ctl.py +++ b/piker/ui/_remote_ctl.py @@ -48,8 +48,8 @@ from piker.service import find_service from piker.brokers import SymbolNotFound from piker.ui.qt import ( QGraphicsItem, - QColor, ) +from PyQt6.QtGui import QFont from ._display import DisplayState from ._interaction import ChartView from ._editors import ( @@ -248,6 +248,7 @@ async def serve_rc_annots( 'y': int()|float() as y, 'color': color, 'anchor': list(anchor), + 'font_size': int()|None as font_size, }, }: ds: DisplayState = _dss[fqme] @@ -264,7 +265,22 @@ async def serve_rc_annots( text=text, color=color_hex, anchor=anchor, + + # ?TODO, pin to github:main for this? + # legacy, can have scaling ish? + # ensureInBounds=True, ) + + # apply font size (default to DpiAwareFont if not + # provided) + if font_size is None: + from ._style import _font + font_size = _font.font.pixelSize() - 3 + + qfont: QFont = text_item.textItem.font() + qfont.setPixelSize(font_size) + text_item.setFont(qfont) + text_item.setPos(x, y) chart.plotItem.addItem(text_item) @@ -547,6 +563,7 @@ class AnnotCtl(Struct): y: float, color: str|tuple = 'dad_blue', anchor: tuple[float, float] = (0, 1), + font_size: int|None = None, from_acm: bool = False, @@ -555,6 +572,7 @@ class AnnotCtl(Struct): Add a `pg.TextItem` annotation to the target view. 
anchor: (x, y) where (0,0) is upper-left, (1,1) is lower-right + font_size: pixel size for font, defaults to `_font.font.pixelSize()` ''' ipc: MsgStream = self._get_ipc(fqme) @@ -568,6 +586,7 @@ class AnnotCtl(Struct): 'y': float(y), 'color': color, 'anchor': tuple(anchor), + 'font_size': font_size, }, }) aid: int = await ipc.receive() From 7964cc3cf4b85fb7701d840d6a839a6fd67b3580 Mon Sep 17 00:00:00 2001 From: goodboy Date: Tue, 27 Jan 2026 21:09:49 -0500 Subject: [PATCH 24/44] Drop decimal points for whole-number durations Adjust `humanize_duration()` to show "3h" instead of "3.0h" when the duration value is a whole number, making labels cleaner. (this patch was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- piker/tsp/_annotate.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/piker/tsp/_annotate.py b/piker/tsp/_annotate.py index c2c6e9a1..9ef9bcfb 100644 --- a/piker/tsp/_annotate.py +++ b/piker/tsp/_annotate.py @@ -59,24 +59,24 @@ def humanize_duration( if abs_secs >= 86400: days: float = abs_secs / 86400 - if days >= 10: + if days >= 10 or days == int(days): return f'{int(days)}d' return f'{days:.1f}d' elif abs_secs >= 3600: hours: float = abs_secs / 3600 - if hours >= 10: + if hours >= 10 or hours == int(hours): return f'{int(hours)}h' return f'{hours:.1f}h' elif abs_secs >= 60: mins: float = abs_secs / 60 - if mins >= 10: + if mins >= 10 or mins == int(mins): return f'{int(mins)}m' return f'{mins:.1f}m' else: - if abs_secs >= 10: + if abs_secs >= 10 or abs_secs == int(abs_secs): return f'{int(abs_secs)}s' return f'{abs_secs:.1f}s' From 0845b257d9a48e53f6aae0207dfdc3cdee2c3641 Mon Sep 17 00:00:00 2001 From: goodboy Date: Tue, 27 Jan 2026 23:52:00 -0500 Subject: [PATCH 25/44] Add buffer capacity checks to backfill loop Prevent `ValueError` from negative prepend index in `start_backfill()` by checking buffer space before push attempts. 
Truncate incoming frame if needed and stop gracefully when buffer full. Also, - add pre-push capacity check with frame truncation logic - stop backfill when `next_prepend_index <= 0` - log warnings for capacity exceeded and buffer-full conditions (this patch was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- piker/tsp/_history.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/piker/tsp/_history.py b/piker/tsp/_history.py index 54cbb3b4..d416c679 100644 --- a/piker/tsp/_history.py +++ b/piker/tsp/_history.py @@ -577,6 +577,25 @@ async def start_backfill( f'{next_start_dt} -> {last_start_dt}' ) + # Check if we're about to exceed buffer capacity BEFORE + # attempting the push + if next_prepend_index - ln < 0: + log.warning( + f'Backfill would exceed buffer capacity!\n' + f'next_prepend_index: {next_prepend_index}\n' + f'frame size: {ln}\n' + f'Truncating to fit remaining space..\n' + ) + # only push what fits + to_push = to_push[-(next_prepend_index):] + ln = len(to_push) + + if ln == 0: + log.warning( + 'No space left in buffer, stopping backfill!' + ) + break + # bail gracefully on shm allocation overrun/full # condition try: @@ -597,6 +616,14 @@ async def start_backfill( next_prepend_index = next_prepend_index - ln last_start_dt = next_start_dt + # Stop if we've hit buffer start + if next_prepend_index <= 0: + log.warning( + f'Reached buffer start (index={next_prepend_index}), ' + f'stopping backfill' + ) + break + except ValueError as ve: _ve = ve log.error( From 3f001cc1f65a9143743d7a4065b00674e279ff61 Mon Sep 17 00:00:00 2001 From: goodboy Date: Wed, 28 Jan 2026 12:48:26 -0500 Subject: [PATCH 26/44] Do time-based shm-index lookup for annots on server Fix annotation misalignment during backfill by switching from client-computed indices to server-side timestamp lookups against current shm state. 
Store absolute coords on annotations and reposition on viz redraws. Lowlevel impl deats, - add `time` param to `.add_arrow()`, `.add_text()`, `.add_rect()` - lookup indices from shm via timestamp matching in IPC handlers - force chart redraw before `markup_gaps()` annotation creation - wrap IPC send/receive in `trio.fail_after(3)` for timeout when server fails to respond, likely hangs on no-case-match/error. - cache `_meth`/`_kwargs` on rects, `_abs_x`/`_abs_y` on arrows - auto-reposition all annotations after viz reset in redraw cmd Also, - handle `KeyError` for missing timeframes in chart lookup - return `-1` aid on annotation creation failures (lol oh `claude`..) - reconstruct rect positions from timestamps + BGM offset logic - log repositioned annotation counts on viz redraw (this patch was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- piker/tsp/_annotate.py | 25 ++- piker/ui/_remote_ctl.py | 353 ++++++++++++++++++++++++++++++---------- 2 files changed, 291 insertions(+), 87 deletions(-) diff --git a/piker/tsp/_annotate.py b/piker/tsp/_annotate.py index 9ef9bcfb..f75cbf36 100644 --- a/piker/tsp/_annotate.py +++ b/piker/tsp/_annotate.py @@ -94,6 +94,15 @@ async def markup_gaps( with rectangles. ''' + # XXX: force chart redraw FIRST to ensure PlotItem coordinate + # system is properly initialized before we position annotations! + # Without this, annotations may be misaligned on first creation + # due to Qt/pyqtgraph initialization race conditions. + await actl.redraw( + fqme=fqme, + timeframe=timeframe, + ) + aids: dict[int] = {} for i in range(gaps.height): row: pl.DataFrame = gaps[i] @@ -101,6 +110,7 @@ async def markup_gaps( # the gap's RIGHT-most bar's OPEN value # at that time (sample) step. 
iend: int = row['index'][0] + # dt: datetime = row['dt'][0] # dt_prev: datetime = row['dt_prev'][0] # dt_end_t: float = dt.timestamp() @@ -174,6 +184,10 @@ async def markup_gaps( gap_dur_s: float = row['s_diff'][0] gap_label: str = humanize_duration(gap_dur_s) + # XXX: get timestamps for server-side index lookup + start_time: float = prev_r['time'][0] + end_time: float = row['time'][0] + # BGM=0.16 is the normal diff from overlap between bars, SO # just go slightly "in" from that "between them". from_idx: int = BGM - .06 # = .10 @@ -201,6 +215,8 @@ async def markup_gaps( start_pos=lc, end_pos=ro, color=color, + start_time=start_time, + end_time=end_time, ) # add up/down rects @@ -213,11 +229,15 @@ async def markup_gaps( ) # TODO! mk this a `msgspec.Struct` which we deserialize # on the server side! + # XXX: send timestamp for server-side index lookup + # to ensure alignment with current shm state + gap_time: float = row['time'][0] arrow_kwargs: dict[str, Any] = dict( fqme=fqme, timeframe=timeframe, - x=iend, + x=iend, # fallback if timestamp lookup fails y=cls, + time=gap_time, # for server-side index lookup color=color, alpha=169, pointing=direction, @@ -249,8 +269,9 @@ async def markup_gaps( fqme=fqme, timeframe=timeframe, text=gap_label, - x=iend + 1, + x=iend + 1, # fallback if timestamp lookup fails y=cls, + time=gap_time, # server-side index lookup color=color, anchor=anchor, font_size=font_size, diff --git a/piker/ui/_remote_ctl.py b/piker/ui/_remote_ctl.py index 0fb2f2b7..fdaf54df 100644 --- a/piker/ui/_remote_ctl.py +++ b/piker/ui/_remote_ctl.py @@ -149,12 +149,72 @@ async def serve_rc_annots( 'kwargs': dict(kwargs), }: ds: DisplayState = _dss[fqme] - chart: ChartPlotWidget = { - 60: ds.hist_chart, - 1: ds.chart, - }[timeframe] + try: + chart: ChartPlotWidget = { + 60: ds.hist_chart, + 1: ds.chart, + }[timeframe] + except KeyError: + log.warning( + f'No chart for timeframe={timeframe}s, ' + f'skipping rect annotation' + ) + await annot_req_stream.send(-1) 
+ continue cv: ChartView = chart.cv + # NEW: if timestamps provided, lookup current indices + # from shm to ensure alignment with current buffer + # state + start_time = kwargs.pop('start_time', None) + end_time = kwargs.pop('end_time', None) + if ( + start_time is not None + and end_time is not None + ): + viz: Viz = chart.get_viz(fqme) + shm = viz.shm + arr = shm.array + + # lookup start index + start_matches = arr[arr['time'] == start_time] + if len(start_matches) == 0: + log.error( + f'No shm entry for start_time=' + f'{start_time}, skipping rect' + ) + await annot_req_stream.send(-1) + continue + + # lookup end index + end_matches = arr[arr['time'] == end_time] + if len(end_matches) == 0: + log.error( + f'No shm entry for end_time={end_time}, ' + f'skipping rect' + ) + await annot_req_stream.send(-1) + continue + + # get close price from start bar, open from end + # bar + start_idx = float(start_matches[0]['index']) + end_idx = float(end_matches[0]['index']) + start_close = float(start_matches[0]['close']) + end_open = float(end_matches[0]['open']) + + # reconstruct start_pos and end_pos with + # looked-up indices + from_idx: float = 0.16 - 0.06 # BGM offset + kwargs['start_pos'] = ( + start_idx + 1 - from_idx, + start_close, + ) + kwargs['end_pos'] = ( + end_idx + from_idx, + end_open, + ) + # annot type lookup from cmd rect = SelectRect( viewbox=cv, @@ -173,6 +233,12 @@ async def serve_rc_annots( # delegate generically to the requested method getattr(rect, meth)(**kwargs) rect.show() + + # XXX: store absolute coords for repositioning + # during viz redraws (eg backfill updates) + rect._meth = meth + rect._kwargs = kwargs + aid: int = id(rect) annots[aid] = rect aids: set[int] = ctxs[ipc_key][1] @@ -196,18 +262,47 @@ async def serve_rc_annots( 'tailLen': int()|float()|None as tailLen, 'tailWidth': int()|float()|None as tailWidth, 'pxMode': bool(pxMode), + 'time': int()|float()|None as timestamp, }, # ?TODO? split based on method fn-sigs? 
# 'pointing', }: ds: DisplayState = _dss[fqme] - chart: ChartPlotWidget = { - 60: ds.hist_chart, - 1: ds.chart, - }[timeframe] + try: + chart: ChartPlotWidget = { + 60: ds.hist_chart, + 1: ds.chart, + }[timeframe] + except KeyError: + log.warning( + f'No chart for timeframe={timeframe}s, ' + f'skipping arrow annotation' + ) + # return -1 to indicate failure + await annot_req_stream.send(-1) + continue cv: ChartView = chart.cv godw = chart.linked.godwidget + # NEW: if timestamp provided, lookup current index + # from shm to ensure alignment with current buffer + # state + if timestamp is not None: + viz: Viz = chart.get_viz(fqme) + shm = viz.shm + arr = shm.array + # find index where time matches timestamp + matches = arr[arr['time'] == timestamp] + if len(matches) == 0: + log.error( + f'No shm entry for timestamp={timestamp}, ' + f'skipping arrow annotation' + ) + await annot_req_stream.send(-1) + continue + # use the matched row's index as x + x = float(matches[0]['index']) + arrows = ArrowEditor(godw=godw) # `.add/.remove()` API if meth != 'add': @@ -232,6 +327,11 @@ async def serve_rc_annots( tailWidth=tailWidth, pxMode=pxMode, ) + # XXX: store absolute coords for repositioning + # during viz redraws (eg backfill updates) + arrow._abs_x = x + arrow._abs_y = y + annots[aid] = arrow _editors[aid] = arrows aids: set[int] = ctxs[ipc_key][1] @@ -249,13 +349,42 @@ async def serve_rc_annots( 'color': color, 'anchor': list(anchor), 'font_size': int()|None as font_size, + 'time': int()|float()|None as timestamp, }, }: ds: DisplayState = _dss[fqme] - chart: ChartPlotWidget = { - 60: ds.hist_chart, - 1: ds.chart, - }[timeframe] + try: + chart: ChartPlotWidget = { + 60: ds.hist_chart, + 1: ds.chart, + }[timeframe] + except KeyError: + log.warning( + f'No chart for timeframe={timeframe}s, ' + f'skipping text annotation' + ) + await annot_req_stream.send(-1) + continue + + # NEW: if timestamp provided, lookup current index + # from shm to ensure alignment with current buffer 
+ # state + if timestamp is not None: + viz: Viz = chart.get_viz(fqme) + shm = viz.shm + arr = shm.array + # find index where time matches timestamp + matches = arr[arr['time'] == timestamp] + if len(matches) == 0: + log.error( + f'No shm entry for timestamp={timestamp}, ' + f'skipping text annotation' + ) + await annot_req_stream.send(-1) + continue + # use the matched row's index as x, +1 for text + # offset + x = float(matches[0]['index']) + 1 # convert named color to hex color_hex: str = hcolor(color) @@ -284,6 +413,11 @@ async def serve_rc_annots( text_item.setPos(x, y) chart.plotItem.addItem(text_item) + # XXX: store absolute coords for repositioning + # during viz redraws (eg backfill updates) + text_item._abs_x = x + text_item._abs_y = y + aid: str = str(uuid4()) annots[aid] = text_item aids: set[int] = ctxs[ipc_key][1] @@ -329,6 +463,38 @@ async def serve_rc_annots( ) viz.reset_graphics() + # XXX: reposition all annotations to ensure they + # stay aligned with viz data after reset (eg during + # backfill when abs-index range changes) + n_repositioned: int = 0 + for aid, annot in annots.items(): + # arrows and text items use abs x,y coords + if ( + hasattr(annot, '_abs_x') + and + hasattr(annot, '_abs_y') + ): + annot.setPos( + annot._abs_x, + annot._abs_y, + ) + n_repositioned += 1 + + # rects use method + kwargs + elif ( + hasattr(annot, '_meth') + and + hasattr(annot, '_kwargs') + ): + getattr(annot, annot._meth)(**annot._kwargs) + n_repositioned += 1 + + if n_repositioned: + log.info( + f'Repositioned {n_repositioned} annotation(s) ' + f'after viz redraw' + ) + case _: log.error( 'Unknown remote annotation cmd:\n' @@ -417,6 +583,10 @@ class AnnotCtl(Struct): from_acm: bool = False, + # NEW: optional timestamps for server-side index lookup + start_time: float|None = None, + end_time: float|None = None, + ) -> int: ''' Add a `SelectRect` annotation to the target view, return @@ -424,29 +594,32 @@ class AnnotCtl(Struct): ''' ipc: MsgStream = 
self._get_ipc(fqme) - await ipc.send({ - 'fqme': fqme, - 'cmd': 'SelectRect', - 'timeframe': timeframe, - # 'meth': str(meth), - 'meth': 'set_view_pos' if domain == 'view' else 'set_scene_pos', - 'kwargs': { - 'start_pos': tuple(start_pos), - 'end_pos': tuple(end_pos), - 'color': color, - 'update_label': False, - }, - }) - aid: int = await ipc.receive() - self._ipcs[aid] = ipc - if not from_acm: - self._annot_stack.push_async_callback( - partial( - self.remove, - aid, + with trio.fail_after(3): + await ipc.send({ + 'fqme': fqme, + 'cmd': 'SelectRect', + 'timeframe': timeframe, + # 'meth': str(meth), + 'meth': 'set_view_pos' if domain == 'view' else 'set_scene_pos', + 'kwargs': { + 'start_pos': tuple(start_pos), + 'end_pos': tuple(end_pos), + 'color': color, + 'update_label': False, + 'start_time': start_time, + 'end_time': end_time, + }, + }) + aid: int = await ipc.receive() + self._ipcs[aid] = ipc + if not from_acm: + self._annot_stack.push_async_callback( + partial( + self.remove, + aid, + ) ) - ) - return aid + return aid async def remove( self, @@ -516,6 +689,9 @@ class AnnotCtl(Struct): from_acm: bool = False, + # NEW: optional timestamp for server-side index lookup + time: float|None = None, + ) -> int: ''' Add a `SelectRect` annotation to the target view, return @@ -523,36 +699,38 @@ class AnnotCtl(Struct): ''' ipc: MsgStream = self._get_ipc(fqme) - await ipc.send({ - 'fqme': fqme, - 'cmd': 'ArrowEditor', - 'timeframe': timeframe, - # 'meth': str(meth), - 'meth': 'add', - 'kwargs': { - 'x': float(x), - 'y': float(y), - 'color': color, - 'pointing': pointing, # up|down - 'alpha': alpha, - 'aid': None, - 'headLen': headLen, - 'headWidth': headWidth, - 'tailLen': tailLen, - 'tailWidth': tailWidth, - 'pxMode': pxMode, - }, - }) - aid: int = await ipc.receive() - self._ipcs[aid] = ipc - if not from_acm: - self._annot_stack.push_async_callback( - partial( - self.remove, - aid, + with trio.fail_after(3): + await ipc.send({ + 'fqme': fqme, + 'cmd': 'ArrowEditor', + 
'timeframe': timeframe, + # 'meth': str(meth), + 'meth': 'add', + 'kwargs': { + 'x': float(x), + 'y': float(y), + 'color': color, + 'pointing': pointing, # up|down + 'alpha': alpha, + 'aid': None, + 'headLen': headLen, + 'headWidth': headWidth, + 'tailLen': tailLen, + 'tailWidth': tailWidth, + 'pxMode': pxMode, + 'time': time, # for server-side index lookup + }, + }) + aid: int = await ipc.receive() + self._ipcs[aid] = ipc + if not from_acm: + self._annot_stack.push_async_callback( + partial( + self.remove, + aid, + ) ) - ) - return aid + return aid async def add_text( self, @@ -567,6 +745,9 @@ class AnnotCtl(Struct): from_acm: bool = False, + # NEW: optional timestamp for server-side index lookup + time: float|None = None, + ) -> int: ''' Add a `pg.TextItem` annotation to the target view. @@ -576,29 +757,31 @@ class AnnotCtl(Struct): ''' ipc: MsgStream = self._get_ipc(fqme) - await ipc.send({ - 'fqme': fqme, - 'cmd': 'TextItem', - 'timeframe': timeframe, - 'kwargs': { - 'text': text, - 'x': float(x), - 'y': float(y), - 'color': color, - 'anchor': tuple(anchor), - 'font_size': font_size, - }, - }) - aid: int = await ipc.receive() - self._ipcs[aid] = ipc - if not from_acm: - self._annot_stack.push_async_callback( - partial( - self.remove, - aid, + with trio.fail_after(3): + await ipc.send({ + 'fqme': fqme, + 'cmd': 'TextItem', + 'timeframe': timeframe, + 'kwargs': { + 'text': text, + 'x': float(x), + 'y': float(y), + 'color': color, + 'anchor': tuple(anchor), + 'font_size': font_size, + 'time': time, # for server-side index lookup + }, + }) + aid: int = await ipc.receive() + self._ipcs[aid] = ipc + if not from_acm: + self._annot_stack.push_async_callback( + partial( + self.remove, + aid, + ) ) - ) - return aid + return aid @acm From b4944916c9ffa5aa46d3a3686ffcaca60b0f4237 Mon Sep 17 00:00:00 2001 From: goodboy Date: Wed, 28 Jan 2026 14:43:52 -0500 Subject: [PATCH 27/44] Relay annot creation failures with err-dict resps Change annot-ctl APIs to return `None` on 
failure instead of invalid `aid`s. Server now sends `{'error': msg}` dict on failures, client match-blocks handle gracefully. Also, - update return types: `.add_rect()`, `.add_arrow()`, `.add_text()` now return `int|None` - match on `{'error': str(msg)}` in client IPC receive blocks - send error dicts from server on timestamp lookup failures - add failure handling in `markup_gaps()` to skip bad rects (this commit msg was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- piker/tsp/_annotate.py | 11 +++++++++- piker/ui/_remote_ctl.py | 45 ++++++++++++++++++++++++++++------------- 2 files changed, 41 insertions(+), 15 deletions(-) diff --git a/piker/tsp/_annotate.py b/piker/tsp/_annotate.py index f75cbf36..81c28394 100644 --- a/piker/tsp/_annotate.py +++ b/piker/tsp/_annotate.py @@ -220,7 +220,16 @@ async def markup_gaps( ) # add up/down rects - aid: int = await actl.add_rect(**rect_kwargs) + aid: int|None = await actl.add_rect(**rect_kwargs) + if aid is None: + log.error( + f'Failed to add rect for,\n' + f'{rect_kwargs!r}\n' + f'\n' + f'Skipping to next gap!\n' + ) + continue + assert aid aids[aid] = rect_kwargs direction: str = ( diff --git a/piker/ui/_remote_ctl.py b/piker/ui/_remote_ctl.py index fdaf54df..b1775841 100644 --- a/piker/ui/_remote_ctl.py +++ b/piker/ui/_remote_ctl.py @@ -155,12 +155,14 @@ async def serve_rc_annots( 1: ds.chart, }[timeframe] except KeyError: - log.warning( + msg: str = ( f'No chart for timeframe={timeframe}s, ' f'skipping rect annotation' ) - await annot_req_stream.send(-1) + log.exeception(msg) + await annot_req_stream.send({'error': msg}) continue + cv: ChartView = chart.cv # NEW: if timestamps provided, lookup current indices @@ -179,21 +181,23 @@ async def serve_rc_annots( # lookup start index start_matches = arr[arr['time'] == start_time] if len(start_matches) == 0: - log.error( - f'No shm entry for start_time=' - f'{start_time}, skipping rect' + msg: str = ( 
+ f'No shm entry for start_time={start_time}, ' + f'skipping rect' ) - await annot_req_stream.send(-1) + log.error(msg) + await annot_req_stream.send({'error': msg}) continue # lookup end index end_matches = arr[arr['time'] == end_time] if len(end_matches) == 0: - log.error( + msg: str = ( f'No shm entry for end_time={end_time}, ' f'skipping rect' ) - await annot_req_stream.send(-1) + log.error(msg) + await annot_req_stream.send({'error': msg}) continue # get close price from start bar, open from end @@ -587,7 +591,7 @@ class AnnotCtl(Struct): start_time: float|None = None, end_time: float|None = None, - ) -> int: + ) -> int|None: ''' Add a `SelectRect` annotation to the target view, return the instances `id(obj)` from the remote UI actor. @@ -610,7 +614,11 @@ class AnnotCtl(Struct): 'end_time': end_time, }, }) - aid: int = await ipc.receive() + aid: int|dict = await ipc.receive() + match aid: + case {'error': str(msg)}: + log.error(msg) + return None self._ipcs[aid] = ipc if not from_acm: self._annot_stack.push_async_callback( @@ -692,7 +700,7 @@ class AnnotCtl(Struct): # NEW: optional timestamp for server-side index lookup time: float|None = None, - ) -> int: + ) -> int|None: ''' Add a `SelectRect` annotation to the target view, return the instances `id(obj)` from the remote UI actor. @@ -721,7 +729,12 @@ class AnnotCtl(Struct): 'time': time, # for server-side index lookup }, }) - aid: int = await ipc.receive() + aid: int|dict = await ipc.receive() + match aid: + case {'error': str(msg)}: + log.error(msg) + return None + self._ipcs[aid] = ipc if not from_acm: self._annot_stack.push_async_callback( @@ -748,7 +761,7 @@ class AnnotCtl(Struct): # NEW: optional timestamp for server-side index lookup time: float|None = None, - ) -> int: + ) -> int|None: ''' Add a `pg.TextItem` annotation to the target view. 
@@ -772,7 +785,11 @@ class AnnotCtl(Struct): 'time': time, # for server-side index lookup }, }) - aid: int = await ipc.receive() + aid: int|dict = await ipc.receive() + match aid: + case {'error': str(msg)}: + log.error(msg) + return None self._ipcs[aid] = ipc if not from_acm: self._annot_stack.push_async_callback( From 1016f54c9879c5214b11d92b48540d802960e869 Mon Sep 17 00:00:00 2001 From: goodboy Date: Wed, 28 Jan 2026 15:34:57 -0500 Subject: [PATCH 28/44] Add `get_fonts()` API and fix `.px_size` for non-Qt ctxs Add a public `.ui._style.get_fonts()` helper to retrieve the `_font[_small]: DpiAwareFont` singleton pair. Adjust `DpiAwareFont.px_size` to return `conf.toml` value when Qt returns `-1` (no active Qt app). Also, - raise `ValueError` with detailed msg if both Qt and a conf-lookup fail - add some more type union whitespace cleanups: `int | None` -> `int|None` (this commit-msg was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- piker/ui/_style.py | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/piker/ui/_style.py b/piker/ui/_style.py index b6c47817..893308b0 100644 --- a/piker/ui/_style.py +++ b/piker/ui/_style.py @@ -61,7 +61,7 @@ class DpiAwareFont: ) -> None: self._font_size_calc_key: str = _font_size_key - self._font_size: int | None = None + self._font_size: int|None = None # Read preferred font size from main config file if it exists conf, path = config.load('conf', touch_if_dne=True) @@ -107,7 +107,22 @@ class DpiAwareFont: @property def px_size(self) -> int: - return self._qfont.pixelSize() + size: int = self._qfont.pixelSize() + + # XXX, when no Qt app has been spawned this will always be + # invalid.. + # SO, just return any conf.toml value. 
+ if size == -1: + if (conf_size := self._font_size) is None: + raise ValueError( + f'No valid `{type(_font).__name__}.px_size` set?\n' + f'\n' + f'-> `ui.font_size` is NOT set in `conf.toml`\n' + f'-> no Qt app is active ??\n' + ) + return conf_size + + return size def configure_to_dpi(self, screen: QtGui.QScreen | None = None): ''' @@ -221,6 +236,20 @@ def _config_fonts_to_screen() -> None: _font_small.configure_to_dpi() +def get_fonts() -> tuple[ + DpiAwareFont, + DpiAwareFont, +]: + ''' + Get the singleton font pair (of instances) from which all other + UI/UX should be "scaled around". + + See `DpiAwareFont` for (internal) deats. + + ''' + return _font, _font_small + + # TODO: re-compute font size when main widget switches screens? # https://forum.qt.io/topic/54136/how-do-i-get-the-qscreen-my-widget-is-on-qapplication-desktop-screen-returns-a-qwidget-and-qobject_cast-qscreen-returns-null/3 From 8c2fd7c7802fad85bb587b1eea1753f48fe07cf7 Mon Sep 17 00:00:00 2001 From: goodboy Date: Wed, 28 Jan 2026 16:30:41 -0500 Subject: [PATCH 29/44] Use `get_fonts()`, add `show_txt` flag to gap annots Switch `.tsp._annotate.markup_gaps()` to use new `.ui._style.get_fonts()` API for font size calc on client side and add optional `show_txt: bool` flag to toggle gap duration labels (with default `False`). 
Also, - replace `sgn` checks with named bools: `up_gap`, `down_gap` - use `small_font.px_size - 1` for gap label font sizing - wrap text creation in `if show_txt:` block - update IPC handler to use `get_fonts()` vs direct `_font` import (this commit msg was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- piker/tsp/_annotate.py | 58 ++++++++++++++++++++++++----------------- piker/ui/_remote_ctl.py | 5 ++-- 2 files changed, 37 insertions(+), 26 deletions(-) diff --git a/piker/tsp/_annotate.py b/piker/tsp/_annotate.py index 81c28394..7e91300f 100644 --- a/piker/tsp/_annotate.py +++ b/piker/tsp/_annotate.py @@ -30,7 +30,7 @@ import tractor from piker.data._formatters import BGM from piker.storage import log -# from piker.ui._style import _font +from piker.ui._style import get_fonts if TYPE_CHECKING: from piker.ui._remote_ctl import AnnotCtl @@ -88,6 +88,10 @@ async def markup_gaps( wdts: pl.DataFrame, gaps: pl.DataFrame, + # XXX, switch on to see txt showing a "humanized" label of each + # gap's duration. + show_txt: bool = False, + ) -> dict[int, dict]: ''' Remote annotate time-gaps in a dt-fielded ts (normally OHLC) @@ -202,6 +206,10 @@ async def markup_gaps( diff: float = cls - opn sgn: float = copysign(1, diff) + up_gap: bool = sgn == -1 + down_gap: bool = sgn == 1 + flat: bool = sgn == 0 + color: str = 'dad_blue' # TODO? mks more sense to have up/down coloring? # color: str = { @@ -233,7 +241,7 @@ async def markup_gaps( assert aid aids[aid] = rect_kwargs direction: str = ( - 'down' if sgn == 1 + 'down' if down_gap else 'up' ) # TODO! mk this a `msgspec.Struct` which we deserialize @@ -261,31 +269,33 @@ async def markup_gaps( ) # add duration label to RHS of arrow - if sgn == -1: # up-gap - anchor = (0, 0) # XXX, i dun get dese dims.. XD - else: # down-gap + if up_gap: + anchor = (0, 0) + # ^XXX? i dun get dese dims.. XD + elif down_gap: anchor = (0, 1) # XXX y, x? + else: # no-gap? 
+ assert flat + anchor = (0, 0) # up from bottom - # ?TODO? why returning -1 !? - # [ ] use conf.toml value instead! - # - # font_size: int = _font.font.pixelSize() - 10 - # await tractor.pause() - # assert isinstance(font_size, int) - font_size = None + # use a slightly smaller font for gap label txt. + font, small_font = get_fonts() + font_size: int = small_font.px_size - 1 + assert isinstance(font_size, int) - text_aid: int = await actl.add_text( - fqme=fqme, - timeframe=timeframe, - text=gap_label, - x=iend + 1, # fallback if timestamp lookup fails - y=cls, - time=gap_time, # server-side index lookup - color=color, - anchor=anchor, - font_size=font_size, - ) - aids[text_aid] = {'text': gap_label} + if show_txt: + text_aid: int = await actl.add_text( + fqme=fqme, + timeframe=timeframe, + text=gap_label, + x=iend + 1, # fallback if timestamp lookup fails + y=cls, + time=gap_time, # server-side index lookup + color=color, + anchor=anchor, + font_size=font_size, + ) + aids[text_aid] = {'text': gap_label} # tell chart to redraw all its # graphics view layers Bo diff --git a/piker/ui/_remote_ctl.py b/piker/ui/_remote_ctl.py index b1775841..f67f80ad 100644 --- a/piker/ui/_remote_ctl.py +++ b/piker/ui/_remote_ctl.py @@ -407,8 +407,9 @@ async def serve_rc_annots( # apply font size (default to DpiAwareFont if not # provided) if font_size is None: - from ._style import _font - font_size = _font.font.pixelSize() - 3 + from ._style import get_fonts + font, font_small = get_fonts() + font_size = font_small.px_size - 1 qfont: QFont = text_item.textItem.font() qfont.setPixelSize(font_size) From fd92cd99c28979c7d9049038df24355794a3119e Mon Sep 17 00:00:00 2001 From: goodboy Date: Thu, 29 Jan 2026 02:48:41 -0500 Subject: [PATCH 30/44] .ib._util: ignore attr err on click-hack twm wakeups? 
--- piker/brokers/ib/_util.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/piker/brokers/ib/_util.py b/piker/brokers/ib/_util.py index ef4ea15a..5ecd4e55 100644 --- a/piker/brokers/ib/_util.py +++ b/piker/brokers/ib/_util.py @@ -333,7 +333,14 @@ def i3ipc_xdotool_manual_click_hack() -> None: ''' focussed, matches = i3ipc_fin_wins_titled() - orig_win_id = focussed.window + try: + orig_win_id = focussed.window + except AttributeError: + # XXX if .window cucks we prolly aren't intending to + # use this and/or just woke up from suspend.. + log.exception('xdotool invalid usage ya ??\n') + return + try: for name, con in matches: print(f'Resetting data feed for {name}') From 0f1f2e263d86fbdfd1ae64854992f80de0c7a776 Mon Sep 17 00:00:00 2001 From: goodboy Date: Thu, 29 Jan 2026 02:49:25 -0500 Subject: [PATCH 31/44] For claude, ignore no runtime for offline shm reading --- piker/data/_sharedmem.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/piker/data/_sharedmem.py b/piker/data/_sharedmem.py index 0a797600..acb8070f 100644 --- a/piker/data/_sharedmem.py +++ b/piker/data/_sharedmem.py @@ -520,9 +520,12 @@ def open_shm_array( # "unlink" created shm on process teardown by # pushing teardown calls onto actor context stack - stack = tractor.current_actor().lifetime_stack - stack.callback(shmarr.close) - stack.callback(shmarr.destroy) + stack = tractor.current_actor( + err_on_no_runtime=False, + ).lifetime_stack + if stack: + stack.callback(shmarr.close) + stack.callback(shmarr.destroy) return shmarr @@ -607,7 +610,10 @@ def attach_shm_array( _known_tokens[key] = token # "close" attached shm on actor teardown - tractor.current_actor().lifetime_stack.callback(sha.close) + if (actor := tractor.current_actor( + err_on_no_runtime=False, + )): + actor.lifetime_stack.callback(sha.close) return sha From 763faa0cc15c4a462c2f43f9ea07f8be5a05cd87 Mon Sep 17 00:00:00 2001 From: goodboy Date: Fri, 30 Jan 2026 14:46:23 -0500 
Subject: [PATCH 32/44] Always overwrite tsdb duplicates found during backfill Enable the previously commented-out dedupe-and-write logic in `start_backfill()` to ensure tsdb stays clean of duplicate entries. (this patch was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- piker/tsp/_history.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/piker/tsp/_history.py b/piker/tsp/_history.py index d416c679..e875bab7 100644 --- a/piker/tsp/_history.py +++ b/piker/tsp/_history.py @@ -708,8 +708,16 @@ async def start_backfill( deduped, diff, ) = dedupe(df) - # if diff: - # sort_diff(df) + if diff: + log.warning( + f'Found {diff} duplicates in tsdb, ' + f'overwriting with deduped data\n' + ) + await storage.write_ohlcv( + col_sym_key, + deduped, + timeframe, + ) else: # finally filled gap From be4adfc202e44d91686a3c5cc795ea228e4fa058 Mon Sep 17 00:00:00 2001 From: goodboy Date: Fri, 30 Jan 2026 14:47:11 -0500 Subject: [PATCH 33/44] ib.feed: drop legacy "quote-with-vlm" polling Since we now explicitly check each mkt's venue hours, we don't need this mega hacky "waiting on a quote with real vlm" stuff to determine whether historical data should be loaded immediately. This approach also had the added complexity that we needed to handle edge cases for tickers (like xauusd.cmdty) which never have vlm.. so it's nice to be rid of it all ;p --- piker/brokers/ib/feed.py | 44 +--------------------------------------- 1 file changed, 1 insertion(+), 43 deletions(-) diff --git a/piker/brokers/ib/feed.py b/piker/brokers/ib/feed.py index 14803144..5f2c0062 100644 --- a/piker/brokers/ib/feed.py +++ b/piker/brokers/ib/feed.py @@ -1246,54 +1246,12 @@ async def stream_quotes( tn.start_soon(reset_on_feed) async with aclosing(iter_quotes): - # if syminfo.get('no_vlm', False): - if not init_msg.shm_write_opts['has_vlm']: - - # generally speaking these feeds don't - # include vlm data. 
- atype: str = mkt.dst.atype - log.info( - f'No-vlm {mkt.fqme}@{atype}, skipping quote poll' - ) - - else: - # wait for real volume on feed (trading might be - # closed) - while True: - ticker = await iter_quotes.receive() - - # for a real volume contract we rait for - # the first "real" trade to take place - if ( - # not calc_price - # and not ticker.rtTime - False - # not ticker.rtTime - ): - # spin consuming tickers until we - # get a real market datum - log.debug(f"New unsent ticker: {ticker}") - continue - - else: - log.debug("Received first volume tick") - # ugh, clear ticks since we've - # consumed them (ahem, ib_insync is - # truly stateful trash) - # ticker.ticks = [] - - # XXX: this works because we don't use - # ``aclosing()`` above? - break - - quote = normalize(ticker) - log.debug(f"First ticker received {quote}") - # tell data-layer spawner-caller that live # quotes are now active desptie not having # necessarily received a first vlm/clearing # tick. ticker = await iter_quotes.receive() + quote = normalize(ticker) feed_is_live.set() fqme: str = quote['fqme'] await send_chan.send({fqme: quote}) From e75c3d8a34a8be3308598d6a680e7ec5c7ab3c89 Mon Sep 17 00:00:00 2001 From: goodboy Date: Fri, 30 Jan 2026 14:53:00 -0500 Subject: [PATCH 34/44] Ignore single-zero-sample trace on no runtime.. --- piker/tsp/_anal.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/piker/tsp/_anal.py b/piker/tsp/_anal.py index bcf20a26..30cc6b59 100644 --- a/piker/tsp/_anal.py +++ b/piker/tsp/_anal.py @@ -276,7 +276,15 @@ def get_null_segs( absi_zdiff: np.ndarray = np.diff(absi_zeros) if zero_t.size < 2: - breakpoint() + try: + breakpoint() + except RuntimeError: + # XXX, if greenback not active from + # piker store ldshm cmd.. 
+ log.exception( + "Can't debug single-sample null!\n" + ) + return None # scan for all frame-indices where the From f2b04c40716d743ae017d8a61ef5acdc03d67813 Mon Sep 17 00:00:00 2001 From: goodboy Date: Fri, 30 Jan 2026 18:35:52 -0500 Subject: [PATCH 35/44] Clarify `register_with_sampler()` started type and vars Markup `ctx.started()` type-sig as `set[int]`, rename binding var `first` to `shm_periods` and add type hints for clarity on context mgr unpacking. Also, - whitespace cleanup: `Type | None` -> `Type|None` throughout - format long lines: `.setdefault()`, `await ctx.started()` - fix backtick style in docstrings for consistency - add placeholder TODO comment for `feed_is_live` check; it might be more rigorous to pass the syncing state down thru all this? (this commit msg was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- piker/data/_sampling.py | 52 +++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/piker/data/_sampling.py b/piker/data/_sampling.py index 802ea391..f8a0ec27 100644 --- a/piker/data/_sampling.py +++ b/piker/data/_sampling.py @@ -80,20 +80,20 @@ class Sampler: This non-instantiated type is meant to be a singleton within a `samplerd` actor-service spawned once by the user wishing to time-step-sample (real-time) quote feeds, see - ``.service.maybe_open_samplerd()`` and the below - ``register_with_sampler()``. + `.service.maybe_open_samplerd()` and the below + `register_with_sampler()`. ''' - service_nursery: None | trio.Nursery = None + service_nursery: None|trio.Nursery = None - # TODO: we could stick these in a composed type to avoid - # angering the "i hate module scoped variables crowd" (yawn). + # TODO: we could stick these in a composed type to avoid angering + # the "i hate module scoped variables crowd" (yawn). 
ohlcv_shms: dict[float, list[ShmArray]] = {} # holds one-task-per-sample-period tasks which are spawned as-needed by # data feed requests with a given detected time step usually from # history loading. - incr_task_cs: trio.CancelScope | None = None + incr_task_cs: trio.CancelScope|None = None bcast_errors: tuple[Exception] = ( trio.BrokenResourceError, @@ -249,8 +249,8 @@ class Sampler: async def broadcast( self, period_s: float, - time_stamp: float | None = None, - info: dict | None = None, + time_stamp: float|None = None, + info: dict|None = None, ) -> None: ''' @@ -315,7 +315,7 @@ class Sampler: @classmethod async def broadcast_all( self, - info: dict | None = None, + info: dict|None = None, ) -> None: # NOTE: take a copy of subs since removals can happen @@ -332,12 +332,12 @@ class Sampler: async def register_with_sampler( ctx: Context, period_s: float, - shms_by_period: dict[float, dict] | None = None, + shms_by_period: dict[float, dict]|None = None, open_index_stream: bool = True, # open a 2way stream for sample step msgs? sub_for_broadcasts: bool = True, # sampler side to send step updates? -) -> None: +) -> set[int]: get_console_log(tractor.current_actor().loglevel) incr_was_started: bool = False @@ -364,7 +364,12 @@ async def register_with_sampler( # insert the base 1s period (for OHLC style sampling) into # the increment buffer set to update and shift every second. - if shms_by_period is not None: + if ( + shms_by_period is not None + # and + # feed_is_live.is_set() + # ^TODO? pass it in instead? 
+ ): from ._sharedmem import ( attach_shm_array, _Token, @@ -378,12 +383,17 @@ async def register_with_sampler( readonly=False, ) shms_by_period[period] = shm - Sampler.ohlcv_shms.setdefault(period, []).append(shm) + Sampler.ohlcv_shms.setdefault( + period, + [], + ).append(shm) assert Sampler.ohlcv_shms # unblock caller - await ctx.started(set(Sampler.ohlcv_shms.keys())) + await ctx.started( + set(Sampler.ohlcv_shms.keys()) + ) if open_index_stream: try: @@ -429,7 +439,7 @@ async def register_with_sampler( async def spawn_samplerd( - loglevel: str | None = None, + loglevel: str|None = None, **extra_tractor_kwargs ) -> bool: @@ -475,7 +485,7 @@ async def spawn_samplerd( @acm async def maybe_open_samplerd( - loglevel: str | None = None, + loglevel: str|None = None, **pikerd_kwargs, ) -> tractor.Portal: # noqa @@ -500,11 +510,11 @@ async def maybe_open_samplerd( @acm async def open_sample_stream( period_s: float, - shms_by_period: dict[float, dict] | None = None, + shms_by_period: dict[float, dict]|None = None, open_index_stream: bool = True, sub_for_broadcasts: bool = True, - cache_key: str | None = None, + cache_key: str|None = None, allow_new_sampler: bool = True, ensure_is_active: bool = False, @@ -535,6 +545,8 @@ async def open_sample_stream( # yield bistream # else: + ctx: tractor.Context + shm_periods: set[int] # in `int`-seconds async with ( # XXX: this should be singleton on a host, # a lone broker-daemon per provider should be @@ -549,10 +561,10 @@ async def open_sample_stream( 'open_index_stream': open_index_stream, 'sub_for_broadcasts': sub_for_broadcasts, }, - ) as (ctx, first) + ) as (ctx, shm_periods) ): if ensure_is_active: - assert len(first) > 1 + assert len(shm_periods) > 1 async with ( ctx.open_stream( From ce3d8e7a1ec1b18a99280e9a13e71694a2cf7463 Mon Sep 17 00:00:00 2001 From: goodboy Date: Fri, 30 Jan 2026 18:40:48 -0500 Subject: [PATCH 36/44] Only register shms w sampler when `feed_is_live` Add timeout-gated wait for `feed_is_live: trio.Event` 
before passing shm tokens to `open_sample_stream()`; skip registering shm-buffers with the sampler if the feed doesn't "go live" within a new timeout. The main motivation here is to avoid the sampler incrementing shm-array bufs when the mkt-venue is closed so that a trailing "same price" line/bars isn't updated/rendered in the chart's view when unnecessary. Deats, - add `wait_for_live_timeout: float = 0.5` param to `manage_history()` - warn-log the fqme when timeout triggers - add error log for invalid `frame_start_dt` comparisons to `maybe_fill_null_segments()`. (this commit msg was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- piker/tsp/_history.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/piker/tsp/_history.py b/piker/tsp/_history.py index e875bab7..3384e516 100644 --- a/piker/tsp/_history.py +++ b/piker/tsp/_history.py @@ -247,6 +247,11 @@ async def maybe_fill_null_segments( from_timestamp(array['time'][0]) ) < backfill_until_dt ): + log.error( + f'Invalid frame_start !?\n' + f'frame_start_dt: {frame_start_dt!r}\n' + f'backfill_until_dt: {backfill_until_dt!r}\n' + ) await tractor.pause() # XXX TODO: pretty sure if i plot tsla, btcusdt.binance @@ -1284,6 +1289,7 @@ async def manage_history( some_data_ready: trio.Event, feed_is_live: trio.Event, timeframe: float = 60, # in seconds + wait_for_live_timeout: float = 0.5, task_status: TaskStatus[ tuple[ShmArray, ShmArray] @@ -1432,12 +1438,26 @@ async def manage_history( 1: rt_shm, 60: hist_shm, } - async with open_sample_stream( - period_s=1., - shms_by_period={ + + shms_by_period: dict|None = None + with trio.move_on_after(wait_for_live_timeout) as cs: + await feed_is_live.wait() + + if cs.cancelled_caught: + log.warning( + f'No live feed within {wait_for_live_timeout!r}s\n' + f'fqme: {mkt.fqme!r}\n' + f'NOT activating shm-buffer-sampler!!\n' + ) + + if feed_is_live.is_set(): + 
shms_by_period: dict[int, dict] = { 1.: rt_shm.token, 60.: hist_shm.token, - }, + } + async with open_sample_stream( + period_s=1., + shms_by_period=shms_by_period, # NOTE: we want to only open a stream for doing # broadcasts on backfill operations, not receive the From b90edf95a7bf31b3c27157a70b66433a1738c226 Mon Sep 17 00:00:00 2001 From: goodboy Date: Fri, 30 Jan 2026 18:50:26 -0500 Subject: [PATCH 37/44] .ib.feed: only set `feed_is_live` after first quote Move `feed_is_live.set()` to after receiving the first valid quote instead of setting early on venue-closed path. Prevents sampler registration when no live data expected. Also, - drop redundant `.set()` call in quote iteration loop - add TODO note about sleeping until venue opens vs forever - init `first_quote: dict` early for consistency (this commit msg was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- piker/brokers/ib/feed.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/piker/brokers/ib/feed.py b/piker/brokers/ib/feed.py index 5f2c0062..28054da4 100644 --- a/piker/brokers/ib/feed.py +++ b/piker/brokers/ib/feed.py @@ -1115,6 +1115,7 @@ async def stream_quotes( con: Contract = details.contract first_ticker: Ticker|None = None + first_quote: dict[str, Any] = {} timeout: float = 1.6 with trio.move_on_after(timeout) as quote_cs: @@ -1167,15 +1168,14 @@ async def stream_quotes( first_quote, )) - # it's not really live but this will unblock - # the brokerd feed task to tell the ui to update? - feed_is_live.set() - # block and let data history backfill code run. # XXX obvi given the venue is closed, we never expect feed # to come up; a taskc should be the only way to # terminate this task. await trio.sleep_forever() + # + # ^^XXX^^TODO! INSTEAD impl a `trio.sleep()` for the + # duration until the venue opens!! 
# ?TODO, we could instead spawn a task that waits on a feed # to start and let it wait indefinitely..instead of this @@ -1199,6 +1199,9 @@ async def stream_quotes( 'Rxed init quote:\n' f'{pformat(first_quote)}' ) + # signal `.data.feed` layer that mkt quotes are LIVE + feed_is_live.set() + cs: trio.CancelScope|None = None startup: bool = True iter_quotes: trio.abc.Channel @@ -1252,7 +1255,6 @@ async def stream_quotes( # tick. ticker = await iter_quotes.receive() quote = normalize(ticker) - feed_is_live.set() fqme: str = quote['fqme'] await send_chan.send({fqme: quote}) From d1991b33003a4294d6f5fcdb000844e7c4169be3 Mon Sep 17 00:00:00 2001 From: goodboy Date: Fri, 30 Jan 2026 19:21:28 -0500 Subject: [PATCH 38/44] Guard against `None` chart in `ArrowEditor.remove()` Add null check for `linked.chart` before calling `.plotItem.removeItem()` to prevent `AttributeError` when chart is `None`. (this commit msg was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- piker/ui/_editors.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/piker/ui/_editors.py b/piker/ui/_editors.py index f8d3e68c..872ec910 100644 --- a/piker/ui/_editors.py +++ b/piker/ui/_editors.py @@ -174,7 +174,10 @@ class ArrowEditor(Struct): f'{arrow!r}\n' ) for linked in self.godw.iter_linked(): - linked.chart.plotItem.removeItem(arrow) + if not (chart := linked.chart): + continue + + chart.plotItem.removeItem(arrow) try: arrows.remove(arrow) except ValueError: From fdb3999902a3707f6db5c50ebd12c6692c50982f Mon Sep 17 00:00:00 2001 From: goodboy Date: Thu, 5 Feb 2026 17:48:52 -0500 Subject: [PATCH 39/44] .tsp._history: add gap detection in backfill loop Add frame-gap detection when `frame_last_dt < end_dt_param` to warn about potential venue closures or missing data during the backfill loop in `start_backfill()`. 
Deats, - add `frame_last_dt < end_dt_param` check after frame recv - log warnings with EST-converted timestamps for clarity - add `await tractor.pause()` for REPL-investigation on gaps - add TODO comment about venue closure hour checking - capture `_until_was_none` walrus var for null-check clarity - add `last_time` assertion for `time[-1] == next_end_dt` - rename `_daterr` to `nodata` with `_nodata` capture Also, - import `pendulum.timezone` and create `est` tz instance - change `get_logger()` import from `.data._util` to `.log` - add parens around `(next_prepend_index - ln) < 0` check (this commit msg was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- piker/tsp/_history.py | 54 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/piker/tsp/_history.py b/piker/tsp/_history.py index 3384e516..0db071fe 100644 --- a/piker/tsp/_history.py +++ b/piker/tsp/_history.py @@ -49,6 +49,7 @@ from pendulum import ( Duration, duration as mk_duration, from_timestamp, + timezone, ) import numpy as np import polars as pl @@ -57,9 +58,7 @@ from piker.brokers import NoData from piker.accounting import ( MktPair, ) -from piker.data._util import ( - log, -) +from piker.log import get_logger from ..data._sharedmem import ( maybe_open_shm_array, ShmArray, @@ -97,6 +96,9 @@ if TYPE_CHECKING: # from .feed import _FeedsBus +log = get_logger() + + # `ShmArray` buffer sizing configuration: _mins_in_day = int(60 * 24) # how much is probably dependent on lifestyle @@ -401,7 +403,9 @@ async def start_backfill( # based on the sample step size, maybe load a certain amount history update_start_on_prepend: bool = False - if backfill_until_dt is None: + if ( + _until_was_none := (backfill_until_dt is None) + ): # TODO: per-provider default history-durations? 
# -[ ] inside the `open_history_client()` config allow @@ -435,6 +439,8 @@ async def start_backfill( last_start_dt: datetime = backfill_from_dt next_prepend_index: int = backfill_from_shm_index + est = timezone('EST') + while last_start_dt > backfill_until_dt: log.info( f'Requesting {timeframe}s frame:\n' @@ -448,9 +454,10 @@ async def start_backfill( next_end_dt, ) = await get_hist( timeframe, - end_dt=last_start_dt, + end_dt=(end_dt_param := last_start_dt), ) - except NoData as _daterr: + except NoData as nodata: + _nodata = nodata orig_last_start_dt: datetime = last_start_dt gap_report: str = ( f'EMPTY FRAME for `end_dt: {last_start_dt}`?\n' @@ -518,8 +525,32 @@ async def start_backfill( == next_start_dt.timestamp() ) + assert ( + (last_time := time[-1]) + == + next_end_dt.timestamp() + ) - assert time[-1] == next_end_dt.timestamp() + frame_last_dt = from_timestamp(last_time) + if ( + frame_last_dt.add(seconds=timeframe) + < + end_dt_param + ): + est_frame_last_dt = est.convert(frame_last_dt) + est_end_dt_param = est.convert(end_dt_param) + log.warning( + f'Provider frame ending BEFORE requested end_dt={end_dt_param} ??\n' + f'frame_last_dt (EST): {est_frame_last_dt!r}\n' + f'end_dt_param (EST): {est_end_dt_param!r}\n' + f'\n' + f'Likely contains,\n' + f'- a venue closure.\n' + f'- (maybe?) missing data ?\n' + ) + # ?TODO, check against venue closure hours + # if/when provided by backend? + await tractor.pause() expected_dur: Interval = ( last_start_dt.subtract( @@ -581,10 +612,11 @@ async def start_backfill( '0 BARS TO PUSH after diff!?\n' f'{next_start_dt} -> {last_start_dt}' ) + await tractor.pause() # Check if we're about to exceed buffer capacity BEFORE # attempting the push - if next_prepend_index - ln < 0: + if (next_prepend_index - ln) < 0: log.warning( f'Backfill would exceed buffer capacity!\n' f'next_prepend_index: {next_prepend_index}\n' @@ -655,7 +687,7 @@ async def start_backfill( }, }) - # can't push the entire frame? 
so + # XXX, can't push the entire frame? so # push only the amount that can fit.. break @@ -715,8 +747,8 @@ async def start_backfill( ) = dedupe(df) if diff: log.warning( - f'Found {diff} duplicates in tsdb, ' - f'overwriting with deduped data\n' + f'Found {diff!r} duplicates in tsdb! ' + f'=> Overwriting with `deduped` data !! <=\n' ) await storage.write_ohlcv( col_sym_key, From 964f207150869ef14a18fa18fb42a6aa52254266 Mon Sep 17 00:00:00 2001 From: goodboy Date: Sat, 14 Feb 2026 16:55:27 -0500 Subject: [PATCH 40/44] Replace assert with warn for no-gaps in `.storage.cli` Change `assert aids` to a warning log when no history gaps are found during `ldshm` gap detection; it is the **ideal case** OBVI. This avoids crashing the CLI when gap detection finds no issues, which is actually good news! Bp (this commit msg was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- piker/storage/cli.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/piker/storage/cli.py b/piker/storage/cli.py index 90d5baed..c73d3b6d 100644 --- a/piker/storage/cli.py +++ b/piker/storage/cli.py @@ -447,7 +447,13 @@ def ldshm( ) # last chance manual overwrites in REPL # await tractor.pause() - assert aids + if not aids: + log.warning( + f'No gaps were found !?\n' + f'fqme: {fqme!r}\n' + f'timeframe: {period_s!r}\n' + f"WELL THAT'S GOOD NOOZ!\n" + ) tf2aids[period_s] = aids else: From a940018721e6855ac5b8ac7bdb700ea963ae8d65 Mon Sep 17 00:00:00 2001 From: goodboy Date: Sat, 14 Feb 2026 16:57:36 -0500 Subject: [PATCH 41/44] Adjust binance stale-bar detection to 2x tolerance Change the stale-bar check in `.binance.feed` from `timeframe` to `timeframe * 2` tolerance to avoid false-positive pauses when bars are slightly delayed but still within acceptable bounds. Styling, - add walrus operator to capture `_time_step` for debugger inspection. - add comment explaining the debug purpose of this check. 
(this commit msg was generated in some part by [`claude-code`][claude-code-gh]) [claude-code-gh]: https://github.com/anthropics/claude-code --- piker/brokers/binance/feed.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/piker/brokers/binance/feed.py b/piker/brokers/binance/feed.py index 53a5073d..ff5f6ec0 100644 --- a/piker/brokers/binance/feed.py +++ b/piker/brokers/binance/feed.py @@ -275,9 +275,15 @@ async def open_history_client( f'{times}' ) + # XXX, debug any case where the latest 1m bar we get is + # already another "sample's-step-old".. if end_dt is None: inow: int = round(time.time()) - if (inow - times[-1]) > 60: + if ( + _time_step := (inow - times[-1]) + > + timeframe * 2 + ): await tractor.pause() start_dt = from_timestamp(times[0]) From a97f6c8dcf524154e8083355177da0d15317082b Mon Sep 17 00:00:00 2001 From: goodboy Date: Fri, 6 Feb 2026 00:39:26 -0500 Subject: [PATCH 42/44] Flip `.tsp._history` logger to explicit mod-name (again) --- piker/tsp/_history.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/piker/tsp/_history.py b/piker/tsp/_history.py index 0db071fe..a47e8e9d 100644 --- a/piker/tsp/_history.py +++ b/piker/tsp/_history.py @@ -96,7 +96,7 @@ if TYPE_CHECKING: # from .feed import _FeedsBus -log = get_logger() +log = get_logger(__name__) # `ShmArray` buffer sizing configuration: From 90fce9fcd43a023834b9a70bec339eb165238ffc Mon Sep 17 00:00:00 2001 From: goodboy Date: Sun, 22 Feb 2026 23:37:32 -0500 Subject: [PATCH 43/44] Woops, use `piker_pin` from GH for `tractor` Also, install the `'repl'` deps-group by default to ensure we get the extras required by `tractor` for non-`trio` task debug REPLin.. Bump lock file to match. 
--- pyproject.toml | 5 +++-- uv.lock | 46 ++-------------------------------------------- 2 files changed, 5 insertions(+), 46 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index dcd489d2..3d850d7e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -98,6 +98,7 @@ python-downloads = 'manual' # https://docs.astral.sh/uv/concepts/projects/dependencies/#default-groups default-groups = [ 'uis', + 'repl', ] # ------ tool.uv ------ @@ -200,9 +201,9 @@ pyvnc = { git = "https://github.com/regulad/pyvnc.git" } xonsh = { git = 'https://github.com/xonsh/xonsh.git', branch = 'main' } # XXX since, we're like, always hacking new shite all-the-time. Bp -# tractor = { git = "https://github.com/goodboy/tractor.git", branch ="piker_pin" } +tractor = { git = "https://github.com/goodboy/tractor.git", branch ="piker_pin" } # tractor = { git = "https://pikers.dev/goodboy/tractor", branch = "piker_pin" } # tractor = { git = "https://pikers.dev/goodboy/tractor", branch = "main" } # ------ goodboy ------ # hackin dev-envs, usually there's something new he's hackin in.. 
-tractor = { path = "../tractor", editable = true } +# tractor = { path = "../tractor", editable = true } diff --git a/uv.lock b/uv.lock index 1d96ab57..44cced3a 100644 --- a/uv.lock +++ b/uv.lock @@ -1113,7 +1113,7 @@ requires-dist = [ { name = "tomli", specifier = ">=2.0.1,<3.0.0" }, { name = "tomli-w", specifier = ">=1.0.0,<2.0.0" }, { name = "tomlkit", git = "https://github.com/pikers/tomlkit.git?branch=piker_pin" }, - { name = "tractor", editable = "../tractor" }, + { name = "tractor", git = "https://github.com/goodboy/tractor.git?branch=piker_pin" }, { name = "trio", specifier = ">=0.27" }, { name = "trio-typing", specifier = ">=0.10.0" }, { name = "trio-util", specifier = ">=0.7.0,<0.8.0" }, @@ -1868,7 +1868,7 @@ source = { git = "https://github.com/pikers/tomlkit.git?branch=piker_pin#8e0239a [[package]] name = "tractor" version = "0.1.0a6.dev0" -source = { editable = "../tractor" } +source = { git = "https://github.com/goodboy/tractor.git?branch=piker_pin#36307c59175a1d04fecc77ef2c28f5c943b5f3d1" } dependencies = [ { name = "bidict" }, { name = "cffi" }, @@ -1881,48 +1881,6 @@ dependencies = [ { name = "wrapt" }, ] -[package.metadata] -requires-dist = [ - { name = "bidict", specifier = ">=0.23.1" }, - { name = "cffi", specifier = ">=1.17.1" }, - { name = "colorlog", specifier = ">=6.8.2,<7" }, - { name = "msgspec", specifier = ">=0.19.0" }, - { name = "pdbp", specifier = ">=1.8.2,<2" }, - { name = "platformdirs", specifier = ">=4.4.0" }, - { name = "tricycle", specifier = ">=0.4.1,<0.5" }, - { name = "trio", specifier = ">0.27" }, - { name = "wrapt", specifier = ">=1.16.0,<2" }, -] - -[package.metadata.requires-dev] -dev = [ - { name = "greenback", specifier = ">=1.2.1,<2" }, - { name = "pexpect", specifier = ">=4.9.0,<5" }, - { name = "prompt-toolkit", specifier = ">=3.0.50" }, - { name = "psutil", specifier = ">=7.0.0" }, - { name = "pyperclip", specifier = ">=1.9.0" }, - { name = "pytest", specifier = ">=8.3.5" }, - { name = "stackscope", specifier = 
">=0.2.2,<0.3" }, - { name = "typing-extensions", specifier = ">=4.14.1" }, - { name = "xonsh", specifier = ">=0.19.2" }, -] -devx = [ - { name = "greenback", specifier = ">=1.2.1,<2" }, - { name = "stackscope", specifier = ">=0.2.2,<0.3" }, - { name = "typing-extensions", specifier = ">=4.14.1" }, -] -lint = [{ name = "ruff", specifier = ">=0.9.6" }] -repl = [ - { name = "prompt-toolkit", specifier = ">=3.0.50" }, - { name = "psutil", specifier = ">=7.0.0" }, - { name = "pyperclip", specifier = ">=1.9.0" }, - { name = "xonsh", specifier = ">=0.19.2" }, -] -testing = [ - { name = "pexpect", specifier = ">=4.9.0,<5" }, - { name = "pytest", specifier = ">=8.3.5" }, -] - [[package]] name = "tricycle" version = "0.4.1" From 8dd969e85fea3fdc09909fe7067680bcc2c109bd Mon Sep 17 00:00:00 2001 From: goodboy Date: Fri, 6 Feb 2026 10:34:56 -0500 Subject: [PATCH 44/44] Pin to min `xonsh` release for @goodboy needs --- pyproject.toml | 4 ++-- uv.lock | 14 ++++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3d850d7e..13a7fbe3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -130,7 +130,7 @@ repl = [ "greenback >=1.1.1, <2.0.0", # @goodboy's preferred console toolz - "xonsh", + "xonsh>=0.22.2", "prompt-toolkit ==3.0.40", "pyperclip>=1.9.0", @@ -198,7 +198,7 @@ pyvnc = { git = "https://github.com/regulad/pyvnc.git" } # to get fancy next-cmd/suggestion feats prior to 0.22.2 B) # https://github.com/xonsh/xonsh/pull/6037 # https://github.com/xonsh/xonsh/pull/6048 -xonsh = { git = 'https://github.com/xonsh/xonsh.git', branch = 'main' } +# xonsh = { git = 'https://github.com/xonsh/xonsh.git', branch = 'main' } # XXX since, we're like, always hacking new shite all-the-time. 
Bp tractor = { git = "https://github.com/goodboy/tractor.git", branch ="piker_pin" } diff --git a/uv.lock b/uv.lock index 44cced3a..9de1420d 100644 --- a/uv.lock +++ b/uv.lock @@ -1138,7 +1138,7 @@ dev = [ { name = "pytest" }, { name = "qdarkstyle", specifier = ">=3.0.2,<4.0.0" }, { name = "rapidfuzz", specifier = ">=3.2.0,<4.0.0" }, - { name = "xonsh", git = "https://github.com/xonsh/xonsh.git?branch=main" }, + { name = "xonsh", specifier = ">=0.22.2" }, ] lint = [{ name = "ruff", specifier = ">=0.9.6" }] repl = [ @@ -1147,7 +1147,7 @@ repl = [ { name = "pexpect", specifier = ">=4.9.0" }, { name = "prompt-toolkit", specifier = "==3.0.40" }, { name = "pyperclip", specifier = ">=1.9.0" }, - { name = "xonsh", git = "https://github.com/xonsh/xonsh.git?branch=main" }, + { name = "xonsh", specifier = ">=0.22.2" }, ] testing = [{ name = "pytest" }] uis = [ @@ -2120,8 +2120,14 @@ wheels = [ [[package]] name = "xonsh" -version = "0.22.1" -source = { git = "https://github.com/xonsh/xonsh.git?branch=main#336658ff0919f8d7bb96d581136d37d470a8fe99" } +version = "0.22.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/48/df/1fc9ed62b3d7c14612e1713e9eb7bd41d54f6ad1028a8fbb6b7cddebc345/xonsh-0.22.4.tar.gz", hash = "sha256:6be346563fec2db75778ba5d2caee155525e634e99d9cc8cc347626025c0b3fa", size = 826665, upload-time = "2026-02-17T07:53:39.424Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2e/00/7cbc0c1fb64365a0a317c54ce3a151c9644eea5a509d9cbaae61c9fd1426/xonsh-0.22.4-py311-none-any.whl", hash = "sha256:38b29b29fa85aa756462d9d9bbcaa1d85478c2108da3de6cc590a69a4bcd1a01", size = 654375, upload-time = "2026-02-17T07:53:37.702Z" }, + { url = "https://files.pythonhosted.org/packages/2e/c2/3dd498dc28d8f89cdd52e39950c5e591499ae423f61694c0bb4d03ed1d82/xonsh-0.22.4-py312-none-any.whl", hash = "sha256:4e538fac9f4c3d866ddbdeca068f0c0515469c997ed58d3bfee963878c6df5a5", size = 654300, upload-time = 
"2026-02-17T07:53:35.813Z" }, + { url = "https://files.pythonhosted.org/packages/82/7d/1f9c7147518e9f03f6ce081b5bfc4f1aceb6ec5caba849024d005e41d3be/xonsh-0.22.4-py313-none-any.whl", hash = "sha256:cc5fabf0ad0c56a2a11bed1e6a43c4ec6416a5b30f24f126b8e768547c3793e2", size = 654818, upload-time = "2026-02-17T07:53:33.477Z" }, +] [[package]] name = "yapic-json"