diff --git a/piker/brokers/binance/feed.py b/piker/brokers/binance/feed.py
index 53a5073d..ff5f6ec0 100644
--- a/piker/brokers/binance/feed.py
+++ b/piker/brokers/binance/feed.py
@@ -275,9 +275,15 @@ async def open_history_client(
f'{times}'
)
+ # XXX, debug any case where the latest 1m bar we get is
+ # already another "sample's-step-old"..
if end_dt is None:
inow: int = round(time.time())
- if (inow - times[-1]) > 60:
+ if (
+                (_time_step := (inow - times[-1]))
+ >
+ timeframe * 2
+ ):
await tractor.pause()
start_dt = from_timestamp(times[0])
diff --git a/piker/brokers/ib/_util.py b/piker/brokers/ib/_util.py
index 00b2d233..5ecd4e55 100644
--- a/piker/brokers/ib/_util.py
+++ b/piker/brokers/ib/_util.py
@@ -250,7 +250,9 @@ async def vnc_click_hack(
'connection': 'r'
}[reset_type]
- with tractor.devx.open_crash_handler():
+ with tractor.devx.open_crash_handler(
+ ignore={TimeoutError,},
+ ):
client = await AsyncVNCClient.connect(
VNCConfig(
host=host,
@@ -331,7 +333,14 @@ def i3ipc_xdotool_manual_click_hack() -> None:
'''
focussed, matches = i3ipc_fin_wins_titled()
- orig_win_id = focussed.window
+ try:
+ orig_win_id = focussed.window
+ except AttributeError:
+ # XXX if .window cucks we prolly aren't intending to
+ # use this and/or just woke up from suspend..
+ log.exception('xdotool invalid usage ya ??\n')
+ return
+
try:
for name, con in matches:
print(f'Resetting data feed for {name}')
diff --git a/piker/brokers/ib/api.py b/piker/brokers/ib/api.py
index 5bcc7336..4a63a0f1 100644
--- a/piker/brokers/ib/api.py
+++ b/piker/brokers/ib/api.py
@@ -1187,7 +1187,7 @@ async def load_aio_clients(
# the API TCP in `ib_insync` connection can be flaky af so instead
# retry a few times to get the client going..
connect_retries: int = 3,
- connect_timeout: float = 10,
+    connect_timeout: float = 30,  # longer, in case connecting to a remote-host
disconnect_on_exit: bool = True,
) -> dict[str, Client]:
diff --git a/piker/brokers/ib/feed.py b/piker/brokers/ib/feed.py
index 51305ced..28054da4 100644
--- a/piker/brokers/ib/feed.py
+++ b/piker/brokers/ib/feed.py
@@ -178,8 +178,8 @@ async def open_history_client(
async def get_hist(
timeframe: float,
- end_dt: datetime | None = None,
- start_dt: datetime | None = None,
+ end_dt: datetime|None = None,
+ start_dt: datetime|None = None,
) -> tuple[np.ndarray, str]:
@@ -262,7 +262,38 @@ async def open_history_client(
vlm = bars_array['volume']
vlm[vlm < 0] = 0
- return bars_array, first_dt, last_dt
+ # XXX, if a start-limit was passed ensure we only
+ # return history that far back!
+ if (
+ start_dt
+ and
+ first_dt < start_dt
+ ):
+ trimmed_bars = bars_array[
+ bars_array['time'] >= start_dt.timestamp()
+ ]
+ if (
+                    (trimmed_first_dt := from_timestamp(trimmed_bars['time'][0]))
+ !=
+ start_dt
+ ):
+ # TODO! rm this once we're more confident it never hits!
+ breakpoint()
+ raise RuntimeError(
+ f'OHLC-bars array start is gt `start_dt` limit !!\n'
+ f'start_dt: {start_dt}\n'
+ f'first_dt: {first_dt}\n'
+ f'trimmed_first_dt: {trimmed_first_dt}\n'
+ )
+
+                first_dt = from_timestamp(trimmed_bars['time'][0])
+                bars_array = trimmed_bars  # overwrite w/ start_dt-limited frame
+
+ return (
+ bars_array,
+ first_dt,
+ last_dt,
+ )
# TODO: it seems like we can do async queries for ohlc
# but getting the order right still isn't working and I'm not
@@ -397,7 +428,7 @@ async def get_bars(
# blank to start which tells ib to look up the latest datum
end_dt: str = '',
- start_dt: str | None = '',
+ start_dt: str|None = '',
# TODO: make this more dynamic based on measured frame rx latency?
# how long before we trigger a feed reset (seconds)
@@ -451,6 +482,8 @@ async def get_bars(
dt_duration,
) = await proxy.bars(
fqme=fqme,
+ # XXX TODO! lol we're not using this..
+ # start_dt=start_dt,
end_dt=end_dt,
sample_period_s=timeframe,
@@ -1082,6 +1115,7 @@ async def stream_quotes(
con: Contract = details.contract
first_ticker: Ticker|None = None
+ first_quote: dict[str, Any] = {}
timeout: float = 1.6
with trio.move_on_after(timeout) as quote_cs:
@@ -1134,15 +1168,14 @@ async def stream_quotes(
first_quote,
))
- # it's not really live but this will unblock
- # the brokerd feed task to tell the ui to update?
- feed_is_live.set()
-
# block and let data history backfill code run.
# XXX obvi given the venue is closed, we never expect feed
# to come up; a taskc should be the only way to
# terminate this task.
await trio.sleep_forever()
+ #
+ # ^^XXX^^TODO! INSTEAD impl a `trio.sleep()` for the
+ # duration until the venue opens!!
# ?TODO, we could instead spawn a task that waits on a feed
# to start and let it wait indefinitely..instead of this
@@ -1166,6 +1199,9 @@ async def stream_quotes(
'Rxed init quote:\n'
f'{pformat(first_quote)}'
)
+ # signal `.data.feed` layer that mkt quotes are LIVE
+ feed_is_live.set()
+
cs: trio.CancelScope|None = None
startup: bool = True
iter_quotes: trio.abc.Channel
@@ -1213,55 +1249,12 @@ async def stream_quotes(
tn.start_soon(reset_on_feed)
async with aclosing(iter_quotes):
- # if syminfo.get('no_vlm', False):
- if not init_msg.shm_write_opts['has_vlm']:
-
- # generally speaking these feeds don't
- # include vlm data.
- atype: str = mkt.dst.atype
- log.info(
- f'No-vlm {mkt.fqme}@{atype}, skipping quote poll'
- )
-
- else:
- # wait for real volume on feed (trading might be
- # closed)
- while True:
- ticker = await iter_quotes.receive()
-
- # for a real volume contract we rait for
- # the first "real" trade to take place
- if (
- # not calc_price
- # and not ticker.rtTime
- False
- # not ticker.rtTime
- ):
- # spin consuming tickers until we
- # get a real market datum
- log.debug(f"New unsent ticker: {ticker}")
- continue
-
- else:
- log.debug("Received first volume tick")
- # ugh, clear ticks since we've
- # consumed them (ahem, ib_insync is
- # truly stateful trash)
- # ticker.ticks = []
-
- # XXX: this works because we don't use
- # ``aclosing()`` above?
- break
-
- quote = normalize(ticker)
- log.debug(f"First ticker received {quote}")
-
# tell data-layer spawner-caller that live
# quotes are now active desptie not having
# necessarily received a first vlm/clearing
# tick.
ticker = await iter_quotes.receive()
- feed_is_live.set()
+ quote = normalize(ticker)
fqme: str = quote['fqme']
await send_chan.send({fqme: quote})
diff --git a/piker/data/_sampling.py b/piker/data/_sampling.py
index 802ea391..f8a0ec27 100644
--- a/piker/data/_sampling.py
+++ b/piker/data/_sampling.py
@@ -80,20 +80,20 @@ class Sampler:
This non-instantiated type is meant to be a singleton within
a `samplerd` actor-service spawned once by the user wishing to
time-step-sample (real-time) quote feeds, see
- ``.service.maybe_open_samplerd()`` and the below
- ``register_with_sampler()``.
+ `.service.maybe_open_samplerd()` and the below
+ `register_with_sampler()`.
'''
- service_nursery: None | trio.Nursery = None
+ service_nursery: None|trio.Nursery = None
- # TODO: we could stick these in a composed type to avoid
- # angering the "i hate module scoped variables crowd" (yawn).
+ # TODO: we could stick these in a composed type to avoid angering
+ # the "i hate module scoped variables crowd" (yawn).
ohlcv_shms: dict[float, list[ShmArray]] = {}
# holds one-task-per-sample-period tasks which are spawned as-needed by
# data feed requests with a given detected time step usually from
# history loading.
- incr_task_cs: trio.CancelScope | None = None
+ incr_task_cs: trio.CancelScope|None = None
bcast_errors: tuple[Exception] = (
trio.BrokenResourceError,
@@ -249,8 +249,8 @@ class Sampler:
async def broadcast(
self,
period_s: float,
- time_stamp: float | None = None,
- info: dict | None = None,
+ time_stamp: float|None = None,
+ info: dict|None = None,
) -> None:
'''
@@ -315,7 +315,7 @@ class Sampler:
@classmethod
async def broadcast_all(
self,
- info: dict | None = None,
+ info: dict|None = None,
) -> None:
# NOTE: take a copy of subs since removals can happen
@@ -332,12 +332,12 @@ class Sampler:
async def register_with_sampler(
ctx: Context,
period_s: float,
- shms_by_period: dict[float, dict] | None = None,
+ shms_by_period: dict[float, dict]|None = None,
open_index_stream: bool = True, # open a 2way stream for sample step msgs?
sub_for_broadcasts: bool = True, # sampler side to send step updates?
-) -> None:
+) -> set[int]:
get_console_log(tractor.current_actor().loglevel)
incr_was_started: bool = False
@@ -364,7 +364,12 @@ async def register_with_sampler(
# insert the base 1s period (for OHLC style sampling) into
# the increment buffer set to update and shift every second.
- if shms_by_period is not None:
+ if (
+ shms_by_period is not None
+ # and
+ # feed_is_live.is_set()
+ # ^TODO? pass it in instead?
+ ):
from ._sharedmem import (
attach_shm_array,
_Token,
@@ -378,12 +383,17 @@ async def register_with_sampler(
readonly=False,
)
shms_by_period[period] = shm
- Sampler.ohlcv_shms.setdefault(period, []).append(shm)
+ Sampler.ohlcv_shms.setdefault(
+ period,
+ [],
+ ).append(shm)
assert Sampler.ohlcv_shms
# unblock caller
- await ctx.started(set(Sampler.ohlcv_shms.keys()))
+ await ctx.started(
+ set(Sampler.ohlcv_shms.keys())
+ )
if open_index_stream:
try:
@@ -429,7 +439,7 @@ async def register_with_sampler(
async def spawn_samplerd(
- loglevel: str | None = None,
+ loglevel: str|None = None,
**extra_tractor_kwargs
) -> bool:
@@ -475,7 +485,7 @@ async def spawn_samplerd(
@acm
async def maybe_open_samplerd(
- loglevel: str | None = None,
+ loglevel: str|None = None,
**pikerd_kwargs,
) -> tractor.Portal: # noqa
@@ -500,11 +510,11 @@ async def maybe_open_samplerd(
@acm
async def open_sample_stream(
period_s: float,
- shms_by_period: dict[float, dict] | None = None,
+ shms_by_period: dict[float, dict]|None = None,
open_index_stream: bool = True,
sub_for_broadcasts: bool = True,
- cache_key: str | None = None,
+ cache_key: str|None = None,
allow_new_sampler: bool = True,
ensure_is_active: bool = False,
@@ -535,6 +545,8 @@ async def open_sample_stream(
# yield bistream
# else:
+ ctx: tractor.Context
+ shm_periods: set[int] # in `int`-seconds
async with (
# XXX: this should be singleton on a host,
# a lone broker-daemon per provider should be
@@ -549,10 +561,10 @@ async def open_sample_stream(
'open_index_stream': open_index_stream,
'sub_for_broadcasts': sub_for_broadcasts,
},
- ) as (ctx, first)
+ ) as (ctx, shm_periods)
):
if ensure_is_active:
- assert len(first) > 1
+ assert len(shm_periods) > 1
async with (
ctx.open_stream(
diff --git a/piker/data/_sharedmem.py b/piker/data/_sharedmem.py
index 0a797600..acb8070f 100644
--- a/piker/data/_sharedmem.py
+++ b/piker/data/_sharedmem.py
@@ -520,9 +520,12 @@ def open_shm_array(
# "unlink" created shm on process teardown by
# pushing teardown calls onto actor context stack
- stack = tractor.current_actor().lifetime_stack
- stack.callback(shmarr.close)
- stack.callback(shmarr.destroy)
+ stack = tractor.current_actor(
+ err_on_no_runtime=False,
+ ).lifetime_stack
+ if stack:
+ stack.callback(shmarr.close)
+ stack.callback(shmarr.destroy)
return shmarr
@@ -607,7 +610,10 @@ def attach_shm_array(
_known_tokens[key] = token
# "close" attached shm on actor teardown
- tractor.current_actor().lifetime_stack.callback(sha.close)
+ if (actor := tractor.current_actor(
+ err_on_no_runtime=False,
+ )):
+ actor.lifetime_stack.callback(sha.close)
return sha
diff --git a/piker/storage/__init__.py b/piker/storage/__init__.py
index f32f40b6..361eaadc 100644
--- a/piker/storage/__init__.py
+++ b/piker/storage/__init__.py
@@ -43,7 +43,6 @@ from typing import (
import numpy as np
-
from .. import config
from ..service import (
check_for_service,
@@ -152,7 +151,10 @@ class StorageConnectionError(ConnectionError):
'''
-def get_storagemod(name: str) -> ModuleType:
+def get_storagemod(
+ name: str,
+
+) -> ModuleType:
mod: ModuleType = import_module(
'.' + name,
'piker.storage',
@@ -165,9 +167,12 @@ def get_storagemod(name: str) -> ModuleType:
@acm
async def open_storage_client(
- backend: str | None = None,
+ backend: str|None = None,
-) -> tuple[ModuleType, StorageClient]:
+) -> tuple[
+ ModuleType,
+ StorageClient,
+]:
'''
Load the ``StorageClient`` for named backend.
@@ -267,7 +272,10 @@ async def open_tsdb_client(
from ..data.feed import maybe_open_feed
async with (
- open_storage_client() as (_, storage),
+ open_storage_client() as (
+ _,
+ storage,
+ ),
maybe_open_feed(
[fqme],
@@ -275,7 +283,7 @@ async def open_tsdb_client(
) as feed,
):
- profiler(f'opened feed for {fqme}')
+ profiler(f'opened feed for {fqme!r}')
# to_append = feed.hist_shm.array
# to_prepend = None
diff --git a/piker/storage/cli.py b/piker/storage/cli.py
index 1c8ff11b..c73d3b6d 100644
--- a/piker/storage/cli.py
+++ b/piker/storage/cli.py
@@ -19,16 +19,10 @@ Storage middle-ware CLIs.
"""
from __future__ import annotations
-# from datetime import datetime
-# from contextlib import (
-# AsyncExitStack,
-# )
from pathlib import Path
-from math import copysign
import time
from types import ModuleType
from typing import (
- Any,
TYPE_CHECKING,
)
@@ -47,7 +41,6 @@ from piker.data import (
ShmArray,
)
from piker import tsp
-from piker.data._formatters import BGM
from . import log
from . import (
__tsdbs__,
@@ -242,122 +235,12 @@ def anal(
trio.run(main)
-async def markup_gaps(
- fqme: str,
- timeframe: float,
- actl: AnnotCtl,
- wdts: pl.DataFrame,
- gaps: pl.DataFrame,
-
-) -> dict[int, dict]:
- '''
- Remote annotate time-gaps in a dt-fielded ts (normally OHLC)
- with rectangles.
-
- '''
- aids: dict[int] = {}
- for i in range(gaps.height):
-
- row: pl.DataFrame = gaps[i]
-
- # the gap's RIGHT-most bar's OPEN value
- # at that time (sample) step.
- iend: int = row['index'][0]
- # dt: datetime = row['dt'][0]
- # dt_prev: datetime = row['dt_prev'][0]
- # dt_end_t: float = dt.timestamp()
-
-
- # TODO: can we eventually remove this
- # once we figure out why the epoch cols
- # don't match?
- # TODO: FIX HOW/WHY these aren't matching
- # and are instead off by 4hours (EST
- # vs. UTC?!?!)
- # end_t: float = row['time']
- # assert (
- # dt.timestamp()
- # ==
- # end_t
- # )
-
- # the gap's LEFT-most bar's CLOSE value
- # at that time (sample) step.
- prev_r: pl.DataFrame = wdts.filter(
- pl.col('index') == iend - 1
- )
- # XXX: probably a gap in the (newly sorted or de-duplicated)
- # dt-df, so we might need to re-index first..
- if prev_r.is_empty():
- await tractor.pause()
-
- istart: int = prev_r['index'][0]
- # dt_start_t: float = dt_prev.timestamp()
-
- # start_t: float = prev_r['time']
- # assert (
- # dt_start_t
- # ==
- # start_t
- # )
-
- # TODO: implement px-col width measure
- # and ensure at least as many px-cols
- # shown per rect as configured by user.
- # gap_w: float = abs((iend - istart))
- # if gap_w < 6:
- # margin: float = 6
- # iend += margin
- # istart -= margin
-
- rect_gap: float = BGM*3/8
- opn: float = row['open'][0]
- ro: tuple[float, float] = (
- # dt_end_t,
- iend + rect_gap + 1,
- opn,
- )
- cls: float = prev_r['close'][0]
- lc: tuple[float, float] = (
- # dt_start_t,
- istart - rect_gap, # + 1 ,
- cls,
- )
-
- color: str = 'dad_blue'
- diff: float = cls - opn
- sgn: float = copysign(1, diff)
- color: str = {
- -1: 'buy_green',
- 1: 'sell_red',
- }[sgn]
-
- rect_kwargs: dict[str, Any] = dict(
- fqme=fqme,
- timeframe=timeframe,
- start_pos=lc,
- end_pos=ro,
- color=color,
- )
-
- aid: int = await actl.add_rect(**rect_kwargs)
- assert aid
- aids[aid] = rect_kwargs
-
- # tell chart to redraw all its
- # graphics view layers Bo
- await actl.redraw(
- fqme=fqme,
- timeframe=timeframe,
- )
- return aids
-
-
@store.command()
def ldshm(
fqme: str,
write_parquet: bool = True,
reload_parquet_to_shm: bool = True,
+ pdb: bool = False, # --pdb passed?
) -> None:
'''
@@ -377,7 +260,7 @@ def ldshm(
open_piker_runtime(
'polars_boi',
enable_modules=['piker.data._sharedmem'],
- debug_mode=True,
+ debug_mode=pdb,
),
open_storage_client() as (
mod,
@@ -397,17 +280,19 @@ def ldshm(
times: np.ndarray = shm.array['time']
d1: float = float(times[-1] - times[-2])
- d2: float = float(times[-2] - times[-3])
- med: float = np.median(np.diff(times))
- if (
- d1 < 1.
- and d2 < 1.
- and med < 1.
- ):
- raise ValueError(
- f'Something is wrong with time period for {shm}:\n{times}'
- )
-
+ d2: float = 0
+    med: float = 0.  # fallback median sample-rate; read below regardless
+ if times.size > 2:
+ d2: float = float(times[-2] - times[-3])
+ med: float = np.median(np.diff(times))
+ if (
+ d1 < 1.
+ and d2 < 1.
+ and med < 1.
+ ):
+ raise ValueError(
+ f'Something is wrong with time period for {shm}:\n{times}'
+ )
period_s: float = float(max(d1, d2, med))
null_segs: tuple = tsp.get_null_segs(
@@ -417,7 +302,9 @@ def ldshm(
# TODO: call null-seg fixer somehow?
if null_segs:
- await tractor.pause()
+
+ if tractor._state.is_debug_mode():
+ await tractor.pause()
# async with (
# trio.open_nursery() as tn,
# mod.open_history_client(
@@ -441,11 +328,37 @@ def ldshm(
wdts,
deduped,
diff,
- ) = tsp.dedupe(
+ valid_races,
+ dq_issues,
+ ) = tsp.dedupe_ohlcv_smart(
shm_df,
- period=period_s,
)
+ # Report duplicate analysis
+ if diff > 0:
+ log.info(
+ f'Removed {diff} duplicate timestamp(s)\n'
+ )
+ if valid_races is not None:
+ identical: int = (
+ valid_races
+ .filter(pl.col('identical_bars'))
+ .height
+ )
+ monotonic: int = valid_races.height - identical
+ log.info(
+ f'Valid race conditions: {valid_races.height}\n'
+ f' - Identical bars: {identical}\n'
+ f' - Volume monotonic: {monotonic}\n'
+ )
+
+ if dq_issues is not None:
+ log.warning(
+ f'DATA QUALITY ISSUES from provider: '
+ f'{dq_issues.height} timestamp(s)\n'
+ f'{dq_issues}\n'
+ )
+
# detect gaps from in expected (uniform OHLC) sample period
step_gaps: pl.DataFrame = tsp.detect_time_gaps(
deduped,
@@ -460,7 +373,8 @@ def ldshm(
# TODO: actually pull the exact duration
# expected for each venue operational period?
- gap_dt_unit='days',
+        # NOTE(review): unit renamed 'days' -> 'day'; confirm dt-unit API
+ gap_dt_unit='day',
gap_thresh=1,
)
@@ -471,8 +385,11 @@ def ldshm(
if (
not venue_gaps.is_empty()
or (
- period_s < 60
- and not step_gaps.is_empty()
+ not step_gaps.is_empty()
+ # XXX, i presume i put this bc i was guarding
+ # for ib venue gaps?
+ # and
+ # period_s < 60
)
):
# write repaired ts to parquet-file?
@@ -521,7 +438,7 @@ def ldshm(
do_markup_gaps: bool = True
if do_markup_gaps:
new_df: pl.DataFrame = tsp.np2pl(new)
- aids: dict = await markup_gaps(
+                        aids: dict = await tsp.markup_gaps(
fqme,
period_s,
actl,
@@ -530,12 +447,23 @@ def ldshm(
)
# last chance manual overwrites in REPL
# await tractor.pause()
- assert aids
+ if not aids:
+ log.warning(
+ f'No gaps were found !?\n'
+ f'fqme: {fqme!r}\n'
+ f'timeframe: {period_s!r}\n'
+ f"WELL THAT'S GOOD NOOZ!\n"
+ )
tf2aids[period_s] = aids
else:
- # allow interaction even when no ts problems.
- assert not diff
+ # No significant gaps to handle, but may have had
+ # duplicates removed (valid race conditions are ok)
+ if diff > 0 and dq_issues is not None:
+ log.warning(
+ 'Found duplicates with data quality issues '
+ 'but no significant time gaps!\n'
+ )
await tractor.pause()
log.info('Exiting TSP shm anal-izer!')
diff --git a/piker/tsp/__init__.py b/piker/tsp/__init__.py
index 121fcbb7..baa28c82 100644
--- a/piker/tsp/__init__.py
+++ b/piker/tsp/__init__.py
@@ -28,1435 +28,25 @@ Historical TSP (time-series processing) lowlevel mgmt machinery and biz logic fo
stored offline (in a tsdb).
'''
-from __future__ import annotations
-from datetime import datetime
-from functools import partial
-from pathlib import Path
-from pprint import pformat
-from types import ModuleType
-from typing import (
- Callable,
- Generator,
- TYPE_CHECKING,
-)
-
-import trio
-from trio_typing import TaskStatus
-import tractor
-from pendulum import (
- Interval,
- DateTime,
- Duration,
- duration as mk_duration,
- from_timestamp,
-)
-import numpy as np
-import polars as pl
-
-from piker.brokers import NoData
-from piker.accounting import (
- MktPair,
-)
-from piker.data._util import (
- log,
-)
-from ..data._sharedmem import (
- maybe_open_shm_array,
- ShmArray,
-)
-from ..data._source import def_iohlcv_fields
-from ..data._sampling import (
- open_sample_stream,
-)
from ._anal import (
-
get_null_segs as get_null_segs,
- iter_null_segs as iter_null_segs,
- Frame as Frame,
- Seq as Seq,
-
- # codec-ish
- np2pl as np2pl,
- pl2np as pl2np,
-
- # `numpy` only
- slice_from_time as slice_from_time,
# `polars` specific
dedupe as dedupe,
- with_dts as with_dts,
detect_time_gaps as detect_time_gaps,
- sort_diff as sort_diff,
+ pl2np as pl2np,
+ np2pl as np2pl,
- # TODO:
- detect_price_gaps as detect_price_gaps
+ # `numpy` only
+ slice_from_time as slice_from_time,
)
-
-# TODO: break up all this shite into submods!
-from ..brokers._util import (
- DataUnavailable,
+from ._dedupe_smart import (
+ dedupe_ohlcv_smart as dedupe_ohlcv_smart,
)
-from ..storage import TimeseriesNotFound
-
-if TYPE_CHECKING:
- from bidict import bidict
- from ..service.marketstore import StorageClient
- # from .feed import _FeedsBus
-
-
-# `ShmArray` buffer sizing configuration:
-_mins_in_day = int(60 * 24)
-# how much is probably dependent on lifestyle
-# but we reco a buncha times (but only on a
-# run-every-other-day kinda week).
-_secs_in_day = int(60 * _mins_in_day)
-_days_in_week: int = 7
-
-_days_worth: int = 3
-_default_hist_size: int = 6 * 365 * _mins_in_day
-_hist_buffer_start = int(
- _default_hist_size - round(7 * _mins_in_day)
+from ._history import (
+ iter_dfs_from_shms as iter_dfs_from_shms,
+ manage_history as manage_history,
+)
+from ._annotate import (
+ markup_gaps as markup_gaps,
)
-
-_default_rt_size: int = _days_worth * _secs_in_day
-# NOTE: start the append index in rt buffer such that 1 day's worth
-# can be appenened before overrun.
-_rt_buffer_start = int((_days_worth - 1) * _secs_in_day)
-
-
-def diff_history(
- array: np.ndarray,
- append_until_dt: datetime | None = None,
- prepend_until_dt: datetime | None = None,
-
-) -> np.ndarray:
-
- # no diffing with tsdb dt index possible..
- if (
- prepend_until_dt is None
- and append_until_dt is None
- ):
- return array
-
- times = array['time']
-
- if append_until_dt:
- return array[times < append_until_dt.timestamp()]
- else:
- return array[times >= prepend_until_dt.timestamp()]
-
-
-# TODO: can't we just make this a sync func now?
-async def shm_push_in_between(
- shm: ShmArray,
- to_push: np.ndarray,
- prepend_index: int,
-
- update_start_on_prepend: bool = False,
-
-) -> int:
- # XXX: extremely important, there can be no checkpoints
- # in the body of this func to avoid entering new ``frames``
- # values while we're pipelining the current ones to
- # memory...
- shm.push(
- to_push,
- prepend=True,
-
- # XXX: only update the ._first index if no tsdb
- # segment was previously prepended by the
- # parent task.
- update_first=update_start_on_prepend,
-
- # XXX: only prepend from a manually calculated shm
- # index if there was already a tsdb history
- # segment prepended (since then the
- # ._first.value is going to be wayyy in the
- # past!)
- start=(
- prepend_index
- if not update_start_on_prepend
- else None
- ),
- )
-
-
-async def maybe_fill_null_segments(
- shm: ShmArray,
- timeframe: float,
- get_hist: Callable,
- sampler_stream: tractor.MsgStream,
- mkt: MktPair,
-
- task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED,
-
-) -> list[Frame]:
-
- null_segs_detected = trio.Event()
- task_status.started(null_segs_detected)
-
- frame: Frame = shm.array
-
- null_segs: tuple | None = get_null_segs(
- frame,
- period=timeframe,
- )
- for (
- absi_start, absi_end,
- fi_start, fi_end,
- start_t, end_t,
- start_dt, end_dt,
- ) in iter_null_segs(
- null_segs=null_segs,
- frame=frame,
- timeframe=timeframe,
- ):
-
- # XXX NOTE: ?if we get a badly ordered timestamp
- # pair, immediately stop backfilling?
- if (
- start_dt
- and
- end_dt < start_dt
- ):
- await tractor.pause()
- break
-
- (
- array,
- next_start_dt,
- next_end_dt,
- ) = await get_hist(
- timeframe,
- start_dt=start_dt,
- end_dt=end_dt,
- )
-
- # XXX TODO: pretty sure if i plot tsla, btcusdt.binance
- # and mnq.cme.ib this causes a Qt crash XXDDD
-
- # make sure we don't overrun the buffer start
- len_to_push: int = min(absi_end, array.size)
- to_push: np.ndarray = array[-len_to_push:]
-
- await shm_push_in_between(
- shm,
- to_push,
- prepend_index=absi_end,
- update_start_on_prepend=False,
- )
- # TODO: UI side needs IPC event to update..
- # - make sure the UI actually always handles
- # this update!
- # - remember that in the display side, only refersh this
- # if the respective history is actually "in view".
- # loop
- try:
- await sampler_stream.send({
- 'broadcast_all': {
-
- # XXX NOTE XXX: see the
- # `.ui._display.increment_history_view()` if block
- # that looks for this info to FORCE a hard viz
- # redraw!
- 'backfilling': (mkt.fqme, timeframe),
- },
- })
- except tractor.ContextCancelled:
- # log.exception
- await tractor.pause()
- raise
-
- null_segs_detected.set()
- # RECHECK for more null-gaps
- frame: Frame = shm.array
- null_segs: tuple | None = get_null_segs(
- frame,
- period=timeframe,
- )
- if (
- null_segs
- and
- len(null_segs[-1])
- ):
- (
- iabs_slices,
- iabs_zero_rows,
- zero_t,
- ) = null_segs
- log.warning(
- f'{len(iabs_slices)} NULL TIME SEGMENTS DETECTED!\n'
- f'{pformat(iabs_slices)}'
- )
-
- # TODO: always backfill gaps with the earliest (price) datum's
- # value to avoid the y-ranger including zeros and completely
- # stretching the y-axis..
- # array: np.ndarray = shm.array
- # zeros = array[array['low'] == 0]
- ohlc_fields: list[str] = [
- 'open',
- 'high',
- 'low',
- 'close',
- ]
-
- for istart, istop in iabs_slices:
-
- # get view into buffer for null-segment
- gap: np.ndarray = shm._array[istart:istop]
-
- # copy the oldest OHLC samples forward
- cls: float = shm._array[istart]['close']
-
- # TODO: how can we mark this range as being a gap tho?
- # -[ ] maybe pg finally supports nulls in ndarray to
- # show empty space somehow?
- # -[ ] we could put a special value in the vlm or
- # another col/field to denote?
- gap[ohlc_fields] = cls
-
- start_t: float = shm._array[istart]['time']
- t_diff: float = (istop - istart)*timeframe
-
- gap['time'] = np.arange(
- start=start_t,
- stop=start_t + t_diff,
- step=timeframe,
- )
-
- # TODO: reimpl using the new `.ui._remote_ctl` ctx
- # ideally using some kinda decent
- # tractory-reverse-lookup-connnection from some other
- # `Context` type thingy?
- await sampler_stream.send({
- 'broadcast_all': {
-
- # XXX NOTE XXX: see the
- # `.ui._display.increment_history_view()` if block
- # that looks for this info to FORCE a hard viz
- # redraw!
- 'backfilling': (mkt.fqme, timeframe),
- },
- })
-
- # TODO: interatively step through any remaining
- # time-gaps/null-segments and spawn piecewise backfiller
- # tasks in a nursery?
- # -[ ] not sure that's going to work so well on the ib
- # backend but worth a shot?
- # -[ ] mk new history connections to make it properly
- # parallel possible no matter the backend?
- # -[ ] fill algo: do queries in alternating "latest, then
- # earliest, then latest.. etc?"
-
-
-async def start_backfill(
- get_hist,
- def_frame_duration: Duration,
- mod: ModuleType,
- mkt: MktPair,
- shm: ShmArray,
- timeframe: float,
-
- backfill_from_shm_index: int,
- backfill_from_dt: datetime,
-
- sampler_stream: tractor.MsgStream,
-
- backfill_until_dt: datetime | None = None,
- storage: StorageClient | None = None,
-
- write_tsdb: bool = True,
-
- task_status: TaskStatus[tuple] = trio.TASK_STATUS_IGNORED,
-
-) -> int:
-
- # let caller unblock and deliver latest history frame
- # and use to signal that backfilling the shm gap until
- # the tsdb end is complete!
- bf_done = trio.Event()
- task_status.started(bf_done)
-
- # based on the sample step size, maybe load a certain amount history
- update_start_on_prepend: bool = False
- if backfill_until_dt is None:
-
- # TODO: per-provider default history-durations?
- # -[ ] inside the `open_history_client()` config allow
- # declaring the history duration limits instead of
- # guessing and/or applying the same limits to all?
- #
- # -[ ] allow declaring (default) per-provider backfill
- # limits inside a [storage] sub-section in conf.toml?
- #
- # NOTE, when no tsdb "last datum" is provided, we just
- # load some near-term history by presuming a "decently
- # large" 60s duration limit and a much shorter 1s range.
- periods = {
- 1: {'days': 2},
- 60: {'years': 6},
- }
- period_duration: int = periods[timeframe]
- update_start_on_prepend: bool = True
-
- # NOTE: manually set the "latest" datetime which we intend to
- # backfill history "until" so as to adhere to the history
- # settings above when the tsdb is detected as being empty.
- backfill_until_dt = backfill_from_dt.subtract(**period_duration)
-
- # STAGE NOTE: "backward history gap filling":
- # - we push to the shm buffer until we have history back
- # until the latest entry loaded from the tsdb's table B)
- # - after this loop continue to check for other gaps in the
- # (tsdb) history and (at least report) maybe fill them
- # from new frame queries to the backend?
- last_start_dt: datetime = backfill_from_dt
- next_prepend_index: int = backfill_from_shm_index
-
- while last_start_dt > backfill_until_dt:
- log.info(
- f'Requesting {timeframe}s frame:\n'
- f'backfill_until_dt: {backfill_until_dt}\n'
- f'last_start_dt: {last_start_dt}\n'
- )
- try:
- (
- array,
- next_start_dt,
- next_end_dt,
- ) = await get_hist(
- timeframe,
- end_dt=last_start_dt,
- )
- except NoData as _daterr:
- orig_last_start_dt: datetime = last_start_dt
- gap_report: str = (
- f'EMPTY FRAME for `end_dt: {last_start_dt}`?\n'
- f'{mod.name} -> tf@fqme: {timeframe}@{mkt.fqme}\n'
- f'last_start_dt: {orig_last_start_dt}\n\n'
- f'bf_until: {backfill_until_dt}\n'
- )
- # EMPTY FRAME signal with 3 (likely) causes:
- #
- # 1. range contains legit gap in venue history
- # 2. history actually (edge case) **began** at the
- # value `last_start_dt`
- # 3. some other unknown error (ib blocking the
- # history-query bc they don't want you seeing how
- # they cucked all the tinas.. like with options
- # hist)
- #
- if def_frame_duration:
- # decrement by a duration's (frame) worth of time
- # as maybe indicated by the backend to see if we
- # can get older data before this possible
- # "history gap".
- last_start_dt: datetime = last_start_dt.subtract(
- seconds=def_frame_duration.total_seconds()
- )
- gap_report += (
- f'Decrementing `end_dt` and retrying with,\n'
- f'def_frame_duration: {def_frame_duration}\n'
- f'(new) last_start_dt: {last_start_dt}\n'
- )
- log.warning(gap_report)
- # skip writing to shm/tsdb and try the next
- # duration's worth of prior history.
- continue
-
- else:
- # await tractor.pause()
- raise DataUnavailable(gap_report)
-
- # broker says there never was or is no more history to pull
- except DataUnavailable as due:
- message: str = due.args[0]
- log.warning(
- f'Provider {mod.name!r} halted backfill due to,\n\n'
-
- f'{message}\n'
-
- f'fqme: {mkt.fqme}\n'
- f'timeframe: {timeframe}\n'
- f'last_start_dt: {last_start_dt}\n'
- f'bf_until: {backfill_until_dt}\n'
- )
- # UGH: what's a better way?
- # TODO: backends are responsible for being correct on
- # this right!?
- # -[ ] in the `ib` case we could maybe offer some way
- # to halt the request loop until the condition is
- # resolved or should the backend be entirely in
- # charge of solving such faults? yes, right?
- return
-
- time: np.ndarray = array['time']
- assert (
- time[0]
- ==
- next_start_dt.timestamp()
- )
-
- assert time[-1] == next_end_dt.timestamp()
-
- expected_dur: Interval = last_start_dt - next_start_dt
-
- # frame's worth of sample-period-steps, in seconds
- frame_size_s: float = len(array) * timeframe
- recv_frame_dur: Duration = (
- from_timestamp(array[-1]['time'])
- -
- from_timestamp(array[0]['time'])
- )
- if (
- (lt_frame := (recv_frame_dur < expected_dur))
- or
- (null_frame := (frame_size_s == 0))
- # ^XXX, should NEVER hit now!
- ):
- # XXX: query result includes a start point prior to our
- # expected "frame size" and thus is likely some kind of
- # history gap (eg. market closed period, outage, etc.)
- # so just report it to console for now.
- if lt_frame:
- reason = 'Possible GAP (or first-datum)'
- else:
- assert null_frame
- reason = 'NULL-FRAME'
-
- missing_dur: Interval = expected_dur.end - recv_frame_dur.end
- log.warning(
- f'{timeframe}s-series {reason} detected!\n'
- f'fqme: {mkt.fqme}\n'
- f'last_start_dt: {last_start_dt}\n\n'
- f'recv interval: {recv_frame_dur}\n'
- f'expected interval: {expected_dur}\n\n'
-
- f'Missing duration of history of {missing_dur.in_words()!r}\n'
- f'{missing_dur}\n'
- )
- # await tractor.pause()
-
- to_push = diff_history(
- array,
- prepend_until_dt=backfill_until_dt,
- )
- ln: int = len(to_push)
- if ln:
- log.info(
- f'{ln} bars for {next_start_dt} -> {last_start_dt}'
- )
-
- else:
- log.warning(
- '0 BARS TO PUSH after diff!?\n'
- f'{next_start_dt} -> {last_start_dt}'
- )
-
- # bail gracefully on shm allocation overrun/full
- # condition
- try:
- await shm_push_in_between(
- shm,
- to_push,
- prepend_index=next_prepend_index,
- update_start_on_prepend=update_start_on_prepend,
- )
- await sampler_stream.send({
- 'broadcast_all': {
- 'backfilling': (mkt.fqme, timeframe),
- },
- })
-
- # decrement next prepend point
- next_prepend_index = next_prepend_index - ln
- last_start_dt = next_start_dt
-
- except ValueError as ve:
- _ve = ve
- log.error(
- f'Shm prepend OVERRUN on: {next_start_dt} -> {last_start_dt}?'
- )
-
- if next_prepend_index < ln:
- log.warning(
- f'Shm buffer can only hold {next_prepend_index} more rows..\n'
- f'Appending those from recent {ln}-sized frame, no more!'
- )
-
- to_push = to_push[-next_prepend_index + 1:]
- await shm_push_in_between(
- shm,
- to_push,
- prepend_index=next_prepend_index,
- update_start_on_prepend=update_start_on_prepend,
- )
- await sampler_stream.send({
- 'broadcast_all': {
- 'backfilling': (mkt.fqme, timeframe),
- },
- })
-
- # can't push the entire frame? so
- # push only the amount that can fit..
- break
-
- log.info(
- f'Shm pushed {ln} frame:\n'
- f'{next_start_dt} -> {last_start_dt}'
- )
-
- # FINALLY, maybe write immediately to the tsdb backend for
- # long-term storage.
- if (
- storage is not None
- and
- write_tsdb
- ):
- log.info(
- f'Writing {ln} frame to storage:\n'
- f'{next_start_dt} -> {last_start_dt}'
- )
-
- # NOTE, always drop the src asset token for
- # non-currency-pair like market types (for now)
- #
- # THAT IS, for now our table key schema is NOT
- # including the dst[/src] source asset token. SO,
- # 'tsla.nasdaq.ib' over 'tsla/usd.nasdaq.ib' for
- # historical reasons ONLY.
- if mkt.dst.atype not in {
- 'crypto',
- 'crypto_currency',
- 'fiat', # a "forex pair"
- 'perpetual_future', # stupid "perps" from cex land
- }:
- col_sym_key: str = mkt.get_fqme(
- delim_char='',
- without_src=True,
- )
- else:
- col_sym_key: str = mkt.get_fqme(
- delim_char='',
- )
-
- await storage.write_ohlcv(
- col_sym_key,
- shm.array,
- timeframe,
- )
- df: pl.DataFrame = await storage.as_df(
- fqme=mkt.fqme,
- period=timeframe,
- load_from_offline=False,
- )
- (
- wdts,
- deduped,
- diff,
- ) = dedupe(df)
- # if diff:
- # sort_diff(df)
-
- else:
- # finally filled gap
- log.info(
- f'Finished filling gap to tsdb start @ {backfill_until_dt}!'
- )
-
- # XXX: extremely important, there can be no checkpoints
- # in the block above to avoid entering new ``frames``
- # values while we're pipelining the current ones to
- # memory...
- # await sampler_stream.send('broadcast_all')
-
- # short-circuit (for now)
- bf_done.set()
-
-
-# NOTE: originally this was used to cope with a tsdb (marketstore)
-# which could not delivery very large frames of history over gRPC
-# (thanks goolag) due to corruption issues. NOW, using apache
-# parquet (by default in the local filesys) we don't have this
-# requirement since the files can be loaded very quickly in
-# entirety to memory via
-async def back_load_from_tsdb(
- storemod: ModuleType,
- storage: StorageClient,
-
- fqme: str,
-
- tsdb_history: np.ndarray,
-
- last_tsdb_dt: datetime,
- latest_start_dt: datetime,
- latest_end_dt: datetime,
-
- bf_done: trio.Event,
-
- timeframe: int,
- shm: ShmArray,
-):
- assert len(tsdb_history)
-
- # sync to backend history task's query/load completion
- # if bf_done:
- # await bf_done.wait()
-
- # TODO: eventually it'd be nice to not require a shm array/buffer
- # to accomplish this.. maybe we can do some kind of tsdb direct to
- # graphics format eventually in a child-actor?
- if storemod.name == 'nativedb':
- return
-
- await tractor.pause()
- assert shm._first.value == 0
-
- array = shm.array
-
- # if timeframe == 1:
- # times = shm.array['time']
- # assert (times[1] - times[0]) == 1
-
- if len(array):
- shm_last_dt = from_timestamp(
- shm.array[0]['time']
- )
- else:
- shm_last_dt = None
-
- if last_tsdb_dt:
- assert shm_last_dt >= last_tsdb_dt
-
- # do diff against start index of last frame of history and only
- # fill in an amount of datums from tsdb allows for most recent
- # to be loaded into mem *before* tsdb data.
- if (
- last_tsdb_dt
- and latest_start_dt
- ):
- backfilled_size_s: Duration = (
- latest_start_dt - last_tsdb_dt
- ).seconds
- # if the shm buffer len is not large enough to contain
- # all missing data between the most recent backend-queried frame
- # and the most recent dt-index in the db we warn that we only
- # want to load a portion of the next tsdb query to fill that
- # space.
- log.info(
- f'{backfilled_size_s} seconds worth of {timeframe}s loaded'
- )
-
- # Load TSDB history into shm buffer (for display) if there is
- # remaining buffer space.
-
- time_key: str = 'time'
- if getattr(storemod, 'ohlc_key_map', False):
- keymap: bidict = storemod.ohlc_key_map
- time_key: str = keymap.inverse['time']
-
- # if (
- # not len(tsdb_history)
- # ):
- # return
-
- tsdb_last_frame_start: datetime = last_tsdb_dt
- # load as much from storage into shm possible (depends on
- # user's shm size settings).
- while shm._first.value > 0:
-
- tsdb_history = await storage.read_ohlcv(
- fqme,
- timeframe=timeframe,
- end=tsdb_last_frame_start,
- )
-
- # # empty query
- # if not len(tsdb_history):
- # break
-
- next_start = tsdb_history[time_key][0]
- if next_start >= tsdb_last_frame_start:
- # no earlier data detected
- break
-
- else:
- tsdb_last_frame_start = next_start
-
- # TODO: see if there's faster multi-field reads:
- # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields
- # re-index with a `time` and index field
- prepend_start = shm._first.value
-
- to_push = tsdb_history[-prepend_start:]
- shm.push(
- to_push,
-
- # insert the history pre a "days worth" of samples
- # to leave some real-time buffer space at the end.
- prepend=True,
- # update_first=False,
- # start=prepend_start,
- field_map=storemod.ohlc_key_map,
- )
-
- log.info(f'Loaded {to_push.shape} datums from storage')
- tsdb_last_frame_start = tsdb_history[time_key][0]
-
- # manually trigger step update to update charts/fsps
- # which need an incremental update.
- # NOTE: the way this works is super duper
- # un-intuitive right now:
- # - the broadcaster fires a msg to the fsp subsystem.
- # - fsp subsys then checks for a sample step diff and
- # possibly recomputes prepended history.
- # - the fsp then sends back to the parent actor
- # (usually a chart showing graphics for said fsp)
- # which tells the chart to conduct a manual full
- # graphics loop cycle.
- # await sampler_stream.send('broadcast_all')
-
-
-async def push_latest_frame(
- # box-type only that should get packed with the datetime
- # objects received for the latest history frame
- dt_eps: list[DateTime, DateTime],
- shm: ShmArray,
- get_hist: Callable[
- [int, datetime, datetime],
- tuple[np.ndarray, str]
- ],
- timeframe: float,
- config: dict,
-
- task_status: TaskStatus[
- Exception | list[datetime, datetime]
- ] = trio.TASK_STATUS_IGNORED,
-
-) -> list[datetime, datetime] | None:
- # get latest query's worth of history all the way
- # back to what is recorded in the tsdb
- try:
- (
- array,
- mr_start_dt,
- mr_end_dt,
- ) = await get_hist(
- timeframe,
- end_dt=None,
- )
- # so caller can access these ep values
- dt_eps.extend([
- mr_start_dt,
- mr_end_dt,
- ])
- task_status.started(dt_eps)
-
- # XXX: timeframe not supported for backend (since
- # above exception type), terminate immediately since
- # there's no backfilling possible.
- except DataUnavailable:
- task_status.started(None)
-
- if timeframe > 1:
- await tractor.pause()
-
- # prolly tf not supported
- return None
-
- # NOTE: on the first history, most recent history
- # frame we PREPEND from the current shm ._last index
- # and thus a gap between the earliest datum loaded here
- # and the latest loaded from the tsdb may exist!
- log.info(f'Pushing {array.size} to shm!')
- shm.push(
- array,
- prepend=True, # append on first frame
- )
-
- return dt_eps
-
-
-async def load_tsdb_hist(
- storage: StorageClient,
- mkt: MktPair,
- timeframe: float,
-
- task_status: TaskStatus[None] = trio.TASK_STATUS_IGNORED,
-
-) -> tuple[
- np.ndarray,
- DateTime,
- DateTime,
-] | None:
- # loads a (large) frame of data from the tsdb depending
- # on the db's query size limit; our "nativedb" (using
- # parquet) generally can load the entire history into mem
- # but if not then below the remaining history can be lazy
- # loaded?
- fqme: str = mkt.fqme
- tsdb_entry: tuple[
- np.ndarray,
- DateTime,
- DateTime,
- ]
- try:
- tsdb_entry: tuple | None = await storage.load(
- fqme,
- timeframe=timeframe,
- )
- return tsdb_entry
-
- except TimeseriesNotFound:
- log.warning(
- f'No timeseries yet for {timeframe}@{fqme}'
- )
- return None
-
-
-async def tsdb_backfill(
- mod: ModuleType,
- storemod: ModuleType,
-
- storage: StorageClient,
- mkt: MktPair,
- shm: ShmArray,
- timeframe: float,
-
- sampler_stream: tractor.MsgStream,
-
- task_status: TaskStatus[
- tuple[ShmArray, ShmArray]
- ] = trio.TASK_STATUS_IGNORED,
-
-) -> None:
-
- if timeframe not in (1, 60):
- raise ValueError(
- '`piker` only needs to support 1m and 1s sampling '
- 'but ur api is trying to deliver a longer '
- f'timeframe of {timeframe} seconds..\n'
- 'So yuh.. dun do dat brudder.'
- )
-
- get_hist: Callable[
- [int, datetime, datetime],
- tuple[np.ndarray, str]
- ]
- config: dict[str, int]
- async with (
- mod.open_history_client(
- mkt,
- ) as (get_hist, config),
-
- # NOTE: this sub-nursery splits to tasks for the given
- # sampling rate to concurrently load offline tsdb
- # timeseries as well as new data from the venue backend!
- ):
- log.info(
- f'`{mod}` history client returned backfill config:\n'
- f'{pformat(config)}\n'
- )
-
- # concurrently load the provider's most-recent-frame AND any
- # pre-existing tsdb history already saved in `piker` storage.
- dt_eps: list[DateTime, DateTime] = []
- async with (
- tractor.trionics.collapse_eg(),
- trio.open_nursery() as tn
- ):
- tn.start_soon(
- push_latest_frame,
- dt_eps,
- shm,
- get_hist,
- timeframe,
- config,
- )
- tsdb_entry: tuple = await load_tsdb_hist(
- storage,
- mkt,
- timeframe,
- )
-
- # tell parent task to continue
- # TODO: really we'd want this the other way with the
- # tsdb load happening asap and the since the latest
- # frame query will normally be the main source of
- # latency?
- task_status.started()
-
- # NOTE: iabs to start backfilling from, reverse chronological,
- # ONLY AFTER the first history frame has been pushed to
- # mem!
- backfill_gap_from_shm_index: int = shm._first.value + 1
-
- # Prepend any tsdb history into the rt-shm-buffer which
- # should NOW be getting filled with the most recent history
- # pulled from the data-backend.
- if dt_eps:
- # well then, unpack the latest (gap) backfilled frame dts
- (
- mr_start_dt,
- mr_end_dt,
- ) = dt_eps
-
- first_frame_dur_s: Duration = (mr_end_dt - mr_start_dt).seconds
- calced_frame_size: Duration = mk_duration(
- seconds=first_frame_dur_s,
- )
- # NOTE, attempt to use the backend declared default frame
- # sizing (as allowed by their time-series query APIs) and
- # if not provided try to construct a default from the
- # first frame received above.
- def_frame_durs: dict[
- int,
- Duration,
- ]|None = config.get('frame_types', None)
-
- if def_frame_durs:
- def_frame_size: Duration = def_frame_durs[timeframe]
-
- if def_frame_size != calced_frame_size:
- log.warning(
- f'Expected frame size {def_frame_size}\n'
- f'Rxed frame {calced_frame_size}\n'
- )
- # await tractor.pause()
- else:
- # use what we calced from first frame above.
- def_frame_size = calced_frame_size
-
- # NOTE: when there's no offline data, there's 2 cases:
- # - data backend doesn't support timeframe/sample
- # period (in which case `dt_eps` should be `None` and
- # we shouldn't be here!), or
- # - no prior history has been stored (yet) and we need
- # todo full backfill of the history now.
- if tsdb_entry is None:
- # indicate to backfill task to fill the whole
- # shm buffer as much as it can!
- last_tsdb_dt = None
-
- # there's existing tsdb history from (offline) storage
- # so only backfill the gap between the
- # most-recent-frame (mrf) and that latest sample.
- else:
- (
- tsdb_history,
- first_tsdb_dt,
- last_tsdb_dt,
- ) = tsdb_entry
-
- # if there is a gap to backfill from the first
- # history frame until the last datum loaded from the tsdb
- # continue that now in the background
- async with trio.open_nursery(
- strict_exception_groups=False,
- ) as tn:
-
- bf_done = await tn.start(
- partial(
- start_backfill,
- get_hist=get_hist,
- def_frame_duration=def_frame_size,
- mod=mod,
- mkt=mkt,
- shm=shm,
- timeframe=timeframe,
-
- backfill_from_shm_index=backfill_gap_from_shm_index,
- backfill_from_dt=mr_start_dt,
-
- sampler_stream=sampler_stream,
- backfill_until_dt=last_tsdb_dt,
-
- storage=storage,
- write_tsdb=True,
- )
- )
- nulls_detected: trio.Event | None = None
- if last_tsdb_dt is not None:
- # calc the index from which the tsdb data should be
- # prepended, presuming there is a gap between the
- # latest frame (loaded/read above) and the latest
- # sample loaded from the tsdb.
- backfill_diff: Duration = mr_start_dt - last_tsdb_dt
- offset_s: float = backfill_diff.in_seconds()
-
- # XXX EDGE CASEs: the most recent frame overlaps with
- # prior tsdb history!!
- # - so the latest frame's start time is earlier then
- # the tsdb's latest sample.
- # - alternatively this may also more generally occur
- # when the venue was closed (say over the weeknd)
- # causing a timeseries gap, AND the query frames size
- # (eg. for ib's 1s we rx 2k datums ~= 33.33m) IS
- # GREATER THAN the current venue-market's operating
- # session (time) we will receive datums from BEFORE THE
- # CLOSURE GAP and thus the `offset_s` value will be
- # NEGATIVE! In this case we need to ensure we don't try
- # to push datums that have already been recorded in the
- # tsdb. In this case we instead only retreive and push
- # the series portion missing from the db's data set.
- # if offset_s < 0:
- # non_overlap_diff: Duration = mr_end_dt - last_tsdb_dt
- # non_overlap_offset_s: float = backfill_diff.in_seconds()
-
- offset_samples: int = round(offset_s / timeframe)
-
- # TODO: see if there's faster multi-field reads:
- # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields
- # re-index with a `time` and index field
- if offset_s > 0:
- # NOTE XXX: ONLY when there is an actual gap
- # between the earliest sample in the latest history
- # frame do we want to NOT stick the latest tsdb
- # history adjacent to that latest frame!
- prepend_start = shm._first.value - offset_samples + 1
- to_push = tsdb_history[-prepend_start:]
- else:
- # when there is overlap we want to remove the
- # overlapping samples from the tsdb portion (taking
- # instead the latest frame's values since THEY
- # SHOULD BE THE SAME) and prepend DIRECTLY adjacent
- # to the latest frame!
- # TODO: assert the overlap segment array contains
- # the same values!?!
- prepend_start = shm._first.value
- to_push = tsdb_history[-(shm._first.value):offset_samples - 1]
-
- # tsdb history is so far in the past we can't fit it in
- # shm buffer space so simply don't load it!
- if prepend_start > 0:
- shm.push(
- to_push,
-
- # insert the history pre a "days worth" of samples
- # to leave some real-time buffer space at the end.
- prepend=True,
- # update_first=False,
- start=prepend_start,
- field_map=storemod.ohlc_key_map,
- )
-
- log.info(f'Loaded {to_push.shape} datums from storage')
-
- # NOTE: ASYNC-conduct tsdb timestamp gap detection and backfill any
- # seemingly missing (null-time) segments..
- # TODO: ideally these can never exist!
- # -[ ] somehow it seems sometimes we're writing zero-ed
- # segments to tsdbs during teardown?
- # -[ ] can we ensure that the backcfiller tasks do this
- # work PREVENTAVELY instead?
- # -[ ] fill in non-zero epoch time values ALWAYS!
- # await maybe_fill_null_segments(
- nulls_detected: trio.Event = await tn.start(partial(
- maybe_fill_null_segments,
-
- shm=shm,
- timeframe=timeframe,
- get_hist=get_hist,
- sampler_stream=sampler_stream,
- mkt=mkt,
- ))
-
- # 2nd nursery END
-
- # TODO: who would want to?
- if nulls_detected:
- await nulls_detected.wait()
-
- await bf_done.wait()
- # TODO: maybe start history anal and load missing "history
- # gaps" via backend..
-
- # if len(hist_shm.array) < 2:
- # TODO: there's an edge case here to solve where if the last
- # frame before market close (at least on ib) was pushed and
- # there was only "1 new" row pushed from the first backfill
- # query-iteration, then the sample step sizing calcs will
- # break upstream from here since you can't diff on at least
- # 2 steps... probably should also add logic to compute from
- # the tsdb series and stash that somewhere as meta data on
- # the shm buffer?.. no se.
-
- # backload any further data from tsdb (concurrently per
- # timeframe) if not all data was able to be loaded (in memory)
- # from the ``StorageClient.load()`` call above.
- await trio.sleep_forever()
-
- # XXX NOTE: this is legacy from when we were using
- # marketstore and we needed to continue backloading
- # incrementally from the tsdb client.. (bc it couldn't
- # handle a single large query with gRPC for some
- # reason.. classic goolag pos)
- # tn.start_soon(
- # back_load_from_tsdb,
-
- # storemod,
- # storage,
- # fqme,
-
- # tsdb_history,
- # last_tsdb_dt,
- # mr_start_dt,
- # mr_end_dt,
- # bf_done,
-
- # timeframe,
- # shm,
- # )
-
-
-async def manage_history(
- mod: ModuleType,
- mkt: MktPair,
- some_data_ready: trio.Event,
- feed_is_live: trio.Event,
- timeframe: float = 60, # in seconds
-
- task_status: TaskStatus[
- tuple[ShmArray, ShmArray]
- ] = trio.TASK_STATUS_IGNORED,
-
-) -> None:
- '''
- Load and manage historical data including the loading of any
- available series from any connected tsdb as well as conduct
- real-time update of both that existing db and the allocated
- shared memory buffer.
-
- Init sequence:
- - allocate shm (numpy array) buffers for 60s & 1s sample rates
- - configure "zero index" for each buffer: the index where
- history will prepended *to* and new live data will be
- appened *from*.
- - open a ``.storage.StorageClient`` and load any existing tsdb
- history as well as (async) start a backfill task which loads
- missing (newer) history from the data provider backend:
- - tsdb history is loaded first and pushed to shm ASAP.
- - the backfill task loads the most recent history before
- unblocking its parent task, so that the `ShmArray._last` is
- up to date to allow the OHLC sampler to begin writing new
- samples as the correct buffer index once the provider feed
- engages.
-
- '''
- # TODO: is there a way to make each shm file key
- # actor-tree-discovery-addr unique so we avoid collisions
- # when doing tests which also allocate shms for certain instruments
- # that may be in use on the system by some other running daemons?
- # from tractor._state import _runtime_vars
- # port = _runtime_vars['_root_mailbox'][1]
-
- uid: tuple = tractor.current_actor().uid
- name, uuid = uid
- service: str = name.rstrip(f'.{mod.name}')
- fqme: str = mkt.get_fqme(delim_char='')
-
- # (maybe) allocate shm array for this broker/symbol which will
- # be used for fast near-term history capture and processing.
- hist_shm, opened = maybe_open_shm_array(
- size=_default_hist_size,
- append_start_index=_hist_buffer_start,
-
- key=f'piker.{service}[{uuid[:16]}].{fqme}.hist',
-
- # use any broker defined ohlc dtype:
- dtype=getattr(mod, '_ohlc_dtype', def_iohlcv_fields),
-
- # we expect the sub-actor to write
- readonly=False,
- )
- hist_zero_index = hist_shm.index - 1
-
- # TODO: history validation
- if not opened:
- raise RuntimeError(
- "Persistent shm for sym was already open?!"
- )
-
- rt_shm, opened = maybe_open_shm_array(
- size=_default_rt_size,
- append_start_index=_rt_buffer_start,
- key=f'piker.{service}[{uuid[:16]}].{fqme}.rt',
-
- # use any broker defined ohlc dtype:
- dtype=getattr(mod, '_ohlc_dtype', def_iohlcv_fields),
-
- # we expect the sub-actor to write
- readonly=False,
- )
-
- # (for now) set the rt (hft) shm array with space to prepend
- # only a few days worth of 1s history.
- days: int = 2
- start_index: int = days*_secs_in_day
- rt_shm._first.value = start_index
- rt_shm._last.value = start_index
- rt_zero_index = rt_shm.index - 1
-
- if not opened:
- raise RuntimeError(
- "Persistent shm for sym was already open?!"
- )
-
- open_history_client = getattr(
- mod,
- 'open_history_client',
- )
- assert open_history_client
-
- # TODO: maybe it should be a subpkg of `.data`?
- from piker import storage
-
- async with (
- storage.open_storage_client() as (storemod, client),
-
- # NOTE: this nursery spawns a task per "timeframe" (aka
- # sampling period) data set since normally differently
- # sampled timeseries can be loaded / process independently
- # ;)
- tractor.trionics.collapse_eg(),
- trio.open_nursery() as tn,
- ):
- log.info(
- f'Connecting to storage backend `{storemod.name}`:\n'
- f'location: {client.address}\n'
- f'db cardinality: {client.cardinality}\n'
- # TODO: show backend config, eg:
- # - network settings
- # - storage size with compression
- # - number of loaded time series?
- )
-
- # NOTE: this call ONLY UNBLOCKS once the latest-most frame
- # (i.e. history just before the live feed latest datum) of
- # history has been loaded and written to the shm buffer:
- # - the backfiller task can write in reverse chronological
- # to the shm and tsdb
- # - the tsdb data can be loaded immediately and the
- # backfiller can do a single append from it's end datum and
- # then prepends backward to that from the current time
- # step.
- tf2mem: dict = {
- 1: rt_shm,
- 60: hist_shm,
- }
- async with open_sample_stream(
- period_s=1.,
- shms_by_period={
- 1.: rt_shm.token,
- 60.: hist_shm.token,
- },
-
- # NOTE: we want to only open a stream for doing
- # broadcasts on backfill operations, not receive the
- # sample index-stream (since there's no code in this
- # data feed layer that needs to consume it).
- open_index_stream=True,
- sub_for_broadcasts=False,
-
- ) as sample_stream:
- # register 1s and 1m buffers with the global
- # incrementer task
- log.info(f'Connected to sampler stream: {sample_stream}')
-
- for timeframe in [60, 1]:
- await tn.start(partial(
- tsdb_backfill,
- mod=mod,
- storemod=storemod,
- storage=client,
- mkt=mkt,
- shm=tf2mem[timeframe],
- timeframe=timeframe,
- sampler_stream=sample_stream,
- ))
-
- # indicate to caller that feed can be delivered to
- # remote requesting client since we've loaded history
- # data that can be used.
- some_data_ready.set()
-
- # wait for a live feed before starting the sampler.
- await feed_is_live.wait()
-
- # yield back after client connect with filled shm
- task_status.started((
- hist_zero_index,
- hist_shm,
- rt_zero_index,
- rt_shm,
- ))
-
- # history retreival loop depending on user interaction
- # and thus a small RPC-prot for remotely controllinlg
- # what data is loaded for viewing.
- await trio.sleep_forever()
-
-
-def iter_dfs_from_shms(
- fqme: str
-) -> Generator[
- tuple[Path, ShmArray, pl.DataFrame],
- None,
- None,
-]:
- # shm buffer size table based on known sample rates
- sizes: dict[str, int] = {
- 'hist': _default_hist_size,
- 'rt': _default_rt_size,
- }
-
- # load all detected shm buffer files which have the
- # passed FQME pattern in the file name.
- shmfiles: list[Path] = []
- shmdir = Path('/dev/shm/')
-
- for shmfile in shmdir.glob(f'*{fqme}*'):
- filename: str = shmfile.name
-
- # skip index files
- if (
- '_first' in filename
- or '_last' in filename
- ):
- continue
-
- assert shmfile.is_file()
-        log.debug(f'Found matching shm buffer file: {shmfile}')
- shmfiles.append(shmfile)
-
- for shmfile in shmfiles:
-
- # lookup array buffer size based on file suffix
- # being either .rt or .hist
- key: str = shmfile.name.rsplit('.')[-1]
-
- # skip FSP buffers for now..
- if key not in sizes:
- continue
-
- size: int = sizes[key]
-
- # attach to any shm buffer, load array into polars df,
- # write to local parquet file.
- shm, opened = maybe_open_shm_array(
- key=shmfile.name,
- size=size,
- dtype=def_iohlcv_fields,
- readonly=True,
- )
- assert not opened
- ohlcv: np.ndarray = shm.array
- df: pl.DataFrame = np2pl(ohlcv)
-
- yield (
- shmfile,
- shm,
- df,
- )
diff --git a/piker/tsp/_anal.py b/piker/tsp/_anal.py
index 42c3aa6c..30cc6b59 100644
--- a/piker/tsp/_anal.py
+++ b/piker/tsp/_anal.py
@@ -275,6 +275,18 @@ def get_null_segs(
# diff of abs index steps between each zeroed row
absi_zdiff: np.ndarray = np.diff(absi_zeros)
+ if zero_t.size < 2:
+ try:
+ breakpoint()
+ except RuntimeError:
+ # XXX, if greenback not active from
+ # piker store ldshm cmd..
+ log.exception(
+ "Can't debug single-sample null!\n"
+ )
+
+ return None
+
# scan for all frame-indices where the
# zeroed-row-abs-index-step-diff is greater then the
# expected increment of 1.
@@ -434,8 +446,8 @@ def get_null_segs(
def iter_null_segs(
timeframe: float,
- frame: Frame | None = None,
- null_segs: tuple | None = None,
+ frame: Frame|None = None,
+ null_segs: tuple|None = None,
) -> Generator[
tuple[
@@ -487,7 +499,8 @@ def iter_null_segs(
start_dt = None
if (
absi_start is not None
- and start_t != 0
+ and
+ start_t != 0
):
fi_start: int = absi_start - absi_first
start_row: Seq = frame[fi_start]
@@ -501,8 +514,8 @@ def iter_null_segs(
yield (
absi_start, absi_end, # abs indices
fi_start, fi_end, # relative "frame" indices
- start_t, end_t,
- start_dt, end_dt,
+ start_t, end_t, # epoch times
+ start_dt, end_dt, # dts
)
@@ -578,11 +591,22 @@ def detect_time_gaps(
# NOTE: this flag is to indicate that on this (sampling) time
# scale we expect to only be filtering against larger venue
# closures-scale time gaps.
+ #
+ # Map to total_ method since `dt_diff` is a duration type,
+ # not datetime - modern polars requires `total_*` methods
+ # for duration types (e.g. `total_days()` not `day()`)
+ # Ensure plural form for polars API (e.g. 'day' -> 'days')
+ unit_plural: str = (
+ gap_dt_unit
+ if gap_dt_unit.endswith('s')
+ else f'{gap_dt_unit}s'
+ )
+ duration_method: str = f'total_{unit_plural}'
return step_gaps.filter(
# Second by an arbitrary dt-unit step size
getattr(
pl.col('dt_diff').dt,
- gap_dt_unit,
+ duration_method,
)().abs() > gap_thresh
)
diff --git a/piker/tsp/_annotate.py b/piker/tsp/_annotate.py
new file mode 100644
index 00000000..7e91300f
--- /dev/null
+++ b/piker/tsp/_annotate.py
@@ -0,0 +1,306 @@
+# piker: trading gear for hackers
+# Copyright (C) 2018-present Tyler Goodlet (in stewardship of pikers)
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+"""
+Time-series (remote) annotation APIs.
+
+"""
+from __future__ import annotations
+from math import copysign
+from typing import (
+ Any,
+ TYPE_CHECKING,
+)
+
+import polars as pl
+import tractor
+
+from piker.data._formatters import BGM
+from piker.storage import log
+from piker.ui._style import get_fonts
+
+if TYPE_CHECKING:
+ from piker.ui._remote_ctl import AnnotCtl
+
+
+def humanize_duration(
+ seconds: float,
+) -> str:
+ '''
+ Convert duration in seconds to short human-readable form.
+
+ Uses smallest appropriate time unit:
+ - d: days
+ - h: hours
+ - m: minutes
+ - s: seconds
+
+ Examples:
+ - 86400 -> "1d"
+ - 28800 -> "8h"
+ - 180 -> "3m"
+ - 45 -> "45s"
+
+ '''
+ abs_secs: float = abs(seconds)
+
+ if abs_secs >= 86400:
+ days: float = abs_secs / 86400
+ if days >= 10 or days == int(days):
+ return f'{int(days)}d'
+ return f'{days:.1f}d'
+
+ elif abs_secs >= 3600:
+ hours: float = abs_secs / 3600
+ if hours >= 10 or hours == int(hours):
+ return f'{int(hours)}h'
+ return f'{hours:.1f}h'
+
+ elif abs_secs >= 60:
+ mins: float = abs_secs / 60
+ if mins >= 10 or mins == int(mins):
+ return f'{int(mins)}m'
+ return f'{mins:.1f}m'
+
+ else:
+ if abs_secs >= 10 or abs_secs == int(abs_secs):
+ return f'{int(abs_secs)}s'
+ return f'{abs_secs:.1f}s'
+
+
+async def markup_gaps(
+ fqme: str,
+ timeframe: float,
+ actl: AnnotCtl,
+ wdts: pl.DataFrame,
+ gaps: pl.DataFrame,
+
+ # XXX, switch on to see txt showing a "humanized" label of each
+ # gap's duration.
+ show_txt: bool = False,
+
+) -> dict[int, dict]:
+ '''
+ Remote annotate time-gaps in a dt-fielded ts (normally OHLC)
+ with rectangles.
+
+ '''
+ # XXX: force chart redraw FIRST to ensure PlotItem coordinate
+ # system is properly initialized before we position annotations!
+ # Without this, annotations may be misaligned on first creation
+ # due to Qt/pyqtgraph initialization race conditions.
+ await actl.redraw(
+ fqme=fqme,
+ timeframe=timeframe,
+ )
+
+ aids: dict[int] = {}
+ for i in range(gaps.height):
+ row: pl.DataFrame = gaps[i]
+
+ # the gap's RIGHT-most bar's OPEN value
+ # at that time (sample) step.
+ iend: int = row['index'][0]
+
+ # dt: datetime = row['dt'][0]
+ # dt_prev: datetime = row['dt_prev'][0]
+ # dt_end_t: float = dt.timestamp()
+
+
+ # TODO: can we eventually remove this
+ # once we figure out why the epoch cols
+ # don't match?
+ # TODO: FIX HOW/WHY these aren't matching
+ # and are instead off by 4hours (EST
+ # vs. UTC?!?!)
+ # end_t: float = row['time']
+ # assert (
+ # dt.timestamp()
+ # ==
+ # end_t
+ # )
+
+ # the gap's LEFT-most bar's CLOSE value
+ # at that time (sample) step.
+ prev_r: pl.DataFrame = wdts.filter(
+ pl.col('index') == iend - 1
+ )
+ # XXX: probably a gap in the (newly sorted or de-duplicated)
+ # dt-df, so we might need to re-index first..
+ dt: pl.Series = row['dt']
+ dt_prev: pl.Series = row['dt_prev']
+ if prev_r.is_empty():
+
+ # XXX, filter out any special ignore cases,
+ # - UNIX-epoch stamped datums
+ # - first row
+ if (
+ dt_prev.dt.epoch()[0] == 0
+ or
+ dt.dt.epoch()[0] == 0
+ ):
+ log.warning('Skipping row with UNIX epoch timestamp ??')
+ continue
+
+ if wdts[0]['index'][0] == iend: # first row
+ log.warning('Skipping first-row (has no previous obvi) !!')
+ continue
+
+ # XXX, if the previous-row by shm-index is missing,
+ # meaning there is a missing sample (set), get the prior
+ # row by df index and attempt to use it?
+ i_wdts: pl.DataFrame = wdts.with_row_index(name='i')
+ i_row: int = i_wdts.filter(pl.col('index') == iend)['i'][0]
+ prev_row_by_i = wdts[i_row]
+ prev_r: pl.DataFrame = prev_row_by_i
+
+ # debug any missing pre-row
+ if tractor._state.is_debug_mode():
+ await tractor.pause()
+
+ istart: int = prev_r['index'][0]
+ # TODO: implement px-col width measure
+ # and ensure at least as many px-cols
+ # shown per rect as configured by user.
+ # gap_w: float = abs((iend - istart))
+ # if gap_w < 6:
+ # margin: float = 6
+ # iend += margin
+ # istart -= margin
+
+ opn: float = row['open'][0]
+ cls: float = prev_r['close'][0]
+
+ # get gap duration for humanized label
+ gap_dur_s: float = row['s_diff'][0]
+ gap_label: str = humanize_duration(gap_dur_s)
+
+ # XXX: get timestamps for server-side index lookup
+ start_time: float = prev_r['time'][0]
+ end_time: float = row['time'][0]
+
+ # BGM=0.16 is the normal diff from overlap between bars, SO
+ # just go slightly "in" from that "between them".
+ from_idx: int = BGM - .06 # = .10
+ lc: tuple[float, float] = (
+ istart + 1 - from_idx,
+ cls,
+ )
+ ro: tuple[float, float] = (
+ iend + from_idx,
+ opn,
+ )
+
+ diff: float = cls - opn
+ sgn: float = copysign(1, diff)
+ up_gap: bool = sgn == -1
+ down_gap: bool = sgn == 1
+ flat: bool = sgn == 0
+
+ color: str = 'dad_blue'
+ # TODO? mks more sense to have up/down coloring?
+ # color: str = {
+ # -1: 'lilypad_green', # up-gap
+ # 1: 'wine', # down-gap
+ # }[sgn]
+
+ rect_kwargs: dict[str, Any] = dict(
+ fqme=fqme,
+ timeframe=timeframe,
+ start_pos=lc,
+ end_pos=ro,
+ color=color,
+ start_time=start_time,
+ end_time=end_time,
+ )
+
+ # add up/down rects
+ aid: int|None = await actl.add_rect(**rect_kwargs)
+ if aid is None:
+ log.error(
+ f'Failed to add rect for,\n'
+ f'{rect_kwargs!r}\n'
+ f'\n'
+ f'Skipping to next gap!\n'
+ )
+ continue
+
+ assert aid
+ aids[aid] = rect_kwargs
+ direction: str = (
+ 'down' if down_gap
+ else 'up'
+ )
+ # TODO! mk this a `msgspec.Struct` which we deserialize
+ # on the server side!
+ # XXX: send timestamp for server-side index lookup
+ # to ensure alignment with current shm state
+ gap_time: float = row['time'][0]
+ arrow_kwargs: dict[str, Any] = dict(
+ fqme=fqme,
+ timeframe=timeframe,
+ x=iend, # fallback if timestamp lookup fails
+ y=cls,
+ time=gap_time, # for server-side index lookup
+ color=color,
+ alpha=169,
+ pointing=direction,
+ # TODO: expose these as params to markup_gaps()?
+ headLen=10,
+ headWidth=2.222,
+ pxMode=True,
+ )
+
+ aid: int = await actl.add_arrow(
+ **arrow_kwargs
+ )
+
+ # add duration label to RHS of arrow
+ if up_gap:
+ anchor = (0, 0)
+ # ^XXX? i dun get dese dims.. XD
+ elif down_gap:
+ anchor = (0, 1) # XXX y, x?
+ else: # no-gap?
+ assert flat
+ anchor = (0, 0) # up from bottom
+
+ # use a slightly smaller font for gap label txt.
+ font, small_font = get_fonts()
+ font_size: int = small_font.px_size - 1
+ assert isinstance(font_size, int)
+
+ if show_txt:
+ text_aid: int = await actl.add_text(
+ fqme=fqme,
+ timeframe=timeframe,
+ text=gap_label,
+ x=iend + 1, # fallback if timestamp lookup fails
+ y=cls,
+ time=gap_time, # server-side index lookup
+ color=color,
+ anchor=anchor,
+ font_size=font_size,
+ )
+ aids[text_aid] = {'text': gap_label}
+
+ # tell chart to redraw all its
+ # graphics view layers Bo
+ await actl.redraw(
+ fqme=fqme,
+ timeframe=timeframe,
+ )
+ return aids
diff --git a/piker/tsp/_dedupe_smart.py b/piker/tsp/_dedupe_smart.py
new file mode 100644
index 00000000..8c0ac55a
--- /dev/null
+++ b/piker/tsp/_dedupe_smart.py
@@ -0,0 +1,206 @@
+'''
+Smart OHLCV deduplication with data quality validation.
+
+Handles concurrent write conflicts by keeping the most complete bar
+(highest volume) while detecting data quality anomalies.
+
+'''
+import polars as pl
+
+from ._anal import with_dts
+
+
+def dedupe_ohlcv_smart(
+ src_df: pl.DataFrame,
+ time_col: str = 'time',
+ volume_col: str = 'volume',
+ sort: bool = True,
+
+) -> tuple[
+ pl.DataFrame, # with dts
+ pl.DataFrame, # deduped (keeping higher volume bars)
+ int, # count of dupes removed
+ pl.DataFrame|None, # valid race conditions
+ pl.DataFrame|None, # data quality violations
+]:
+ '''
+ Smart OHLCV deduplication keeping most complete bars.
+
+ For duplicate timestamps, keeps bar with highest volume under
+ the assumption that higher volume indicates more complete/final
+ data from backfill vs partial live updates.
+
+ Returns
+ -------
+ Tuple of:
+ - wdts: original dataframe with datetime columns added
+ - deduped: deduplicated frame keeping highest-volume bars
+ - diff: number of duplicate rows removed
+ - valid_races: duplicates meeting expected race condition pattern
+ (volume monotonic, OHLC ranges valid)
+ - data_quality_issues: duplicates violating expected relationships
+ indicating provider data problems
+
+ '''
+ wdts: pl.DataFrame = with_dts(src_df)
+
+ # Find duplicate timestamps
+ dupes: pl.DataFrame = wdts.filter(
+ pl.col(time_col).is_duplicated()
+ )
+
+ if dupes.is_empty():
+ # No duplicates, return as-is
+ return (wdts, wdts, 0, None, None)
+
+ # Analyze duplicate groups for validation
+ dupe_analysis: pl.DataFrame = (
+ dupes
+ .sort([time_col, 'index'])
+ .group_by(time_col, maintain_order=True)
+ .agg([
+ pl.col('index').alias('indices'),
+ pl.col('volume').alias('volumes'),
+ pl.col('high').alias('highs'),
+ pl.col('low').alias('lows'),
+ pl.col('open').alias('opens'),
+ pl.col('close').alias('closes'),
+ pl.col('dt').first().alias('dt'),
+ pl.len().alias('count'),
+ ])
+ )
+
+ # Validate OHLCV monotonicity for each duplicate group
+ def check_ohlcv_validity(row) -> dict[str, bool]:
+ '''
+ Check if duplicate bars follow expected race condition pattern.
+
+ For a valid live-update → backfill race:
+ - volume should be monotonically increasing
+ - high should be monotonically non-decreasing
+ - low should be monotonically non-increasing
+ - open should be identical (fixed at bar start)
+
+ Returns dict of violation flags.
+
+ '''
+ vols: list = row['volumes']
+ highs: list = row['highs']
+ lows: list = row['lows']
+ opens: list = row['opens']
+
+ violations: dict[str, bool] = {
+ 'volume_non_monotonic': False,
+ 'high_decreased': False,
+ 'low_increased': False,
+ 'open_mismatch': False,
+ 'identical_bars': False,
+ }
+
+ # Check if all bars are identical (pure duplicate)
+ if (
+ len(set(vols)) == 1
+ and len(set(highs)) == 1
+ and len(set(lows)) == 1
+ and len(set(opens)) == 1
+ ):
+ violations['identical_bars'] = True
+ return violations
+
+ # Check volume monotonicity
+ for i in range(1, len(vols)):
+ if vols[i] < vols[i-1]:
+ violations['volume_non_monotonic'] = True
+ break
+
+ # Check high monotonicity (can only increase or stay same)
+ for i in range(1, len(highs)):
+ if highs[i] < highs[i-1]:
+ violations['high_decreased'] = True
+ break
+
+ # Check low monotonicity (can only decrease or stay same)
+ for i in range(1, len(lows)):
+ if lows[i] > lows[i-1]:
+ violations['low_increased'] = True
+ break
+
+ # Check open consistency (should be fixed)
+ if len(set(opens)) > 1:
+ violations['open_mismatch'] = True
+
+ return violations
+
+ # Apply validation
+ dupe_analysis = dupe_analysis.with_columns([
+ pl.struct(['volumes', 'highs', 'lows', 'opens'])
+ .map_elements(
+ check_ohlcv_validity,
+ return_dtype=pl.Struct([
+ pl.Field('volume_non_monotonic', pl.Boolean),
+ pl.Field('high_decreased', pl.Boolean),
+ pl.Field('low_increased', pl.Boolean),
+ pl.Field('open_mismatch', pl.Boolean),
+ pl.Field('identical_bars', pl.Boolean),
+ ])
+ )
+ .alias('validity')
+ ])
+
+ # Unnest validity struct
+ dupe_analysis = dupe_analysis.unnest('validity')
+
+ # Separate valid races from data quality issues
+ valid_races: pl.DataFrame|None = (
+ dupe_analysis
+ .filter(
+ # Valid if no violations OR just identical bars
+ ~pl.col('volume_non_monotonic')
+ & ~pl.col('high_decreased')
+ & ~pl.col('low_increased')
+ & ~pl.col('open_mismatch')
+ )
+ )
+ if valid_races.is_empty():
+ valid_races = None
+
+ data_quality_issues: pl.DataFrame|None = (
+ dupe_analysis
+ .filter(
+ # Issues if any non-identical violation exists
+ (
+ pl.col('volume_non_monotonic')
+ | pl.col('high_decreased')
+ | pl.col('low_increased')
+ | pl.col('open_mismatch')
+ )
+ & ~pl.col('identical_bars')
+ )
+ )
+ if data_quality_issues.is_empty():
+ data_quality_issues = None
+
+ # Deduplicate: keep highest volume bar for each timestamp
+ deduped: pl.DataFrame = (
+ wdts
+ .sort([time_col, volume_col])
+ .unique(
+ subset=[time_col],
+ keep='last',
+ maintain_order=False,
+ )
+ )
+
+ # Re-sort by time or index
+ if sort:
+ deduped = deduped.sort(by=time_col)
+
+ diff: int = wdts.height - deduped.height
+
+ return (
+ wdts,
+ deduped,
+ diff,
+ valid_races,
+ data_quality_issues,
+ )
diff --git a/piker/tsp/_history.py b/piker/tsp/_history.py
new file mode 100644
index 00000000..a47e8e9d
--- /dev/null
+++ b/piker/tsp/_history.py
@@ -0,0 +1,1600 @@
+# piker: trading gear for hackers
+# Copyright (C) Tyler Goodlet (in stewardship for pikers)
+
+# This program is free software: you can redistribute it and/or
+# modify it under the terms of the GNU Affero General Public
+# License as published by the Free Software Foundation, either
+# version 3 of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Affero General Public License for more details.
+
+# You should have received a copy of the GNU Affero General Public
+# License along with this program. If not, see
+# <https://www.gnu.org/licenses/>.
+
+'''
+Historical TSP (time-series processing) lowlevel mgmt machinery and biz logic for,
+
+- hi-level biz logics using the `.storage` subpkg APIs for (I/O)
+ orchestration and mgmt of tsdb data sets.
+- core data-provider history backfilling middleware (as task-funcs) via
+  (what will eventually be `datad`, but right now are the) `.brokers` backend
+ APIs.
+- various data set cleaning, repairing and issue-detection/analysis
+ routines to ensure consistent series whether in shm or when
+ stored offline (in a tsdb).
+
+'''
+from __future__ import annotations
+from datetime import datetime
+from functools import partial
+from pathlib import Path
+from pprint import pformat
+from types import ModuleType
+from typing import (
+ Callable,
+ Generator,
+ TYPE_CHECKING,
+)
+
+import trio
+from trio_typing import TaskStatus
+import tractor
+from pendulum import (
+ Interval,
+ DateTime,
+ Duration,
+ duration as mk_duration,
+ from_timestamp,
+ timezone,
+)
+import numpy as np
+import polars as pl
+
+from piker.brokers import NoData
+from piker.accounting import (
+ MktPair,
+)
+from piker.log import get_logger
+from ..data._sharedmem import (
+ maybe_open_shm_array,
+ ShmArray,
+)
+from ..data._source import def_iohlcv_fields
+from ..data._sampling import (
+ open_sample_stream,
+)
+
+
+from piker.brokers._util import (
+ DataUnavailable,
+)
+from piker.storage import TimeseriesNotFound
+from ._anal import (
+ dedupe,
+ get_null_segs,
+ iter_null_segs,
+ Frame,
+
+ # codec-ish
+ np2pl as np2pl,
+
+ # `polars` specific
+ # with_dts,
+ # sort_diff,
+
+ # TODO, use this to correct conc-issues during backfill?
+ # detect_price_gaps,
+)
+
+if TYPE_CHECKING:
+ from bidict import bidict
+ from ..service.marketstore import StorageClient
+ # from .feed import _FeedsBus
+
+
+log = get_logger(__name__)
+
+
+# `ShmArray` buffer sizing configuration:
+_mins_in_day: int = int(60 * 24)
+# seconds-per-day; used to size the realtime (1s) shm buffer in
+# day-sized chunks (how many days worth is a judgement call per
+# how often the user restarts feeds).
+_secs_in_day: int = int(60 * _mins_in_day)
+_days_in_week: int = 7
+
+_days_worth: int = 3
+_default_hist_size: int = 6 * 365 * _mins_in_day
+_hist_buffer_start: int = int(
+    _default_hist_size - round(7 * _mins_in_day)
+)
+
+_default_rt_size: int = _days_worth * _secs_in_day
+# NOTE: start the append index in rt buffer such that 1 day's worth
+# can be appended before overrun.
+_rt_buffer_start: int = int((_days_worth - 1) * _secs_in_day)
+
+
+def diff_history(
+    array: np.ndarray,
+    append_until_dt: datetime|None = None,
+    prepend_until_dt: datetime|None = None,
+
+) -> np.ndarray:
+    '''Slice `array` to rows before `append_until_dt`, else rows at/after `prepend_until_dt`.'''
+    # no diffing with tsdb dt index possible..
+    if (
+        prepend_until_dt is None
+        and
+        append_until_dt is None
+    ):
+        return array
+
+    times: np.ndarray = array['time']
+
+    if append_until_dt:  # NOTE: takes precedence when both bounds are passed
+        return array[times < append_until_dt.timestamp()]
+    else:
+        return array[times >= prepend_until_dt.timestamp()]
+
+
+async def shm_push_in_between(
+    shm: ShmArray,
+    to_push: np.ndarray,
+    prepend_index: int,
+    backfill_until_dt: datetime,
+
+    update_start_on_prepend: bool = False,
+
+) -> None:
+    '''Prepend `to_push` into `shm` at `prepend_index`; debugger-pause on out-of-order data.'''
+    # XXX, try to catch bad inserts by peeking at the first/last
+    # times and ensure we don't violate order.
+    f_times: np.ndarray = to_push['time']
+    f_start: float = f_times[0]
+    f_start_dt: DateTime = from_timestamp(f_start)
+    if (
+        f_start_dt < backfill_until_dt
+    ):
+        await tractor.pause()
+
+    # XXX: extremely important, there can be no checkpoints
+    # in the body of this func to avoid entering new ``frames``
+    # values while we're pipelining the current ones to
+    # memory...
+    shm.push(
+        to_push,
+        prepend=True,
+
+        # XXX: only update the ._first index if no tsdb
+        # segment was previously prepended by the
+        # parent task.
+        update_first=update_start_on_prepend,
+
+        # XXX: only prepend from a manually calculated shm
+        # index if there was already a tsdb history
+        # segment prepended (since then the
+        # ._first.value is going to be wayyy in the
+        # past!)
+        start=(
+            prepend_index
+            if not update_start_on_prepend
+            else None
+        ),
+    )
+
+
+async def maybe_fill_null_segments(
+ shm: ShmArray,
+ timeframe: float,
+ get_hist: Callable,
+ sampler_stream: tractor.MsgStream,
+ mkt: MktPair,
+ backfill_until_dt: datetime,
+
+ task_status: TaskStatus[trio.Event] = trio.TASK_STATUS_IGNORED,
+
+) -> list[Frame]:
+
+ null_segs_detected = trio.Event()
+ task_status.started(null_segs_detected)
+
+ frame: Frame = shm.array
+
+ # TODO, put in parent task/daemon root!
+ import greenback
+ await greenback.ensure_portal()
+
+ null_segs: tuple|None = get_null_segs(
+ frame,
+ period=timeframe,
+ )
+ for (
+ absi_start, absi_end,
+ fi_start, fi_end,
+ start_t, end_t,
+ start_dt, end_dt,
+ ) in iter_null_segs(
+ null_segs=null_segs,
+ frame=frame,
+ timeframe=timeframe,
+ ):
+
+ # XXX NOTE: ?if we get a badly ordered timestamp
+ # pair, immediately stop backfilling?
+ if (
+ start_dt
+ and
+ end_dt < start_dt
+ ):
+ await tractor.pause()
+ break
+
+ (
+ array,
+ next_start_dt,
+ next_end_dt,
+ ) = await get_hist(
+ timeframe,
+ start_dt=start_dt,
+ end_dt=end_dt,
+ )
+
+ if (
+ frame_start_dt := (
+ from_timestamp(array['time'][0])
+ ) < backfill_until_dt
+ ):
+ log.error(
+ f'Invalid frame_start !?\n'
+ f'frame_start_dt: {frame_start_dt!r}\n'
+ f'backfill_until_dt: {backfill_until_dt!r}\n'
+ )
+ await tractor.pause()
+
+ # XXX TODO: pretty sure if i plot tsla, btcusdt.binance
+ # and mnq.cme.ib this causes a Qt crash XXDDD
+
+ # make sure we don't overrun the buffer start
+ len_to_push: int = min(absi_end, array.size)
+ to_push: np.ndarray = array[-len_to_push:]
+
+ await shm_push_in_between(
+ shm,
+ to_push,
+ prepend_index=absi_end,
+ backfill_until_dt=backfill_until_dt,
+ update_start_on_prepend=False,
+ )
+ # TODO: UI side needs IPC event to update..
+ # - make sure the UI actually always handles
+ # this update!
+ # - remember that in the display side, only refersh this
+ # if the respective history is actually "in view".
+ # loop
+ try:
+ await sampler_stream.send({
+ 'broadcast_all': {
+
+ # XXX NOTE XXX: see the
+ # `.ui._display.increment_history_view()` if block
+ # that looks for this info to FORCE a hard viz
+ # redraw!
+ 'backfilling': (mkt.fqme, timeframe),
+ },
+ })
+ except tractor.ContextCancelled:
+ # log.exception
+ await tractor.pause()
+ raise
+
+ null_segs_detected.set()
+ # RECHECK for more null-gaps
+ frame: Frame = shm.array
+ null_segs: tuple | None = get_null_segs(
+ frame,
+ period=timeframe,
+ )
+ if (
+ null_segs
+ and
+ len(null_segs[-1])
+ ):
+ (
+ iabs_slices,
+ iabs_zero_rows,
+ zero_t,
+ ) = null_segs
+ log.warning(
+ f'{len(iabs_slices)} NULL TIME SEGMENTS DETECTED!\n'
+ f'{pformat(iabs_slices)}'
+ )
+
+ # TODO: always backfill gaps with the earliest (price) datum's
+ # value to avoid the y-ranger including zeros and completely
+ # stretching the y-axis..
+ # array: np.ndarray = shm.array
+ # zeros = array[array['low'] == 0]
+ ohlc_fields: list[str] = [
+ 'open',
+ 'high',
+ 'low',
+ 'close',
+ ]
+
+ for istart, istop in iabs_slices:
+
+ # get view into buffer for null-segment
+ gap: np.ndarray = shm._array[istart:istop]
+
+ # copy the oldest OHLC samples forward
+ cls: float = shm._array[istart]['close']
+
+ # TODO: how can we mark this range as being a gap tho?
+ # -[ ] maybe pg finally supports nulls in ndarray to
+ # show empty space somehow?
+ # -[ ] we could put a special value in the vlm or
+ # another col/field to denote?
+ gap[ohlc_fields] = cls
+
+ start_t: float = shm._array[istart]['time']
+ t_diff: float = (istop - istart)*timeframe
+
+ gap['time'] = np.arange(
+ start=start_t,
+ stop=start_t + t_diff,
+ step=timeframe,
+ )
+
+ # TODO: reimpl using the new `.ui._remote_ctl` ctx
+ # ideally using some kinda decent
+ # tractory-reverse-lookup-connnection from some other
+ # `Context` type thingy?
+ await sampler_stream.send({
+ 'broadcast_all': {
+
+ # XXX NOTE XXX: see the
+ # `.ui._display.increment_history_view()` if block
+ # that looks for this info to FORCE a hard viz
+ # redraw!
+ 'backfilling': (mkt.fqme, timeframe),
+ },
+ })
+
+ # TODO: interatively step through any remaining
+ # time-gaps/null-segments and spawn piecewise backfiller
+ # tasks in a nursery?
+ # -[ ] not sure that's going to work so well on the ib
+ # backend but worth a shot?
+ # -[ ] mk new history connections to make it properly
+ # parallel possible no matter the backend?
+ # -[ ] fill algo: do queries in alternating "latest, then
+ # earliest, then latest.. etc?"
+
+
+async def start_backfill(
+    get_hist,
+    def_frame_duration: Duration,
+    mod: ModuleType,
+    mkt: MktPair,
+    shm: ShmArray,
+    timeframe: float,
+    backfill_from_shm_index: int,
+    backfill_from_dt: datetime,
+    sampler_stream: tractor.MsgStream,
+
+    backfill_until_dt: datetime|None = None,
+    storage: StorageClient|None = None,
+    write_tsdb: bool = True,
+
+    task_status: TaskStatus[tuple] = trio.TASK_STATUS_IGNORED,
+
+) -> None:
+    '''Backfill `shm` in reverse-chronological frames until `backfill_until_dt`, optionally persisting to tsdb `storage`.'''
+    # let caller unblock and deliver latest history frame
+    # and use to signal that backfilling the shm gap until
+    # the tsdb end is complete!
+    bf_done = trio.Event()
+    task_status.started(bf_done)
+
+    # based on the sample step size, maybe load a certain amount history
+    update_start_on_prepend: bool = False
+    if (
+        _until_was_none := (backfill_until_dt is None)
+    ):
+
+        # TODO: per-provider default history-durations?
+        # -[ ] inside the `open_history_client()` config allow
+        #     declaring the history duration limits instead of
+        #     guessing and/or applying the same limits to all?
+        #
+        # -[ ] allow declaring (default) per-provider backfill
+        #     limits inside a [storage] sub-section in conf.toml?
+        #
+        # NOTE, when no tsdb "last datum" is provided, we just
+        # load some near-term history by presuming a "decently
+        # large" 60s duration limit and a much shorter 1s range.
+        periods: dict[int, dict[str, int]] = {
+            1: {'days': 2},
+            60: {'years': 6},
+        }
+        period_duration: dict[str, int] = periods[timeframe]
+        update_start_on_prepend: bool = True
+
+        # NOTE: manually set the "latest" datetime which we intend to
+        # backfill history "until" so as to adhere to the history
+        # settings above when the tsdb is detected as being empty.
+        backfill_until_dt = backfill_from_dt.subtract(**period_duration)
+
+    # STAGE NOTE: "backward history gap filling":
+    # - we push to the shm buffer until we have history back
+    #   until the latest entry loaded from the tsdb's table B)
+    # - after this loop continue to check for other gaps in the
+    #   (tsdb) history and (at least report) maybe fill them
+    #   from new frame queries to the backend?
+    last_start_dt: datetime = backfill_from_dt
+    next_prepend_index: int = backfill_from_shm_index
+
+    est = timezone('EST')
+
+    while last_start_dt > backfill_until_dt:
+        log.info(
+            f'Requesting {timeframe}s frame:\n'
+            f'backfill_until_dt: {backfill_until_dt}\n'
+            f'last_start_dt: {last_start_dt}\n'
+        )
+        try:
+            (
+                array,
+                next_start_dt,
+                next_end_dt,
+            ) = await get_hist(
+                timeframe,
+                end_dt=(end_dt_param := last_start_dt),
+            )
+        except NoData as nodata:
+            _nodata = nodata
+            orig_last_start_dt: datetime = last_start_dt
+            gap_report: str = (
+                f'EMPTY FRAME for `end_dt: {last_start_dt}`?\n'
+                f'{mod.name} -> tf@fqme: {timeframe}@{mkt.fqme}\n'
+                f'last_start_dt: {orig_last_start_dt}\n\n'
+                f'bf_until: {backfill_until_dt}\n'
+            )
+            # EMPTY FRAME signal with 3 (likely) causes:
+            #
+            # 1. range contains legit gap in venue history
+            # 2. history actually (edge case) **began** at the
+            #    value `last_start_dt`
+            # 3. some other unknown error (ib blocking the
+            #    history-query bc they don't want you seeing how
+            #    they cucked all the tinas.. like with options
+            #    hist)
+            #
+            if def_frame_duration:
+                # decrement by a duration's (frame) worth of time
+                # as maybe indicated by the backend to see if we
+                # can get older data before this possible
+                # "history gap".
+                last_start_dt: datetime = last_start_dt.subtract(
+                    seconds=def_frame_duration.total_seconds()
+                )
+                gap_report += (
+                    f'Decrementing `end_dt` and retrying with,\n'
+                    f'def_frame_duration: {def_frame_duration}\n'
+                    f'(new) last_start_dt: {last_start_dt}\n'
+                )
+                log.warning(gap_report)
+                # skip writing to shm/tsdb and try the next
+                # duration's worth of prior history.
+                continue
+
+            else:
+                # await tractor.pause()
+                raise DataUnavailable(gap_report)
+
+        # broker says there never was or is no more history to pull
+        except DataUnavailable as due:
+            message: str = due.args[0]
+            log.warning(
+                f'Provider {mod.name!r} halted backfill due to,\n\n'
+
+                f'{message}\n'
+
+                f'fqme: {mkt.fqme}\n'
+                f'timeframe: {timeframe}\n'
+                f'last_start_dt: {last_start_dt}\n'
+                f'bf_until: {backfill_until_dt}\n'
+            )
+            # UGH: what's a better way?
+            # TODO: backends are responsible for being correct on
+            # this right!?
+            # -[ ] in the `ib` case we could maybe offer some way
+            #     to halt the request loop until the condition is
+            #     resolved or should the backend be entirely in
+            #     charge of solving such faults? yes, right?
+            return
+
+        time: np.ndarray = array['time']
+        assert (
+            time[0]
+            ==
+            next_start_dt.timestamp()
+        )
+        assert (
+            (last_time := time[-1])
+            ==
+            next_end_dt.timestamp()
+        )
+
+        frame_last_dt: DateTime = from_timestamp(last_time)
+        if (
+            frame_last_dt.add(seconds=timeframe)
+            <
+            end_dt_param
+        ):
+            est_frame_last_dt = est.convert(frame_last_dt)
+            est_end_dt_param = est.convert(end_dt_param)
+            log.warning(
+                f'Provider frame ending BEFORE requested end_dt={end_dt_param} ??\n'
+                f'frame_last_dt (EST): {est_frame_last_dt!r}\n'
+                f'end_dt_param (EST): {est_end_dt_param!r}\n'
+                f'\n'
+                f'Likely contains,\n'
+                f'- a venue closure.\n'
+                f'- (maybe?) missing data ?\n'
+            )
+            # ?TODO, check against venue closure hours
+            # if/when provided by backend?
+            await tractor.pause()
+
+        expected_dur: Interval = (
+            last_start_dt.subtract(
+                seconds=timeframe
+                # ^XXX, always "up to" the bar *before*
+            )
+            -
+            next_start_dt
+        )
+
+        # frame's worth of sample-period-steps, in seconds
+        frame_size_s: float = len(array) * timeframe
+        recv_frame_dur: Duration = (
+            from_timestamp(array[-1]['time'])
+            -
+            from_timestamp(array[0]['time'])
+        )
+        if (
+            (lt_frame := (recv_frame_dur < expected_dur))
+            or
+            (null_frame := (frame_size_s == 0))
+            # ^XXX, should NEVER hit now!
+        ):
+            # XXX: query result includes a start point prior to our
+            # expected "frame size" and thus is likely some kind of
+            # history gap (eg. market closed period, outage, etc.)
+            # so just report it to console for now.
+            if lt_frame:
+                reason = 'Possible GAP (or first-datum)'
+            else:
+                assert null_frame
+                reason = 'NULL-FRAME'
+
+            missing_dur: Interval = expected_dur.end - recv_frame_dur.end
+            log.warning(
+                f'{timeframe}s-series {reason} detected!\n'
+                f'fqme: {mkt.fqme}\n'
+                f'last_start_dt: {last_start_dt}\n\n'
+                f'recv interval: {recv_frame_dur}\n'
+                f'expected interval: {expected_dur}\n\n'
+
+                f'Missing duration of history of {missing_dur.in_words()!r}\n'
+                f'{missing_dur}\n'
+            )
+            # await tractor.pause()
+
+        to_push = diff_history(
+            array,
+            prepend_until_dt=backfill_until_dt,
+        )
+        ln: int = len(to_push)
+        if ln:
+            log.info(
+                f'{ln} bars for {next_start_dt} -> {last_start_dt}'
+            )
+
+        else:
+            log.warning(
+                '0 BARS TO PUSH after diff!?\n'
+                f'{next_start_dt} -> {last_start_dt}'
+            )
+            await tractor.pause()
+
+        # Check if we're about to exceed buffer capacity BEFORE
+        # attempting the push
+        if (next_prepend_index - ln) < 0:
+            log.warning(
+                f'Backfill would exceed buffer capacity!\n'
+                f'next_prepend_index: {next_prepend_index}\n'
+                f'frame size: {ln}\n'
+                f'Truncating to fit remaining space..\n'
+            )
+            # only push what fits
+            to_push = to_push[-(next_prepend_index):]
+            ln = len(to_push)
+
+            if ln == 0:
+                log.warning(
+                    'No space left in buffer, stopping backfill!'
+                )
+                break
+
+        # bail gracefully on shm allocation overrun/full
+        # condition
+        try:
+            await shm_push_in_between(
+                shm,
+                to_push,
+                prepend_index=next_prepend_index,
+                backfill_until_dt=backfill_until_dt,
+                update_start_on_prepend=update_start_on_prepend,
+            )
+            await sampler_stream.send({
+                'broadcast_all': {
+                    'backfilling': (mkt.fqme, timeframe),
+                },
+            })
+
+            # decrement next prepend point
+            next_prepend_index = next_prepend_index - ln
+            last_start_dt = next_start_dt
+
+            # Stop if we've hit buffer start
+            if next_prepend_index <= 0:
+                log.warning(
+                    f'Reached buffer start (index={next_prepend_index}), '
+                    f'stopping backfill'
+                )
+                break
+
+        except ValueError as ve:
+            _ve = ve
+            log.error(
+                f'Shm prepend OVERRUN on: {next_start_dt} -> {last_start_dt}?'
+            )
+
+            if next_prepend_index < ln:
+                log.warning(
+                    f'Shm buffer can only hold {next_prepend_index} more rows..\n'
+                    f'Appending those from recent {ln}-sized frame, no more!'
+                )
+
+            to_push = to_push[-next_prepend_index + 1:]  # NOTE(review): `+ 1` differs from the `-(next_prepend_index)` truncation above — off-by-one? confirm
+            await shm_push_in_between(
+                shm,
+                to_push,
+                prepend_index=next_prepend_index,
+                backfill_until_dt=backfill_until_dt,
+                update_start_on_prepend=update_start_on_prepend,
+            )
+            await sampler_stream.send({
+                'broadcast_all': {
+                    'backfilling': (mkt.fqme, timeframe),
+                },
+            })
+
+            # XXX, can't push the entire frame? so
+            # push only the amount that can fit..
+            break
+
+        log.info(
+            f'Shm pushed {ln} frame:\n'
+            f'{next_start_dt} -> {last_start_dt}'
+        )
+
+        # FINALLY, maybe write immediately to the tsdb backend for
+        # long-term storage.
+        if (
+            storage is not None
+            and
+            write_tsdb
+        ):
+            log.info(
+                f'Writing {ln} frame to storage:\n'
+                f'{next_start_dt} -> {last_start_dt}'
+            )
+
+            # NOTE, always drop the src asset token for
+            # non-currency-pair like market types (for now)
+            #
+            # THAT IS, for now our table key schema is NOT
+            # including the dst[/src] source asset token. SO,
+            # 'tsla.nasdaq.ib' over 'tsla/usd.nasdaq.ib' for
+            # historical reasons ONLY.
+            if mkt.dst.atype not in {
+                'crypto',
+                'crypto_currency',
+                'fiat', # a "forex pair"
+                'perpetual_future', # stupid "perps" from cex land
+            }:
+                col_sym_key: str = mkt.get_fqme(
+                    delim_char='',
+                    without_src=True,
+                )
+            else:
+                col_sym_key: str = mkt.get_fqme(
+                    delim_char='',
+                )
+
+            await storage.write_ohlcv(
+                col_sym_key,
+                shm.array,
+                timeframe,
+            )
+            df: pl.DataFrame = await storage.as_df(
+                fqme=mkt.fqme,
+                period=timeframe,
+                load_from_offline=False,
+            )
+            (
+                wdts,
+                deduped,
+                diff,
+            ) = dedupe(df)
+            if diff:
+                log.warning(
+                    f'Found {diff!r} duplicates in tsdb! '
+                    f'=> Overwriting with `deduped` data !! <=\n'
+                )
+                await storage.write_ohlcv(
+                    col_sym_key,
+                    deduped,
+                    timeframe,
+                )
+
+    else:
+        # finally filled gap
+        log.info(
+            f'Finished filling gap to tsdb start @ {backfill_until_dt}!'
+        )
+
+        # XXX: extremely important, there can be no checkpoints
+        # in the block above to avoid entering new ``frames``
+        # values while we're pipelining the current ones to
+        # memory...
+        # await sampler_stream.send('broadcast_all')
+
+    # short-circuit (for now)
+    bf_done.set()
+
+
+# NOTE: originally this was used to cope with a tsdb (marketstore)
+# which could not delivery very large frames of history over gRPC
+# (thanks goolag) due to corruption issues.
+#
+# NOW, using apache parquet (by default in the local filesys) we
+# don't have this requirement since the files can be loaded very
+# quickly in entirety to memory via `polars.read_parquet()`.
+#
+async def back_load_from_tsdb(
+    storemod: ModuleType,
+    storage: StorageClient,
+
+    fqme: str,
+
+    tsdb_history: np.ndarray,
+
+    last_tsdb_dt: datetime,
+    latest_start_dt: datetime,
+    latest_end_dt: datetime,
+
+    bf_done: trio.Event,
+
+    timeframe: int,
+    shm: ShmArray,
+):
+    assert len(tsdb_history)
+
+    # sync to backend history task's query/load completion
+    # if bf_done:
+    #     await bf_done.wait()
+
+    # TODO: eventually it'd be nice to not require a shm array/buffer
+    # to accomplish this.. maybe we can do some kind of tsdb direct to
+    # graphics format eventually in a child-actor?
+    if storemod.name == 'nativedb':
+        return
+
+    # NOTE(review): unconditional debugger pause — leftover debug? confirm before any non-nativedb use.
+    await tractor.pause()
+    assert shm._first.value == 0
+
+    array = shm.array
+
+    # if timeframe == 1:
+    #     times = shm.array['time']
+    #     assert (times[1] - times[0]) == 1
+
+    if len(array):
+        shm_last_dt = from_timestamp(
+            shm.array[0]['time']
+        )
+    else:
+        shm_last_dt = None
+
+    if last_tsdb_dt:
+        assert shm_last_dt >= last_tsdb_dt  # NOTE(review): raises TypeError if shm empty (shm_last_dt is None) — confirm
+
+    # do diff against start index of last frame of history and only
+    # fill in an amount of datums from tsdb allows for most recent
+    # to be loaded into mem *before* tsdb data.
+    if (
+        last_tsdb_dt
+        and latest_start_dt
+    ):
+        backfilled_size_s: int = (
+            latest_start_dt - last_tsdb_dt
+        ).seconds  # XXX `.seconds` is only the component, not total secs; log-only here
+        # if the shm buffer len is not large enough to contain
+        # all missing data between the most recent backend-queried frame
+        # and the most recent dt-index in the db we warn that we only
+        # want to load a portion of the next tsdb query to fill that
+        # space.
+        log.info(
+            f'{backfilled_size_s} seconds worth of {timeframe}s loaded'
+        )
+
+    # Load TSDB history into shm buffer (for display) if there is
+    # remaining buffer space.
+
+    time_key: str = 'time'
+    if getattr(storemod, 'ohlc_key_map', False):
+        keymap: bidict = storemod.ohlc_key_map
+        time_key: str = keymap.inverse['time']
+
+    # if (
+    #     not len(tsdb_history)
+    # ):
+    #     return
+
+    tsdb_last_frame_start: datetime = last_tsdb_dt
+    # load as much from storage into shm possible (depends on
+    # user's shm size settings).
+    while shm._first.value > 0:
+
+        tsdb_history = await storage.read_ohlcv(
+            fqme,
+            timeframe=timeframe,
+            end=tsdb_last_frame_start,
+        )
+
+        # # empty query
+        # if not len(tsdb_history):
+        #     break
+
+        next_start = tsdb_history[time_key][0]
+        if next_start >= tsdb_last_frame_start:
+            # no earlier data detected
+            break
+
+        else:
+            tsdb_last_frame_start = next_start
+
+        # TODO: see if there's faster multi-field reads:
+        # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields
+        # re-index with a `time` and index field
+        prepend_start = shm._first.value
+
+        to_push = tsdb_history[-prepend_start:]
+        shm.push(
+            to_push,
+
+            # insert the history pre a "days worth" of samples
+            # to leave some real-time buffer space at the end.
+            prepend=True,
+            # update_first=False,
+            # start=prepend_start,
+            field_map=storemod.ohlc_key_map,
+        )
+
+        log.info(f'Loaded {to_push.shape} datums from storage')
+        tsdb_last_frame_start = tsdb_history[time_key][0]
+
+        # manually trigger step update to update charts/fsps
+        # which need an incremental update.
+        # NOTE: the way this works is super duper
+        # un-intuitive right now:
+        # - the broadcaster fires a msg to the fsp subsystem.
+        # - fsp subsys then checks for a sample step diff and
+        #   possibly recomputes prepended history.
+        # - the fsp then sends back to the parent actor
+        #   (usually a chart showing graphics for said fsp)
+        #   which tells the chart to conduct a manual full
+        #   graphics loop cycle.
+        # await sampler_stream.send('broadcast_all')
+
+
+async def push_latest_frame(
+    # box-type only that should get packed with the datetime
+    # objects received for the latest history frame
+    dt_eps: list[DateTime],  # (start, end) appended in place
+    shm: ShmArray,
+    get_hist: Callable[
+        [int, datetime, datetime],
+        tuple[np.ndarray, str]
+    ],
+    timeframe: float,
+    config: dict,
+
+    task_status: TaskStatus[
+        list[datetime] | None
+    ] = trio.TASK_STATUS_IGNORED,
+
+) -> list[datetime] | None:
+    # get latest query's worth of history all the way
+    # back to what is recorded in the tsdb
+    try:
+        (
+            array,
+            mr_start_dt,
+            mr_end_dt,
+        ) = await get_hist(
+            timeframe,
+            end_dt=None,
+        )
+        # so caller can access these ep values
+        dt_eps.extend([
+            mr_start_dt,
+            mr_end_dt,
+        ])
+        task_status.started(dt_eps)
+
+    # XXX: timeframe not supported for backend (since
+    # above exception type), terminate immediately since
+    # there's no backfilling possible.
+    except DataUnavailable:
+        task_status.started(None)
+
+        if timeframe > 1:
+            await tractor.pause()
+
+        # prolly tf not supported
+        return None
+
+    # NOTE: on the first history, most recent history
+    # frame we PREPEND from the current shm ._last index
+    # and thus a gap between the earliest datum loaded here
+    # and the latest loaded from the tsdb may exist!
+    log.info(f'Pushing {array.size} to shm!')
+    shm.push(
+        array,
+        prepend=True, # append on first frame
+    )
+
+    return dt_eps
+
+
+async def load_tsdb_hist(
+    storage: StorageClient,
+    mkt: MktPair,
+    timeframe: float,
+
+    task_status: TaskStatus[None] = trio.TASK_STATUS_IGNORED,
+    # ^NOTE(review): accepted for task-API compat; never signalled here — confirm no caller awaits it.
+) -> tuple[
+    np.ndarray,
+    DateTime,
+    DateTime,
+]|None:
+    # loads a (large) frame of data from the tsdb depending
+    # on the db's query size limit; our "nativedb" (using
+    # parquet) generally can load the entire history into mem
+    # but if not then below the remaining history can be lazy
+    # loaded?
+    fqme: str = mkt.fqme
+    tsdb_entry: tuple[
+        np.ndarray,
+        DateTime,
+        DateTime,
+    ]
+    try:
+        tsdb_entry: tuple[np.ndarray, DateTime, DateTime]|None = await storage.load(
+            fqme,
+            timeframe=timeframe,
+        )
+        return tsdb_entry
+
+    except TimeseriesNotFound:
+        log.warning(
+            f'No timeseries yet for {timeframe}@{fqme}'
+        )
+        return None
+
+
+async def tsdb_backfill(
+ mod: ModuleType,
+ storemod: ModuleType,
+
+ storage: StorageClient,
+ mkt: MktPair,
+ shm: ShmArray,
+ timeframe: float,
+
+ sampler_stream: tractor.MsgStream,
+
+ task_status: TaskStatus[
+ tuple[ShmArray, ShmArray]
+ ] = trio.TASK_STATUS_IGNORED,
+
+) -> None:
+
+ if timeframe not in (1, 60):
+ raise ValueError(
+ '`piker` only needs to support 1m and 1s sampling '
+ 'but ur api is trying to deliver a longer '
+ f'timeframe of {timeframe} seconds..\n'
+ 'So yuh.. dun do dat brudder.'
+ )
+
+ get_hist: Callable[
+ [int, datetime, datetime],
+ tuple[np.ndarray, str]
+ ]
+ config: dict[str, int]
+ async with (
+ mod.open_history_client(
+ mkt,
+ ) as (get_hist, config),
+
+ # NOTE: this sub-nursery splits to tasks for the given
+ # sampling rate to concurrently load offline tsdb
+ # timeseries as well as new data from the venue backend!
+ ):
+ log.info(
+ f'`{mod}` history client returned backfill config:\n'
+ f'{pformat(config)}\n'
+ )
+
+ # concurrently load the provider's most-recent-frame AND any
+ # pre-existing tsdb history already saved in `piker` storage.
+ dt_eps: list[DateTime, DateTime] = []
+ async with (
+ tractor.trionics.collapse_eg(),
+ trio.open_nursery() as tn
+ ):
+ tn.start_soon(
+ push_latest_frame,
+ dt_eps,
+ shm,
+ get_hist,
+ timeframe,
+ config,
+ )
+ tsdb_entry: tuple = await load_tsdb_hist(
+ storage,
+ mkt,
+ timeframe,
+ )
+
+ # tell parent task to continue
+ # TODO: really we'd want this the other way with the
+ # tsdb load happening asap and the since the latest
+ # frame query will normally be the main source of
+ # latency?
+ task_status.started()
+
+ # NOTE: iabs to start backfilling from, reverse chronological,
+ # ONLY AFTER the first history frame has been pushed to
+ # mem!
+ backfill_gap_from_shm_index: int = shm._first.value + 1
+
+ # Prepend any tsdb history into the rt-shm-buffer which
+ # should NOW be getting filled with the most recent history
+ # pulled from the data-backend.
+ if dt_eps:
+ # well then, unpack the latest (gap) backfilled frame dts
+ (
+ mr_start_dt,
+ mr_end_dt,
+ ) = dt_eps
+
+ first_frame_dur_s: Duration = (mr_end_dt - mr_start_dt).seconds
+ calced_frame_size: Duration = mk_duration(
+ seconds=first_frame_dur_s,
+ )
+ # NOTE, attempt to use the backend declared default frame
+ # sizing (as allowed by their time-series query APIs) and
+ # if not provided try to construct a default from the
+ # first frame received above.
+ def_frame_durs: dict[
+ int,
+ Duration,
+ ]|None = config.get('frame_types', None)
+
+ if def_frame_durs:
+ def_frame_size: Duration = def_frame_durs[timeframe]
+
+ if def_frame_size != calced_frame_size:
+ log.warning(
+ f'Expected frame size {def_frame_size}\n'
+ f'Rxed frame {calced_frame_size}\n'
+ )
+ # await tractor.pause()
+ else:
+ # use what we calced from first frame above.
+ def_frame_size = calced_frame_size
+
+ # NOTE: when there's no offline data, there's 2 cases:
+ # - data backend doesn't support timeframe/sample
+ # period (in which case `dt_eps` should be `None` and
+ # we shouldn't be here!), or
+ # - no prior history has been stored (yet) and we need
+ # todo full backfill of the history now.
+ if tsdb_entry is None:
+ # indicate to backfill task to fill the whole
+ # shm buffer as much as it can!
+ last_tsdb_dt = None
+
+ # there's existing tsdb history from (offline) storage
+ # so only backfill the gap between the
+ # most-recent-frame (mrf) and that latest sample.
+ else:
+ (
+ tsdb_history,
+ first_tsdb_dt,
+ last_tsdb_dt,
+ ) = tsdb_entry
+
+ # await tractor.pause()
+
+ # if there is a gap to backfill from the first
+ # history frame until the last datum loaded from the tsdb
+ # continue that now in the background
+ async with (
+ tractor.trionics.collapse_eg(),
+ trio.open_nursery() as tn,
+ ):
+
+ bf_done: trio.Event = await tn.start(
+ partial(
+ start_backfill,
+ get_hist=get_hist,
+ def_frame_duration=def_frame_size,
+ mod=mod,
+ mkt=mkt,
+ shm=shm,
+ timeframe=timeframe,
+
+ backfill_from_shm_index=backfill_gap_from_shm_index,
+ backfill_from_dt=mr_start_dt,
+
+ sampler_stream=sampler_stream,
+ backfill_until_dt=last_tsdb_dt,
+
+ storage=storage,
+ write_tsdb=True,
+ )
+ )
+ nulls_detected: trio.Event|None = None
+
+ if last_tsdb_dt is not None:
+
+ # calc the index from which the tsdb data should be
+ # prepended, presuming there is a gap between the
+ # latest frame (loaded/read above) and the latest
+ # sample loaded from the tsdb.
+ backfill_diff: Duration = mr_start_dt - last_tsdb_dt
+ offset_s: float = backfill_diff.in_seconds()
+
+                # XXX EDGE CASEs: the most recent frame overlaps with
+                # prior tsdb history!!
+                # - so the latest frame's start time is earlier than
+                #   the tsdb's latest sample.
+                # - alternatively this may also more generally occur
+                #   when the venue was closed (say over the weekend)
+                #   causing a timeseries gap, AND the query frames size
+                #   (eg. for ib's 1s we rx 2k datums ~= 33.33m) IS
+                #   GREATER THAN the current venue-market's operating
+                #   session (time) we will receive datums from BEFORE THE
+                #   CLOSURE GAP and thus the `offset_s` value will be
+                #   NEGATIVE! In this case we need to ensure we don't try
+                #   to push datums that have already been recorded in the
+                #   tsdb. In this case we instead only retrieve and push
+                #   the series portion missing from the db's data set.
+ # if offset_s < 0:
+ # non_overlap_diff: Duration = mr_end_dt - last_tsdb_dt
+ # non_overlap_offset_s: float = backfill_diff.in_seconds()
+
+ offset_samples: int = round(offset_s / timeframe)
+
+ # TODO: see if there's faster multi-field reads:
+ # https://numpy.org/doc/stable/user/basics.rec.html#accessing-multiple-fields
+ # re-index with a `time` and index field
+ if offset_s > 0:
+ # NOTE XXX: ONLY when there is an actual gap
+ # between the earliest sample in the latest history
+ # frame do we want to NOT stick the latest tsdb
+ # history adjacent to that latest frame!
+ prepend_start = shm._first.value - offset_samples + 1
+ to_push = tsdb_history[-prepend_start:]
+ else:
+ # when there is overlap we want to remove the
+ # overlapping samples from the tsdb portion (taking
+ # instead the latest frame's values since THEY
+ # SHOULD BE THE SAME) and prepend DIRECTLY adjacent
+ # to the latest frame!
+ # TODO: assert the overlap segment array contains
+ # the same values!?!
+ prepend_start = shm._first.value
+ to_push = tsdb_history[-(shm._first.value):offset_samples - 1]
+
+ # tsdb history is so far in the past we can't fit it in
+ # shm buffer space so simply don't load it!
+ if prepend_start > 0:
+ shm.push(
+ to_push,
+
+ # insert the history pre a "days worth" of samples
+ # to leave some real-time buffer space at the end.
+ prepend=True,
+ # update_first=False,
+ start=prepend_start,
+ field_map=storemod.ohlc_key_map,
+ )
+
+ log.info(f'Loaded {to_push.shape} datums from storage')
+
+ # NOTE: ASYNC-conduct tsdb timestamp gap detection and backfill any
+ # seemingly missing (null-time) segments..
+ # TODO: ideally these can never exist!
+ # -[ ] somehow it seems sometimes we're writing zero-ed
+ # segments to tsdbs during teardown?
+ # -[ ] can we ensure that the backfiller tasks do this
+            #    work PREVENTIVELY instead?
+ # -[ ] fill in non-zero epoch time values ALWAYS!
+ # await maybe_fill_null_segments(
+ nulls_detected: trio.Event = await tn.start(partial(
+ maybe_fill_null_segments,
+
+ shm=shm,
+ timeframe=timeframe,
+ get_hist=get_hist,
+ sampler_stream=sampler_stream,
+ mkt=mkt,
+ backfill_until_dt=last_tsdb_dt,
+ ))
+
+ # 2nd nursery END
+
+ # TODO: who would want to?
+ if nulls_detected:
+ await nulls_detected.wait()
+
+ await bf_done.wait()
+ # TODO: maybe start history anal and load missing "history
+ # gaps" via backend..
+
+ # if len(hist_shm.array) < 2:
+ # TODO: there's an edge case here to solve where if the last
+ # frame before market close (at least on ib) was pushed and
+ # there was only "1 new" row pushed from the first backfill
+ # query-iteration, then the sample step sizing calcs will
+ # break upstream from here since you can't diff on at least
+ # 2 steps... probably should also add logic to compute from
+ # the tsdb series and stash that somewhere as meta data on
+ # the shm buffer?.. no se.
+
+ # backload any further data from tsdb (concurrently per
+ # timeframe) if not all data was able to be loaded (in memory)
+ # from the ``StorageClient.load()`` call above.
+ await trio.sleep_forever()
+
+ # XXX NOTE: this is legacy from when we were using
+ # marketstore and we needed to continue backloading
+ # incrementally from the tsdb client.. (bc it couldn't
+ # handle a single large query with gRPC for some
+ # reason.. classic goolag pos)
+ # tn.start_soon(
+ # back_load_from_tsdb,
+
+ # storemod,
+ # storage,
+ # fqme,
+
+ # tsdb_history,
+ # last_tsdb_dt,
+ # mr_start_dt,
+ # mr_end_dt,
+ # bf_done,
+
+ # timeframe,
+ # shm,
+ # )
+
+
+async def manage_history(
+ mod: ModuleType,
+ mkt: MktPair,
+ some_data_ready: trio.Event,
+ feed_is_live: trio.Event,
+ timeframe: float = 60, # in seconds
+ wait_for_live_timeout: float = 0.5,
+
+ task_status: TaskStatus[
+ tuple[ShmArray, ShmArray]
+ ] = trio.TASK_STATUS_IGNORED,
+
+) -> None:
+ '''
+ Load historical series data from offline-storage (tsdb) and any
+ missing new datums from data provider(s).
+
+ This is the primary "backfilling service" `trio.Task` entrypoint
+ and conducts,
+
+    - time-series retrieval for offline-data previously stored in
+ any (connected) tsdb,
+
+ - queries for missing new datums (compared with the latest found
+ from ^) onward to the present by pulling from available
+ `datad`-provider backends.
+
+ - real-time update of both the existing tsdb-records and the
+ allocated shared-memory-buffer as required by downstream
+ `piker.data`-layer consumer-wares.
+
+ Init sequence:
+ -------------
+ - allocate shm (numpy array) buffers for 60s & 1s sample rates
+ - configure "zero index" for each buffer: the index where
+      history will be prepended *to* and new live data will be
+      appended *from*.
+ - open a ``.storage.StorageClient`` and load any existing tsdb
+ history as well as (async) start a backfill task which loads
+ missing (newer) history from the data provider backend:
+ - tsdb history is loaded first and pushed to shm ASAP.
+ - the backfill task loads the most recent history before
+ unblocking its parent task, so that the `ShmArray._last` is
+ up to date to allow the OHLC sampler to begin writing new
+ samples as the correct buffer index once the provider feed
+ engages.
+
+ '''
+ # TODO: is there a way to make each shm file key
+ # actor-tree-discovery-addr unique so we avoid collisions
+ # when doing tests which also allocate shms for certain instruments
+ # that may be in use on the system by some other running daemons?
+ # from tractor._state import _runtime_vars
+ # port = _runtime_vars['_root_mailbox'][1]
+
+ uid: tuple = tractor.current_actor().uid
+ name, uuid = uid
+    service: str = name.removesuffix(f'.{mod.name}')
+ fqme: str = mkt.get_fqme(delim_char='')
+
+ # (maybe) allocate shm array for this broker/symbol which will
+ # be used for fast near-term history capture and processing.
+ hist_shm, opened = maybe_open_shm_array(
+ size=_default_hist_size,
+ append_start_index=_hist_buffer_start,
+
+ key=f'piker.{service}[{uuid[:16]}].{fqme}.hist',
+
+ # use any broker defined ohlc dtype:
+ dtype=getattr(mod, '_ohlc_dtype', def_iohlcv_fields),
+
+ # we expect the sub-actor to write
+ readonly=False,
+ )
+ hist_zero_index = hist_shm.index - 1
+
+ # TODO: history validation
+ if not opened:
+ raise RuntimeError(
+ "Persistent shm for sym was already open?!"
+ )
+
+ rt_shm, opened = maybe_open_shm_array(
+ size=_default_rt_size,
+ append_start_index=_rt_buffer_start,
+ key=f'piker.{service}[{uuid[:16]}].{fqme}.rt',
+
+ # use any broker defined ohlc dtype:
+ dtype=getattr(mod, '_ohlc_dtype', def_iohlcv_fields),
+
+ # we expect the sub-actor to write
+ readonly=False,
+ )
+
+ # (for now) set the rt (hft) shm array with space to prepend
+ # only a few days worth of 1s history.
+ days: int = 2
+ start_index: int = days*_secs_in_day
+ rt_shm._first.value = start_index
+ rt_shm._last.value = start_index
+ rt_zero_index = rt_shm.index - 1
+
+ if not opened:
+ raise RuntimeError(
+ "Persistent shm for sym was already open?!"
+ )
+
+ open_history_client = getattr(
+ mod,
+ 'open_history_client',
+ )
+ assert open_history_client
+
+ # TODO: maybe it should be a subpkg of `.data`?
+ from piker import storage
+
+ storemod: ModuleType
+ client: StorageClient
+ tn: trio.Nursery
+ async with (
+ storage.open_storage_client() as (
+ storemod,
+ client,
+ ),
+
+ # NOTE: this nursery spawns a task per "timeframe" (aka
+ # sampling period) data set since normally differently
+ # sampled timeseries can be loaded / process independently
+ # ;)
+ tractor.trionics.collapse_eg(),
+ trio.open_nursery() as tn,
+ ):
+ log.info(
+ f'Connecting to storage backend `{storemod.name}`:\n'
+ f'location: {client.address}\n'
+ f'db cardinality: {client.cardinality}\n'
+ # TODO: show backend config, eg:
+ # - network settings
+ # - storage size with compression
+ # - number of loaded time series?
+ )
+
+ # NOTE: this call ONLY UNBLOCKS once the latest-most frame
+ # (i.e. history just before the live feed latest datum) of
+ # history has been loaded and written to the shm buffer:
+ # - the backfiller task can write in reverse chronological
+ # to the shm and tsdb
+ # - the tsdb data can be loaded immediately and the
+ # backfiller can do a single append from it's end datum and
+ # then prepends backward to that from the current time
+ # step.
+ tf2mem: dict = {
+ 1: rt_shm,
+ 60: hist_shm,
+ }
+
+ shms_by_period: dict|None = None
+ with trio.move_on_after(wait_for_live_timeout) as cs:
+ await feed_is_live.wait()
+
+ if cs.cancelled_caught:
+ log.warning(
+ f'No live feed within {wait_for_live_timeout!r}s\n'
+ f'fqme: {mkt.fqme!r}\n'
+ f'NOT activating shm-buffer-sampler!!\n'
+ )
+
+ if feed_is_live.is_set():
+ shms_by_period: dict[int, dict] = {
+ 1.: rt_shm.token,
+ 60.: hist_shm.token,
+ }
+ async with open_sample_stream(
+ period_s=1.,
+ shms_by_period=shms_by_period,
+
+ # NOTE: we want to only open a stream for doing
+ # broadcasts on backfill operations, not receive the
+ # sample index-stream (since there's no code in this
+ # data feed layer that needs to consume it).
+ open_index_stream=True,
+ sub_for_broadcasts=False,
+
+ ) as sample_stream:
+ # register 1s and 1m buffers with the global
+ # incrementer task
+ log.info(f'Connected to sampler stream: {sample_stream}')
+
+ for timeframe in [60, 1]:
+ await tn.start(partial(
+ tsdb_backfill,
+ mod=mod,
+ storemod=storemod,
+ storage=client,
+ mkt=mkt,
+ shm=tf2mem[timeframe],
+ timeframe=timeframe,
+ sampler_stream=sample_stream,
+ ))
+
+ # indicate to caller that feed can be delivered to
+ # remote requesting client since we've loaded history
+ # data that can be used.
+ some_data_ready.set()
+
+ # wait for a live feed before starting the sampler.
+ # await feed_is_live.wait()
+
+ # yield back after client connect with filled shm
+ task_status.started((
+ hist_zero_index,
+ hist_shm,
+ rt_zero_index,
+ rt_shm,
+ ))
+
+    # history retrieval loop depending on user interaction
+    # and thus a small RPC-prot for remotely controlling
+ # what data is loaded for viewing.
+ await trio.sleep_forever()
+
+
+def iter_dfs_from_shms(
+ fqme: str
+) -> Generator[
+ tuple[Path, ShmArray, pl.DataFrame],
+ None,
+ None,
+]:
+ # shm buffer size table based on known sample rates
+ sizes: dict[str, int] = {
+ 'hist': _default_hist_size,
+ 'rt': _default_rt_size,
+ }
+
+ # load all detected shm buffer files which have the
+ # passed FQME pattern in the file name.
+ shmfiles: list[Path] = []
+ shmdir = Path('/dev/shm/')
+
+ for shmfile in shmdir.glob(f'*{fqme}*'):
+ filename: str = shmfile.name
+
+ # skip index files
+ if (
+ '_first' in filename
+ or '_last' in filename
+ ):
+ continue
+
+ assert shmfile.is_file()
+        log.debug(f'Found matching shm buffer file: {filename}')
+ shmfiles.append(shmfile)
+
+ for shmfile in shmfiles:
+
+ # lookup array buffer size based on file suffix
+ # being either .rt or .hist
+ key: str = shmfile.name.rsplit('.')[-1]
+
+ # skip FSP buffers for now..
+ if key not in sizes:
+ continue
+
+ size: int = sizes[key]
+
+ # attach to any shm buffer, load array into polars df,
+ # write to local parquet file.
+ shm, opened = maybe_open_shm_array(
+ key=shmfile.name,
+ size=size,
+ dtype=def_iohlcv_fields,
+ readonly=True,
+ )
+ assert not opened
+ ohlcv: np.ndarray = shm.array
+ df: pl.DataFrame = np2pl(ohlcv)
+
+ yield (
+ shmfile,
+ shm,
+ df,
+ )
diff --git a/piker/ui/_editors.py b/piker/ui/_editors.py
index 9809ba71..872ec910 100644
--- a/piker/ui/_editors.py
+++ b/piker/ui/_editors.py
@@ -21,6 +21,7 @@ Higher level annotation editors.
from __future__ import annotations
from collections import defaultdict
from typing import (
+ Literal,
Sequence,
TYPE_CHECKING,
)
@@ -71,9 +72,18 @@ log = get_logger(__name__)
class ArrowEditor(Struct):
+ '''
+ Annotate a chart-view with arrows most often used for indicating,
+ - order txns/clears,
+ - positions directions,
+ - general points-of-interest like nooz events.
+ '''
godw: GodWidget = None # type: ignore # noqa
- _arrows: dict[str, list[pg.ArrowItem]] = {}
+ _arrows: dict[
+ str,
+ list[pg.ArrowItem]
+ ] = {}
def add(
self,
@@ -81,8 +91,19 @@ class ArrowEditor(Struct):
uid: str,
x: float,
y: float,
- color: str = 'default',
- pointing: str | None = None,
+ color: str|None = None,
+ pointing: Literal[
+ 'up',
+ 'down',
+ None,
+ ] = None,
+ alpha: int = 255,
+ zval: float = 1e9,
+ headLen: float|None = None,
+ headWidth: float|None = None,
+ tailLen: float|None = None,
+ tailWidth: float|None = None,
+ pxMode: bool = True,
) -> pg.ArrowItem:
'''
@@ -98,29 +119,83 @@ class ArrowEditor(Struct):
# scale arrow sizing to dpi-aware font
size = _font.font.pixelSize() * 0.8
+ # allow caller override of head dimensions
+ if headLen is None:
+ headLen = size
+ if headWidth is None:
+ headWidth = size/2
+ # tail params default to None (no tail)
+ if tailWidth is None:
+ tailWidth = 3
+
+ color = color or 'default'
+ color = QColor(hcolor(color))
+ color.setAlpha(alpha)
+ pen = fn.mkPen(color, width=1)
+ brush = fn.mkBrush(color)
arrow = pg.ArrowItem(
angle=angle,
baseAngle=0,
- headLen=size,
- headWidth=size/2,
- tailLen=None,
- pxMode=True,
-
+ headLen=headLen,
+ headWidth=headWidth,
+ tailLen=tailLen,
+ tailWidth=tailWidth,
+ pxMode=pxMode,
# coloring
- pen=pg.mkPen(hcolor('papas_special')),
- brush=pg.mkBrush(hcolor(color)),
+ pen=pen,
+ brush=brush,
)
+ arrow.setZValue(zval)
arrow.setPos(x, y)
- self._arrows.setdefault(uid, []).append(arrow)
+ plot.addItem(arrow) # render to view
- # render to view
- plot.addItem(arrow)
+ # register for removal
+ arrow._uid = uid
+ self._arrows.setdefault(
+ uid, []
+ ).append(arrow)
return arrow
- def remove(self, arrow) -> bool:
+ def remove(
+ self,
+ arrow: pg.ArrowItem,
+ ) -> None:
+ '''
+ Remove a *single arrow* from all chart views to which it was
+ added.
+
+ '''
+ uid: str = arrow._uid
+ arrows: list[pg.ArrowItem] = self._arrows[uid]
+ log.info(
+ f'Removing arrow from views\n'
+ f'uid: {uid!r}\n'
+ f'{arrow!r}\n'
+ )
for linked in self.godw.iter_linked():
- linked.chart.plotItem.removeItem(arrow)
+ if not (chart := linked.chart):
+ continue
+
+ chart.plotItem.removeItem(arrow)
+ try:
+ arrows.remove(arrow)
+ except ValueError:
+ log.warning(
+ f'Arrow was already removed?\n'
+ f'uid: {uid!r}\n'
+ f'{arrow!r}\n'
+ )
+
+ def remove_all(self) -> set[pg.ArrowItem]:
+ '''
+ Remove all arrows added by this editor from all
+ chart-views.
+
+ '''
+ for uid, arrows in self._arrows.items():
+            for arrow in tuple(arrows):
+ self.remove(arrow)
class LineEditor(Struct):
@@ -266,6 +341,9 @@ class LineEditor(Struct):
return lines
+ # compat with ArrowEditor
+ remove = remove_line
+
def as_point(
pair: Sequence[float, float] | QPointF,
@@ -298,7 +376,7 @@ class SelectRect(QtWidgets.QGraphicsRectItem):
def __init__(
self,
viewbox: ViewBox,
- color: str | None = None,
+ color: str|None = None,
) -> None:
super().__init__(0, 0, 1, 1)
@@ -614,3 +692,6 @@ class SelectRect(QtWidgets.QGraphicsRectItem):
):
scen.removeItem(self._label_proxy)
+
+ # compat with ArrowEditor
+ remove = delete
diff --git a/piker/ui/_remote_ctl.py b/piker/ui/_remote_ctl.py
index 05e145e7..f67f80ad 100644
--- a/piker/ui/_remote_ctl.py
+++ b/piker/ui/_remote_ctl.py
@@ -27,10 +27,12 @@ from contextlib import (
from functools import partial
from pprint import pformat
from typing import (
- # Any,
AsyncContextManager,
+ Literal,
)
+from uuid import uuid4
+import pyqtgraph as pg
import tractor
import trio
from tractor import trionics
@@ -47,12 +49,16 @@ from piker.brokers import SymbolNotFound
from piker.ui.qt import (
QGraphicsItem,
)
+from PyQt6.QtGui import QFont
from ._display import DisplayState
from ._interaction import ChartView
-from ._editors import SelectRect
+from ._editors import (
+ SelectRect,
+ ArrowEditor,
+)
from ._chart import ChartPlotWidget
from ._dataviz import Viz
-
+from ._style import hcolor
log = get_logger(__name__)
@@ -83,8 +89,40 @@ _ctxs: IpcCtxTable = {}
# the "annotations server" which actually renders to a Qt canvas).
# type AnnotsTable = dict[int, QGraphicsItem]
AnnotsTable = dict[int, QGraphicsItem]
+EditorsTable = dict[int, ArrowEditor]
_annots: AnnotsTable = {}
+_editors: EditorsTable = {}
+
+def rm_annot(
+ annot: ArrowEditor|SelectRect|pg.TextItem
+) -> bool:
+ global _editors
+ match annot:
+ case pg.ArrowItem():
+ editor = _editors[annot._uid]
+ editor.remove(annot)
+ # ^TODO? only remove each arrow or all?
+ # if editor._arrows:
+ # editor.remove_all()
+ # else:
+ # log.warning(
+ # f'Annot already removed!\n'
+ # f'{annot!r}\n'
+ # )
+ return True
+
+ case SelectRect():
+ annot.delete()
+ return True
+
+ case pg.TextItem():
+ scene = annot.scene()
+ if scene:
+ scene.removeItem(annot)
+ return True
+
+ return False
async def serve_rc_annots(
@@ -95,6 +133,12 @@ async def serve_rc_annots(
annots: AnnotsTable,
) -> None:
+ '''
+ A small viz(ualization) server for remote ctl of chart
+ annotations.
+
+ '''
+ global _editors
async for msg in annot_req_stream:
match msg:
case {
@@ -104,14 +148,77 @@ async def serve_rc_annots(
'meth': str(meth),
'kwargs': dict(kwargs),
}:
-
ds: DisplayState = _dss[fqme]
- chart: ChartPlotWidget = {
- 60: ds.hist_chart,
- 1: ds.chart,
- }[timeframe]
+ try:
+ chart: ChartPlotWidget = {
+ 60: ds.hist_chart,
+ 1: ds.chart,
+ }[timeframe]
+ except KeyError:
+ msg: str = (
+ f'No chart for timeframe={timeframe}s, '
+ f'skipping rect annotation'
+ )
+                        log.exception(msg)
+ await annot_req_stream.send({'error': msg})
+ continue
+
cv: ChartView = chart.cv
+ # NEW: if timestamps provided, lookup current indices
+ # from shm to ensure alignment with current buffer
+ # state
+ start_time = kwargs.pop('start_time', None)
+ end_time = kwargs.pop('end_time', None)
+ if (
+ start_time is not None
+ and end_time is not None
+ ):
+ viz: Viz = chart.get_viz(fqme)
+ shm = viz.shm
+ arr = shm.array
+
+ # lookup start index
+ start_matches = arr[arr['time'] == start_time]
+ if len(start_matches) == 0:
+ msg: str = (
+ f'No shm entry for start_time={start_time}, '
+ f'skipping rect'
+ )
+ log.error(msg)
+ await annot_req_stream.send({'error': msg})
+ continue
+
+ # lookup end index
+ end_matches = arr[arr['time'] == end_time]
+ if len(end_matches) == 0:
+ msg: str = (
+ f'No shm entry for end_time={end_time}, '
+ f'skipping rect'
+ )
+ log.error(msg)
+ await annot_req_stream.send({'error': msg})
+ continue
+
+ # get close price from start bar, open from end
+ # bar
+ start_idx = float(start_matches[0]['index'])
+ end_idx = float(end_matches[0]['index'])
+ start_close = float(start_matches[0]['close'])
+ end_open = float(end_matches[0]['open'])
+
+ # reconstruct start_pos and end_pos with
+ # looked-up indices
+ from_idx: float = 0.16 - 0.06 # BGM offset
+ kwargs['start_pos'] = (
+ start_idx + 1 - from_idx,
+ start_close,
+ )
+ kwargs['end_pos'] = (
+ end_idx + from_idx,
+ end_open,
+ )
+
# annot type lookup from cmd
rect = SelectRect(
viewbox=cv,
@@ -130,21 +237,207 @@ async def serve_rc_annots(
# delegate generically to the requested method
getattr(rect, meth)(**kwargs)
rect.show()
+
+ # XXX: store absolute coords for repositioning
+ # during viz redraws (eg backfill updates)
+ rect._meth = meth
+ rect._kwargs = kwargs
+
aid: int = id(rect)
annots[aid] = rect
aids: set[int] = ctxs[ipc_key][1]
aids.add(aid)
await annot_req_stream.send(aid)
+ case {
+ 'cmd': 'ArrowEditor',
+ 'fqme': fqme,
+ 'timeframe': timeframe,
+ 'meth': 'add'|'remove' as meth,
+ 'kwargs': {
+ 'x': float(x),
+ 'y': float(y),
+ 'pointing': pointing,
+ 'color': color,
+ 'aid': str()|None as aid,
+ 'alpha': int(alpha),
+ 'headLen': int()|float()|None as headLen,
+ 'headWidth': int()|float()|None as headWidth,
+ 'tailLen': int()|float()|None as tailLen,
+ 'tailWidth': int()|float()|None as tailWidth,
+ 'pxMode': bool(pxMode),
+ 'time': int()|float()|None as timestamp,
+ },
+ # ?TODO? split based on method fn-sigs?
+ # 'pointing',
+ }:
+ ds: DisplayState = _dss[fqme]
+ try:
+ chart: ChartPlotWidget = {
+ 60: ds.hist_chart,
+ 1: ds.chart,
+ }[timeframe]
+ except KeyError:
+ log.warning(
+ f'No chart for timeframe={timeframe}s, '
+ f'skipping arrow annotation'
+ )
+ # return -1 to indicate failure
+ await annot_req_stream.send(-1)
+ continue
+ cv: ChartView = chart.cv
+ godw = chart.linked.godwidget
+
+ # NEW: if timestamp provided, lookup current index
+ # from shm to ensure alignment with current buffer
+ # state
+ if timestamp is not None:
+ viz: Viz = chart.get_viz(fqme)
+ shm = viz.shm
+ arr = shm.array
+ # find index where time matches timestamp
+ matches = arr[arr['time'] == timestamp]
+ if len(matches) == 0:
+ log.error(
+ f'No shm entry for timestamp={timestamp}, '
+ f'skipping arrow annotation'
+ )
+ await annot_req_stream.send(-1)
+ continue
+ # use the matched row's index as x
+ x = float(matches[0]['index'])
+
+ arrows = ArrowEditor(godw=godw)
+ # `.add/.remove()` API
+ if meth != 'add':
+ # await tractor.pause()
+ raise ValueError(
+ f'Invalid arrow-edit request ?\n'
+ f'{msg!r}\n'
+ )
+
+ aid: str = str(uuid4())
+ arrow: pg.ArrowItem = arrows.add(
+ plot=chart.plotItem,
+ uid=aid,
+ x=x,
+ y=y,
+ pointing=pointing,
+ color=color,
+ alpha=alpha,
+ headLen=headLen,
+ headWidth=headWidth,
+ tailLen=tailLen,
+ tailWidth=tailWidth,
+ pxMode=pxMode,
+ )
+ # XXX: store absolute coords for repositioning
+ # during viz redraws (eg backfill updates)
+ arrow._abs_x = x
+ arrow._abs_y = y
+
+ annots[aid] = arrow
+ _editors[aid] = arrows
+ aids: set[int] = ctxs[ipc_key][1]
+ aids.add(aid)
+ await annot_req_stream.send(aid)
+
+ case {
+ 'cmd': 'TextItem',
+ 'fqme': fqme,
+ 'timeframe': timeframe,
+ 'kwargs': {
+ 'text': str(text),
+ 'x': int()|float() as x,
+ 'y': int()|float() as y,
+ 'color': color,
+ 'anchor': list(anchor),
+ 'font_size': int()|None as font_size,
+ 'time': int()|float()|None as timestamp,
+ },
+ }:
+ ds: DisplayState = _dss[fqme]
+ try:
+ chart: ChartPlotWidget = {
+ 60: ds.hist_chart,
+ 1: ds.chart,
+ }[timeframe]
+ except KeyError:
+ log.warning(
+ f'No chart for timeframe={timeframe}s, '
+ f'skipping text annotation'
+ )
+ await annot_req_stream.send(-1)
+ continue
+
+ # NEW: if timestamp provided, lookup current index
+ # from shm to ensure alignment with current buffer
+ # state
+ if timestamp is not None:
+ viz: Viz = chart.get_viz(fqme)
+ shm = viz.shm
+ arr = shm.array
+ # find index where time matches timestamp
+ matches = arr[arr['time'] == timestamp]
+ if len(matches) == 0:
+ log.error(
+ f'No shm entry for timestamp={timestamp}, '
+ f'skipping text annotation'
+ )
+ await annot_req_stream.send(-1)
+ continue
+ # use the matched row's index as x, +1 for text
+ # offset
+ x = float(matches[0]['index']) + 1
+
+ # convert named color to hex
+ color_hex: str = hcolor(color)
+
+ # create text item
+ text_item: pg.TextItem = pg.TextItem(
+ text=text,
+ color=color_hex,
+ anchor=anchor,
+
+ # ?TODO, pin to github:main for this?
+ # legacy, can have scaling ish?
+ # ensureInBounds=True,
+ )
+
+ # apply font size (default to DpiAwareFont if not
+ # provided)
+ if font_size is None:
+ from ._style import get_fonts
+ font, font_small = get_fonts()
+ font_size = font_small.px_size - 1
+
+ qfont: QFont = text_item.textItem.font()
+ qfont.setPixelSize(font_size)
+ text_item.setFont(qfont)
+
+ text_item.setPos(x, y)
+ chart.plotItem.addItem(text_item)
+
+ # XXX: store absolute coords for repositioning
+ # during viz redraws (eg backfill updates)
+ text_item._abs_x = x
+ text_item._abs_y = y
+
+ aid: str = str(uuid4())
+ annots[aid] = text_item
+ aids: set[int] = ctxs[ipc_key][1]
+ aids.add(aid)
+ await annot_req_stream.send(aid)
+
case {
'cmd': 'remove',
- 'aid': int(aid),
+ 'aid': int(aid)|str(aid),
}:
# NOTE: this is normally entered on
# a client's annotation de-alloc normally
# prior to detach or modify.
annot: QGraphicsItem = annots[aid]
- annot.delete()
+ assert rm_annot(annot)
# respond to client indicating annot
# was indeed deleted.
@@ -175,6 +468,38 @@ async def serve_rc_annots(
)
viz.reset_graphics()
+ # XXX: reposition all annotations to ensure they
+ # stay aligned with viz data after reset (eg during
+ # backfill when abs-index range changes)
+ n_repositioned: int = 0
+ for aid, annot in annots.items():
+ # arrows and text items use abs x,y coords
+ if (
+ hasattr(annot, '_abs_x')
+ and
+ hasattr(annot, '_abs_y')
+ ):
+ annot.setPos(
+ annot._abs_x,
+ annot._abs_y,
+ )
+ n_repositioned += 1
+
+ # rects use method + kwargs
+ elif (
+ hasattr(annot, '_meth')
+ and
+ hasattr(annot, '_kwargs')
+ ):
+ getattr(annot, annot._meth)(**annot._kwargs)
+ n_repositioned += 1
+
+ if n_repositioned:
+ log.info(
+ f'Repositioned {n_repositioned} annotation(s) '
+ f'after viz redraw'
+ )
+
case _:
log.error(
'Unknown remote annotation cmd:\n'
@@ -188,6 +513,12 @@ async def remote_annotate(
) -> None:
global _dss, _ctxs
+ if not _dss:
+ raise RuntimeError(
+ 'Race condition on chart-init state ??\n'
+            'Another actor is trying to annotate this chart '
+ 'before it has fully spawned.\n'
+ )
assert _dss
_ctxs[ctx.cid] = (ctx, set())
@@ -212,7 +543,7 @@ async def remote_annotate(
assert _ctx is ctx
for aid in aids:
annot: QGraphicsItem = _annots[aid]
- annot.delete()
+ assert rm_annot(annot)
class AnnotCtl(Struct):
@@ -257,36 +588,47 @@ class AnnotCtl(Struct):
from_acm: bool = False,
- ) -> int:
+ # NEW: optional timestamps for server-side index lookup
+ start_time: float|None = None,
+ end_time: float|None = None,
+
+ ) -> int|None:
'''
Add a `SelectRect` annotation to the target view, return
the instances `id(obj)` from the remote UI actor.
'''
ipc: MsgStream = self._get_ipc(fqme)
- await ipc.send({
- 'fqme': fqme,
- 'cmd': 'SelectRect',
- 'timeframe': timeframe,
- # 'meth': str(meth),
- 'meth': 'set_view_pos' if domain == 'view' else 'set_scene_pos',
- 'kwargs': {
- 'start_pos': tuple(start_pos),
- 'end_pos': tuple(end_pos),
- 'color': color,
- 'update_label': False,
- },
- })
- aid: int = await ipc.receive()
- self._ipcs[aid] = ipc
- if not from_acm:
- self._annot_stack.push_async_callback(
- partial(
- self.remove,
- aid,
+ with trio.fail_after(3):
+ await ipc.send({
+ 'fqme': fqme,
+ 'cmd': 'SelectRect',
+ 'timeframe': timeframe,
+ # 'meth': str(meth),
+ 'meth': 'set_view_pos' if domain == 'view' else 'set_scene_pos',
+ 'kwargs': {
+ 'start_pos': tuple(start_pos),
+ 'end_pos': tuple(end_pos),
+ 'color': color,
+ 'update_label': False,
+ 'start_time': start_time,
+ 'end_time': end_time,
+ },
+ })
+ aid: int|dict = await ipc.receive()
+ match aid:
+ case {'error': str(msg)}:
+ log.error(msg)
+ return None
+ self._ipcs[aid] = ipc
+ if not from_acm:
+ self._annot_stack.push_async_callback(
+ partial(
+ self.remove,
+ aid,
+ )
)
- )
- return aid
+ return aid
async def remove(
self,
@@ -334,20 +676,130 @@ class AnnotCtl(Struct):
'timeframe': timeframe,
})
- # TODO: do we even need this?
- # async def modify(
- # self,
- # aid: int, # annotation id
- # meth: str, # far end graphics object method to invoke
- # params: dict[str, Any], # far end `meth(**kwargs)`
- # ) -> bool:
- # '''
- # Modify an existing (remote) annotation's graphics
- # paramters, thus changing it's appearance / state in real
- # time.
+ async def add_arrow(
+ self,
+ fqme: str,
+ timeframe: float,
+ x: float,
+ y: float,
+ pointing: Literal[
+ 'up',
+ 'down',
+ ],
+ # TODO: a `Literal['view', 'scene']` for this?
+ # domain: str = 'view', # or 'scene'
+ color: str = 'dad_blue',
+ alpha: int = 116,
+ headLen: float|None = None,
+ headWidth: float|None = None,
+ tailLen: float|None = None,
+ tailWidth: float|None = None,
+ pxMode: bool = True,
- # '''
- # raise NotImplementedError
+ from_acm: bool = False,
+
+ # NEW: optional timestamp for server-side index lookup
+ time: float|None = None,
+
+ ) -> int|None:
+ '''
+ Add an arrow annotation (via `ArrowEditor`) to the target view,
+ return the instance's `id(obj)` from the remote UI actor.
+
+ '''
+ ipc: MsgStream = self._get_ipc(fqme)
+ with trio.fail_after(3):
+ await ipc.send({
+ 'fqme': fqme,
+ 'cmd': 'ArrowEditor',
+ 'timeframe': timeframe,
+ # 'meth': str(meth),
+ 'meth': 'add',
+ 'kwargs': {
+ 'x': float(x),
+ 'y': float(y),
+ 'color': color,
+ 'pointing': pointing, # up|down
+ 'alpha': alpha,
+ 'aid': None,
+ 'headLen': headLen,
+ 'headWidth': headWidth,
+ 'tailLen': tailLen,
+ 'tailWidth': tailWidth,
+ 'pxMode': pxMode,
+ 'time': time, # for server-side index lookup
+ },
+ })
+ aid: int|dict = await ipc.receive()
+ match aid:
+ case {'error': str(msg)}:
+ log.error(msg)
+ return None
+
+ self._ipcs[aid] = ipc
+ if not from_acm:
+ self._annot_stack.push_async_callback(
+ partial(
+ self.remove,
+ aid,
+ )
+ )
+ return aid
+
+ async def add_text(
+ self,
+ fqme: str,
+ timeframe: float,
+ text: str,
+ x: float,
+ y: float,
+ color: str|tuple = 'dad_blue',
+ anchor: tuple[float, float] = (0, 1),
+ font_size: int|None = None,
+
+ from_acm: bool = False,
+
+ # NEW: optional timestamp for server-side index lookup
+ time: float|None = None,
+
+ ) -> int|None:
+ '''
+ Add a `pg.TextItem` annotation to the target view.
+
+ anchor: (x, y) where (0,0) is upper-left, (1,1) is lower-right
+ font_size: pixel size for font, defaults to `_font.font.pixelSize()`
+
+ '''
+ ipc: MsgStream = self._get_ipc(fqme)
+ with trio.fail_after(3):
+ await ipc.send({
+ 'fqme': fqme,
+ 'cmd': 'TextItem',
+ 'timeframe': timeframe,
+ 'kwargs': {
+ 'text': text,
+ 'x': float(x),
+ 'y': float(y),
+ 'color': color,
+ 'anchor': tuple(anchor),
+ 'font_size': font_size,
+ 'time': time, # for server-side index lookup
+ },
+ })
+ aid: int|dict = await ipc.receive()
+ match aid:
+ case {'error': str(msg)}:
+ log.error(msg)
+ return None
+ self._ipcs[aid] = ipc
+ if not from_acm:
+ self._annot_stack.push_async_callback(
+ partial(
+ self.remove,
+ aid,
+ )
+ )
+ return aid
@acm
@@ -374,7 +826,9 @@ async def open_annot_ctl(
# TODO: print the current discoverable actor UID set
# here as well?
if not maybe_portals:
- raise RuntimeError('No chart UI actors found in service domain?')
+ raise RuntimeError(
+ 'No chart actors found in service domain?'
+ )
for portal in maybe_portals:
ctx_mngrs.append(
diff --git a/piker/ui/_style.py b/piker/ui/_style.py
index b6c47817..893308b0 100644
--- a/piker/ui/_style.py
+++ b/piker/ui/_style.py
@@ -61,7 +61,7 @@ class DpiAwareFont:
) -> None:
self._font_size_calc_key: str = _font_size_key
- self._font_size: int | None = None
+ self._font_size: int|None = None
# Read preferred font size from main config file if it exists
conf, path = config.load('conf', touch_if_dne=True)
@@ -107,7 +107,22 @@ class DpiAwareFont:
@property
def px_size(self) -> int:
- return self._qfont.pixelSize()
+ size: int = self._qfont.pixelSize()
+
+ # XXX, when no Qt app has been spawned this will always be
+ # invalid..
+ # SO, just return any conf.toml value.
+ if size == -1:
+ if (conf_size := self._font_size) is None:
+ raise ValueError(
+ f'No valid `{type(_font).__name__}.px_size` set?\n'
+ f'\n'
+ f'-> `ui.font_size` is NOT set in `conf.toml`\n'
+ f'-> no Qt app is active ??\n'
+ )
+ return conf_size
+
+ return size
def configure_to_dpi(self, screen: QtGui.QScreen | None = None):
'''
@@ -221,6 +236,20 @@ def _config_fonts_to_screen() -> None:
_font_small.configure_to_dpi()
+def get_fonts() -> tuple[
+ DpiAwareFont,
+ DpiAwareFont,
+]:
+ '''
+ Get the singleton font pair (of instances) around which all other
+ UI/UX should be "scaled".
+
+ See `DpiAwareFont` for (internal) deats.
+
+ '''
+ return _font, _font_small
+
+
# TODO: re-compute font size when main widget switches screens?
# https://forum.qt.io/topic/54136/how-do-i-get-the-qscreen-my-widget-is-on-qapplication-desktop-screen-returns-a-qwidget-and-qobject_cast-qscreen-returns-null/3
diff --git a/pyproject.toml b/pyproject.toml
index d8b28257..13a7fbe3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -98,6 +98,7 @@ python-downloads = 'manual'
# https://docs.astral.sh/uv/concepts/projects/dependencies/#default-groups
default-groups = [
'uis',
+ 'repl',
]
# ------ tool.uv ------
@@ -116,7 +117,6 @@ uis = [
dev = [
# https://docs.astral.sh/uv/concepts/projects/dependencies/#development-dependencies
"cython >=3.0.0, <4.0.0",
-
# nested deps-groups
# https://docs.astral.sh/uv/concepts/projects/dependencies/#nesting-groups
{include-group = 'uis'},
@@ -130,10 +130,14 @@ repl = [
"greenback >=1.1.1, <2.0.0",
# @goodboy's preferred console toolz
- "xonsh",
+ "xonsh>=0.22.2",
"prompt-toolkit ==3.0.40",
"pyperclip>=1.9.0",
+ # for @claude's `snippets/claude_debug_helper.py` which it uses to do
+ # "offline" debug/crash REPL-in alongside a dev.
+ "pexpect>=4.9.0",
+
# ?TODO, new stuff to consider..
# "visidata" # console numerics
# "xxh" # for remote `xonsh`-ing
@@ -191,6 +195,11 @@ pyqtgraph = { git = "https://github.com/pikers/pyqtgraph.git" }
tomlkit = { git = "https://github.com/pikers/tomlkit.git", branch ="piker_pin" }
pyvnc = { git = "https://github.com/regulad/pyvnc.git" }
+# to get fancy next-cmd/suggestion feats prior to 0.22.2 B)
+# https://github.com/xonsh/xonsh/pull/6037
+# https://github.com/xonsh/xonsh/pull/6048
+# xonsh = { git = 'https://github.com/xonsh/xonsh.git', branch = 'main' }
+
# XXX since, we're like, always hacking new shite all-the-time. Bp
tractor = { git = "https://github.com/goodboy/tractor.git", branch ="piker_pin" }
# tractor = { git = "https://pikers.dev/goodboy/tractor", branch = "piker_pin" }
diff --git a/snippets/claude_debug_helper.py b/snippets/claude_debug_helper.py
new file mode 100755
index 00000000..97467d8a
--- /dev/null
+++ b/snippets/claude_debug_helper.py
@@ -0,0 +1,256 @@
+#!/usr/bin/env python
+'''
+Programmatic debugging helper for `pdbp` REPL human-like
+interaction but built to allow `claude` to interact with
+crashes and `tractor.pause()` breakpoints along side a human dev.
+
+Originally written by `clauded` during a backfiller inspection
+session with @goodboy trying to resolve duplicate/gappy ohlcv ts
+issues discovered while testing the new `nativedb` tsdb.
+
+Allows `claude` to run `pdb` commands and capture output in an "offline"
+manner but generating similar output as if it were interacting with
+the debug REPL.
+
+The use of `pexpect` is heavily based on tractor's REPL UX test
+suite(s), namely various `tests/devx/test_debugger.py` patterns.
+
+'''
+import sys
+import os
+import time
+
+import pexpect
+from pexpect.exceptions import (
+ TIMEOUT,
+ EOF,
+)
+
+
+PROMPT: str = r'\(Pdb\+\)'
+
+
+def expect(
+ child: pexpect.spawn,
+ patt: str,
+ **kwargs,
+) -> None:
+ '''
+ Expect wrapper that prints last console data before failing.
+
+ '''
+ try:
+ child.expect(
+ patt,
+ **kwargs,
+ )
+ except TIMEOUT:
+ before: str = (
+ str(child.before.decode())
+ if isinstance(child.before, bytes)
+ else str(child.before)
+ )
+ print(
+ f'TIMEOUT waiting for pattern: {patt}\n'
+ f'Last seen output:\n{before}'
+ )
+ raise
+
+
+def run_pdb_commands(
+ commands: list[str],
+ initial_cmd: str = 'piker store ldshm xmrusdt.usdtm.perp.binance',
+ timeout: int = 30,
+ print_output: bool = True,
+) -> dict[str, str]:
+ '''
+ Spawn piker process, wait for pdb prompt, execute commands.
+
+ Returns dict mapping command -> output.
+
+ '''
+ results: dict[str, str] = {}
+
+ # Disable colored output for easier parsing
+ os.environ['PYTHON_COLORS'] = '0'
+
+ # Spawn the process
+ if print_output:
+ print(f'Spawning: {initial_cmd}')
+
+ child: pexpect.spawn = pexpect.spawn(
+ initial_cmd,
+ timeout=timeout,
+ encoding='utf-8',
+ echo=False,
+ )
+
+ # Wait for pdb prompt
+ try:
+ expect(child, PROMPT, timeout=timeout)
+ if print_output:
+ print('Reached pdb prompt!')
+
+ # Execute each command
+ for cmd in commands:
+ if print_output:
+ print(f'\n>>> {cmd}')
+
+ child.sendline(cmd)
+ time.sleep(0.1)
+
+ # Wait for next prompt
+ expect(child, PROMPT, timeout=timeout)
+
+ # Capture output (everything before the prompt)
+ output: str = (
+ str(child.before.decode())
+ if isinstance(child.before, bytes)
+ else str(child.before)
+ )
+ results[cmd] = output
+
+ if print_output:
+ print(output)
+
+ # Quit debugger gracefully
+ child.sendline('quit')
+ try:
+ child.expect(EOF, timeout=5)
+ except (TIMEOUT, EOF):
+ pass
+
+ except TIMEOUT as e:
+ print(f'Timeout: {e}')
+ if child.before:
+ before: str = (
+ str(child.before.decode())
+ if isinstance(child.before, bytes)
+ else str(child.before)
+ )
+ print(f'Buffer:\n{before}')
+ results['_error'] = str(e)
+
+ finally:
+ if child.isalive():
+ child.close(force=True)
+
+ return results
+
+
+class InteractivePdbSession:
+ '''
+ Interactive pdb session manager for incremental debugging.
+
+ '''
+ def __init__(
+ self,
+ cmd: str = 'piker store ldshm xmrusdt.usdtm.perp.binance',
+ timeout: int = 30,
+ ):
+ self.cmd: str = cmd
+ self.timeout: int = timeout
+ self.child: pexpect.spawn|None = None
+ self.history: list[tuple[str, str]] = []
+
+ def start(self) -> None:
+ '''
+ Start the piker process and wait for first prompt.
+
+ '''
+ os.environ['PYTHON_COLORS'] = '0'
+
+ print(f'Starting: {self.cmd}')
+ self.child = pexpect.spawn(
+ self.cmd,
+ timeout=self.timeout,
+ encoding='utf-8',
+ echo=False,
+ )
+
+ # Wait for initial prompt
+ expect(self.child, PROMPT, timeout=self.timeout)
+ print('Ready at pdb prompt!')
+
+ def run(
+ self,
+ cmd: str,
+ print_output: bool = True,
+ ) -> str:
+ '''
+ Execute a single pdb command and return output.
+
+ '''
+ if not self.child or not self.child.isalive():
+ raise RuntimeError('Session not started or dead')
+
+ if print_output:
+ print(f'\n>>> {cmd}')
+
+ self.child.sendline(cmd)
+ time.sleep(0.1)
+
+ # Wait for next prompt
+ expect(self.child, PROMPT, timeout=self.timeout)
+
+ output: str = (
+ str(self.child.before.decode())
+ if isinstance(self.child.before, bytes)
+ else str(self.child.before)
+ )
+ self.history.append((cmd, output))
+
+ if print_output:
+ print(output)
+
+ return output
+
+ def quit(self) -> None:
+ '''
+ Exit the debugger and cleanup.
+
+ '''
+ if self.child and self.child.isalive():
+ self.child.sendline('quit')
+ try:
+ self.child.expect(EOF, timeout=5)
+ except (TIMEOUT, EOF):
+ pass
+ self.child.close(force=True)
+
+ def __enter__(self):
+ self.start()
+ return self
+
+ def __exit__(self, *args):
+ self.quit()
+
+
+if __name__ == '__main__':
+ # Example inspection commands
+ inspect_cmds: list[str] = [
+ 'locals().keys()',
+ 'type(deduped)',
+ 'deduped.shape',
+ (
+ 'step_gaps.shape '
+ 'if "step_gaps" in locals() '
+ 'else "N/A"'
+ ),
+ (
+ 'venue_gaps.shape '
+ 'if "venue_gaps" in locals() '
+ 'else "N/A"'
+ ),
+ ]
+
+ # Allow commands from CLI args
+ if len(sys.argv) > 1:
+ inspect_cmds = sys.argv[1:]
+
+ # Interactive session example
+ with InteractivePdbSession() as session:
+ for cmd in inspect_cmds:
+ session.run(cmd)
+
+ print('\n=== Session Complete ===')
diff --git a/uv.lock b/uv.lock
index e0f3d7fd..9de1420d 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1000,6 +1000,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/6e/23/e98758924d1b3aac11a626268eabf7f3cf177e7837c28d47bf84c64532d0/pendulum-3.1.0-py3-none-any.whl", hash = "sha256:f9178c2a8e291758ade1e8dd6371b1d26d08371b4c7730a6e9a3ef8b16ebae0f", size = 111799, upload-time = "2025-04-19T14:02:34.739Z" },
]
+[[package]]
+name = "pexpect"
+version = "4.9.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "ptyprocess" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/42/92/cc564bf6381ff43ce1f4d06852fc19a2f11d180f23dc32d9588bee2f149d/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f", size = 166450, upload-time = "2023-11-25T09:07:26.339Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772, upload-time = "2023-11-25T06:56:14.81Z" },
+]
+
[[package]]
name = "piker"
version = "0.1.0a0.dev0"
@@ -1047,6 +1059,7 @@ dev = [
{ name = "greenback" },
{ name = "i3ipc" },
{ name = "pdbp" },
+ { name = "pexpect" },
{ name = "prompt-toolkit" },
{ name = "pyperclip" },
{ name = "pyqt6" },
@@ -1062,6 +1075,7 @@ lint = [
repl = [
{ name = "greenback" },
{ name = "pdbp" },
+ { name = "pexpect" },
{ name = "prompt-toolkit" },
{ name = "pyperclip" },
{ name = "xonsh" },
@@ -1116,6 +1130,7 @@ dev = [
{ name = "greenback", specifier = ">=1.1.1,<2.0.0" },
{ name = "i3ipc", specifier = ">=2.2.1" },
{ name = "pdbp", specifier = ">=1.8.2,<2.0.0" },
+ { name = "pexpect", specifier = ">=4.9.0" },
{ name = "prompt-toolkit", specifier = "==3.0.40" },
{ name = "pyperclip", specifier = ">=1.9.0" },
{ name = "pyqt6", specifier = ">=6.7.0,<7.0.0" },
@@ -1123,15 +1138,16 @@ dev = [
{ name = "pytest" },
{ name = "qdarkstyle", specifier = ">=3.0.2,<4.0.0" },
{ name = "rapidfuzz", specifier = ">=3.2.0,<4.0.0" },
- { name = "xonsh" },
+ { name = "xonsh", specifier = ">=0.22.2" },
]
lint = [{ name = "ruff", specifier = ">=0.9.6" }]
repl = [
{ name = "greenback", specifier = ">=1.1.1,<2.0.0" },
{ name = "pdbp", specifier = ">=1.8.2,<2.0.0" },
+ { name = "pexpect", specifier = ">=4.9.0" },
{ name = "prompt-toolkit", specifier = "==3.0.40" },
{ name = "pyperclip", specifier = ">=1.9.0" },
- { name = "xonsh" },
+ { name = "xonsh", specifier = ">=0.22.2" },
]
testing = [{ name = "pytest" }]
uis = [
@@ -1297,6 +1313,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305, upload-time = "2025-10-08T19:49:00.792Z" },
]
+[[package]]
+name = "ptyprocess"
+version = "0.7.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/20/e5/16ff212c1e452235a90aeb09066144d0c5a6a8c0834397e03f5224495c4e/ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220", size = 70762, upload-time = "2020-12-28T15:15:30.155Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", size = 13993, upload-time = "2020-12-28T15:15:28.35Z" },
+]
+
[[package]]
name = "pyarrow"
version = "22.0.0"
@@ -1843,7 +1868,7 @@ source = { git = "https://github.com/pikers/tomlkit.git?branch=piker_pin#8e0239a
[[package]]
name = "tractor"
version = "0.1.0a6.dev0"
-source = { git = "https://github.com/goodboy/tractor.git?branch=piker_pin#e232d9dd06f41b8dca997f0647f2083d27cc34f2" }
+source = { git = "https://github.com/goodboy/tractor.git?branch=piker_pin#36307c59175a1d04fecc77ef2c28f5c943b5f3d1" }
dependencies = [
{ name = "bidict" },
{ name = "cffi" },
@@ -2095,13 +2120,13 @@ wheels = [
[[package]]
name = "xonsh"
-version = "0.20.0"
+version = "0.22.4"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/56/af/7e2ba3885da44cbe03c7ff46f90ea917ba10d91dc74d68604001ea28055f/xonsh-0.20.0.tar.gz", hash = "sha256:d44a50ee9f288ff96bd0456f0a38988ef6d4985637140ea793beeef5ec5d2d38", size = 811907, upload-time = "2025-11-24T07:50:50.847Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/48/df/1fc9ed62b3d7c14612e1713e9eb7bd41d54f6ad1028a8fbb6b7cddebc345/xonsh-0.22.4.tar.gz", hash = "sha256:6be346563fec2db75778ba5d2caee155525e634e99d9cc8cc347626025c0b3fa", size = 826665, upload-time = "2026-02-17T07:53:39.424Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/e8/db/1c5c057c0b2a89b8919477726558685720ae0849ea1a98a3803e93550824/xonsh-0.20.0-py311-none-any.whl", hash = "sha256:65d27ba31d558f79010d6c652751449fd3ed4df1f1eda78040a6427fa0a0f03e", size = 646312, upload-time = "2025-11-24T07:50:49.488Z" },
- { url = "https://files.pythonhosted.org/packages/d2/a2/d6f7534f31489a4b8b54bd2a2496248f86f7c21a6a6ce9bfdcdd389fe4e7/xonsh-0.20.0-py312-none-any.whl", hash = "sha256:3148900e67b9c2796bef6f2eda003b0a64d4c6f50a0db23324f786d9e1af9353", size = 646323, upload-time = "2025-11-24T07:50:43.028Z" },
- { url = "https://files.pythonhosted.org/packages/bd/48/bcb1e4d329c3d522bc29b066b0b6ee86938ec392376a29c36fac0ad1c586/xonsh-0.20.0-py313-none-any.whl", hash = "sha256:c83daaf6eb2960180fc5a507459dbdf6c0d6d63e1733c43f4e43db77255c7278", size = 646830, upload-time = "2025-11-24T07:50:45.078Z" },
+ { url = "https://files.pythonhosted.org/packages/2e/00/7cbc0c1fb64365a0a317c54ce3a151c9644eea5a509d9cbaae61c9fd1426/xonsh-0.22.4-py311-none-any.whl", hash = "sha256:38b29b29fa85aa756462d9d9bbcaa1d85478c2108da3de6cc590a69a4bcd1a01", size = 654375, upload-time = "2026-02-17T07:53:37.702Z" },
+ { url = "https://files.pythonhosted.org/packages/2e/c2/3dd498dc28d8f89cdd52e39950c5e591499ae423f61694c0bb4d03ed1d82/xonsh-0.22.4-py312-none-any.whl", hash = "sha256:4e538fac9f4c3d866ddbdeca068f0c0515469c997ed58d3bfee963878c6df5a5", size = 654300, upload-time = "2026-02-17T07:53:35.813Z" },
+ { url = "https://files.pythonhosted.org/packages/82/7d/1f9c7147518e9f03f6ce081b5bfc4f1aceb6ec5caba849024d005e41d3be/xonsh-0.22.4-py313-none-any.whl", hash = "sha256:cc5fabf0ad0c56a2a11bed1e6a43c4ec6416a5b30f24f126b8e768547c3793e2", size = 654818, upload-time = "2026-02-17T07:53:33.477Z" },
]
[[package]]