/**
 * Online learner: let realized PnL pick the strategy.
 *
 * A UCB1 bandit that learns from the realized pnlBps reported by the lifecycle.
 * Mirrors runner/src/plugins/19-exampleBandit.
 *
 * What this teaches
 * - Reward attribution: map a ClosureEvent (marketId/strike/isUp) back to the
 *   arm that opened it, via a posKey -> armId map persisted in ctx.state.
 * - An online estimator as LEARNED POLICY in state (see bandit.ts: UCB1).
 * - Using ctx.lifecycle.closures.pnlBps as a control signal, not just a log.
 *
 * New vs 12-exampleAuditLog / 15-exampleChaos
 * - AuditLog logs PnL but never reads it back. Chaos reads pnlBps only to
 *   benchmark. This one FEEDS it back into the policy. (This is exactly what
 *   the Chaos coin-flip baseline exists to be beaten by.)
 *
 * Modular split: bandit.ts (PURE UCB1) | arms.ts (PURE registry + identity) |
 * here (glue).
 */
import { noop, type StrategyPlugin } from "@automark/runtime-core";
import { Market } from "@automark/sdk/market";
import { bigintMin } from "@automark/sdk/math";
import { hours, minutes } from "@automark/sdk/duration";
import {
pickArm,
scoreArms,
seedStats,
updateArm,
type ArmStat,
} from "./bandit";
import { parseArms, posKey } from "./arms";
/**
 * Shape this file assumes for the `parsedJson` payload of an event whose type
 * ends with `::vault::VaultMintBinary`. `onExecuted` reads it to recognise the
 * mint that corresponds to the pending arm pull.
 */
interface MintBinaryEvent {
// Compared against the pending pull's marketId.
oracle_id: string;
// Strike as a string; compared against the pending strike string and
// converted with BigInt() before building the posKey.
strike: string;
// Compared against the pending pull's isUp flag.
is_up: boolean;
// Declared for completeness; not read anywhere in this file.
quantity: string;
}
// Attribution table stored in ctx.state under the "attr" key: written by
// onExecuted when a mint is observed, read by decide when crediting closures.
type AttrMap = Record<string, string>; // posKey -> armId
/**
 * The arm pull that `decide` just requested, stored in ctx.state under
 * "pendingArm" so that `onExecuted` can match the resulting mint event and
 * record which arm opened the position.
 */
interface PendingArm {
// Id of the arm that was selected for this pull.
armId: string;
// Id of the market the mint targets.
marketId: string;
// Strike rendered with toString(), so it can be compared to the event's string strike.
strike: string;
// True when the pulled arm's direction is "up".
isUp: boolean;
}
/**
 * Reject numeric configuration that did not parse to a finite number.
 *
 * Failing here (at plugin construction) is preferable to the alternative: a
 * NaN size fraction makes `BigInt(Math.round(NaN))` throw on every tick, and a
 * NaN exploration constant or expiry window is silently passed downstream.
 *
 * @param name - Environment variable name, used only in the error message.
 * @param value - The already-parsed number.
 * @returns `value`, unchanged, when it is finite.
 * @throws Error when `value` is NaN or infinite.
 */
function requireFiniteSetting(name: string, value: number): number {
  if (!Number.isFinite(value)) {
    throw new Error(`exampleBandit: ${name} must be a finite number`);
  }
  return value;
}

/**
 * Build the example bandit strategy plugin.
 *
 * Configuration is read from the environment once, at construction:
 * `VAULT_ID` (required), `ARMS`, `EXPIRY_WINDOW_MS`, `UCB_C` and
 * `SIZE_FRAC_OF_HEADROOM`.
 *
 * Each `decide` tick does two things:
 *  1. Learn: every binary closure carrying a `pnlBps` is mapped back to the
 *     arm that opened it (through the persisted posKey -> armId table) and
 *     that arm's statistics are updated.
 *  2. Act: the arms are scored, one is picked, and a binary mint for it is
 *     requested, sized as a fraction of the binding cap.
 *
 * `onExecuted` then records the minted position's key against the pulled arm
 * so a later closure can be credited to it.
 *
 * @returns The strategy plugin (cron-triggered every 90 seconds).
 * @throws Error if `VAULT_ID` is unset, if a numeric setting is not a finite
 *   number, or if `SIZE_FRAC_OF_HEADROOM` is outside [0, 1].
 */
export default function createExampleBandit(): StrategyPlugin {
  const vaultId = process.env.VAULT_ID;
  if (!vaultId) throw new Error("exampleBandit: VAULT_ID not set");

  const arms = parseArms(process.env.ARMS ?? "BTC:1:up,BTC:1:down,ETH:1:up");
  const armsById = new Map(arms.map((a) => [a.id, a] as const));

  const expiryWindowMs = requireFiniteSetting(
    "EXPIRY_WINDOW_MS",
    Number(process.env.EXPIRY_WINDOW_MS ?? hours(6)),
  );
  const ucbC = requireFiniteSetting("UCB_C", Number(process.env.UCB_C ?? 1.4));
  const sizeFrac = requireFiniteSetting(
    "SIZE_FRAC_OF_HEADROOM",
    Number(process.env.SIZE_FRAC_OF_HEADROOM ?? 0.25),
  );
  // A fraction above 1 would size past the binding cap; a negative one would
  // produce a negative mint quantity.
  if (sizeFrac < 0 || sizeFrac > 1) {
    throw new Error("exampleBandit: SIZE_FRAC_OF_HEADROOM must be within [0, 1]");
  }
  // Size fraction in basis points; loop-invariant, so converted once here.
  const sizeBps = BigInt(Math.round(sizeFrac * 10_000));

  return {
    name: "exampleBandit",
    vaultId,
    triggers: [{ kind: "cron", everySeconds: 90 }],

    async decide(ctx) {
      // 1) Learn: credit positions that closed since last tick to their arm.
      const stats = seedStats(
        arms.map((a) => a.id),
        await ctx.state.getOrDefault<Record<string, ArmStat>>("arms", {}),
      );
      const attr = await ctx.state.getOrDefault<AttrMap>("attr", {});

      let credited = 0;
      for (const c of ctx.lifecycle.closures) {
        if (c.shape !== "binary" || c.pnlBps === undefined) continue;
        if (c.strike === undefined || c.isUp === undefined) continue;

        const key = posKey(c.marketId, c.strike, c.isUp);
        const armId = attr[key];
        const stat = armId ? stats[armId] : undefined;
        // Closure of a position this plugin did not open, or of an arm that
        // is no longer in the seeded stats: nothing to credit.
        if (!armId || !stat) continue;

        stats[armId] = updateArm(stat, c.pnlBps);
        credited += 1;
        // Keep the mapping while any quantity remains, so later partial
        // closures of the same position still resolve to this arm.
        if (c.remainingQuantity === 0n) delete attr[key];
      }

      if (credited > 0) {
        await ctx.state.set("arms", stats);
        await ctx.state.set("attr", attr);
        ctx.logger.info("bandit learned", {
          // Number of closures actually credited to an arm, alongside the
          // total seen this tick.
          updated: credited,
          closures: ctx.lifecycle.closures.length,
        });
      }

      // 2) Act: pick the best arm and mint it.
      if (ctx.vault.isFrozen) return [noop("frozen")];
      if (!ctx.vault.canMintBinary) return [noop("no MINT_BINARY permission")];
      if (arms.length === 0) return [noop("no arms configured")];

      const scores = scoreArms(stats, ucbC);
      const armId = pickArm(scores);
      const arm = armId ? armsById.get(armId) : undefined;
      if (!arm) return [noop("no arm selected")];

      // A failed lookup is treated like "no market", but the cause is logged
      // instead of being dropped.
      const market = await Market.find({
        asset: arm.asset,
        expiryAfterMs: ctx.now + minutes(5),
        expiringWithinMs: expiryWindowMs,
        client: ctx.suiClient,
      }).catch((err: unknown) => {
        ctx.logger.info("bandit market lookup failed", {
          asset: arm.asset,
          error: err instanceof Error ? err.message : String(err),
        });
        return null;
      });
      if (!market) return [noop(`no market for ${arm.asset}`)];

      const p = await market.price();
      const strike = market.strikeAtSigma(p, {
        k: arm.k,
        direction: arm.direction,
        atMs: ctx.now,
      });
      const isUp = arm.direction === "up";

      const ceiling = bigintMin(ctx.vault.maxSinglePosition, ctx.vault.exposureHeadroom);
      const quantity = (ceiling * sizeBps) / 10_000n;
      // `<=` rather than `===`: a negative ceiling must not turn into a
      // negative mint quantity.
      if (quantity <= 0n) return [noop("no headroom")];

      // Remember which arm we pulled so onExecuted can attribute the open key.
      const pending: PendingArm = {
        armId: arm.id,
        marketId: market.id,
        strike: strike.toString(),
        isUp,
      };
      await ctx.state.set("pendingArm", pending);

      const score = scores[arm.id];
      ctx.logger.info("bandit pull", {
        arm: arm.id,
        score: typeof score === "number" && Number.isFinite(score) ? score.toFixed(1) : "inf",
        pulls: stats[arm.id]?.pulls ?? 0,
      });

      return [{ kind: "vault.mintBinary", params: { marketId: market.id, strike, isUp, quantity } }];
    },

    // Attribute the minted on-chain key to the arm we pulled, so a future
    // closure resolves back to it for reward crediting.
    async onExecuted(ctx, result) {
      if (result.outcome !== "submitted") return;
      const pending = await ctx.state.get<PendingArm>("pendingArm");
      if (!pending) return;

      // Every matching event describes the same (market, strike, isUp) as
      // `pending`, hence the same posKey, so one match is enough.
      let minted: MintBinaryEvent | undefined;
      for (const ev of result.events ?? []) {
        if (!ev.type.endsWith("::vault::VaultMintBinary")) continue;
        const m = ev.parsedJson as MintBinaryEvent;
        if (
          m.oracle_id === pending.marketId &&
          m.strike === pending.strike &&
          m.is_up === pending.isUp
        ) {
          minted = m;
          break;
        }
      }

      if (minted) {
        const attr = await ctx.state.getOrDefault<AttrMap>("attr", {});
        attr[posKey(minted.oracle_id, BigInt(minted.strike), minted.is_up)] = pending.armId;
        await ctx.state.set("attr", attr);
      }
      await ctx.state.delete("pendingArm");
    },
  };
}
/*
 * Environment variables
 * - VAULT_ID: vault to operate on (required; the factory throws if unset)
 * - ARMS: CSV of "asset:k:dir" arms, default "BTC:1:up,BTC:1:down,ETH:1:up"
 * - EXPIRY_WINDOW_MS: only act on markets expiring within this window (default 6h)
 * - UCB_C: exploration constant (default 1.4)
 * - SIZE_FRAC_OF_HEADROOM: fraction of the binding cap to deploy (default 0.25)
 */