From 236089f5c89804007b3a9dfd9879d07a7d961d0e Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Mon, 2 Mar 2026 17:43:41 +0000
Subject: [PATCH 01/16] Add long-running agents anomaly monitoring end-to-end

Co-authored-by: leor <leor@fortresslabs.com>
---
 docs/agents_monitoring_handoff.md          | 128 +++
 electron-ui/index.html                     |  93 +++
 electron-ui/main.js                        | 316 ++++++-
 electron-ui/preload.js                     |  29 +
 electron-ui/renderer.js                    | 379 +++++++++
 electron-ui/styles.css                     | 275 +++++++
 scripts/anomaly_monitor.py                 | 912 +++++++++++++++++++++
 scripts/anomaly_rules.py                   | 217 +++++
 scripts/fixtures/anomaly_replay_cases.json |  32 +
 scripts/graph_api.py                       |  11 +-
 scripts/ignition_api_client.py             |  69 +-
 scripts/neo4j_ontology.py                  | 148 +++-
 12 files changed, 2565 insertions(+), 44 deletions(-)
 create mode 100644 docs/agents_monitoring_handoff.md
 create mode 100644 scripts/anomaly_monitor.py
 create mode 100644 scripts/anomaly_rules.py
 create mode 100644 scripts/fixtures/anomaly_replay_cases.json

diff --git a/docs/agents_monitoring_handoff.md b/docs/agents_monitoring_handoff.md
new file mode 100644
index 0000000..a5368fb
--- /dev/null
+++ b/docs/agents_monitoring_handoff.md
@@ -0,0 +1,128 @@
+# Long-Running Agents Monitoring Handoff
+
+## Summary
+
+This handoff documents the implemented V1 monitoring capability:
+
+- New **Agents** tab in the Electron UI for starting/stopping long-running monitoring.
+- Continuous Python worker (`anomaly_monitor.py`) with:
+  - deterministic historical-deviation scoring,
+  - quality/staleness gates,
+  - optional LLM triage,
+  - Neo4j persistence for `AgentRun` and `AnomalyEvent`,
+  - event dedup and retention cleanup.
+- IPC surface and stream channels from Electron main to renderer:
+  - `agents:start`, `agents:status`, `agents:stop`,
+  - `agents:list-events`, `agents:get-event`, `agents:ack-event`, `agents:cleanup`,
+  - channels: `agent-status`, `agent-event`, `agent-error`, `agent-complete`.
+- Graph drill-down integration with anomaly node support.
+
+## Files Changed
+
+### Electron
+
+- `electron-ui/index.html`
+  - Added **Agents** nav button.
+  - Added `tab-agents` page shell with controls, filters, feed, and detail panel.
+  - Added graph filter option for anomaly layer.
+
+- `electron-ui/styles.css`
+  - Added Agents tab styles (`agents-*`, `status-chip`, feed cards, detail panel).
+
+- `electron-ui/preload.js`
+  - Added `agents*` API bridge methods.
+  - Added event listeners for `agent-status/event/error/complete`.
+
+- `electron-ui/main.js`
+  - Added background agent runtime management (`activeAgentRun`).
+  - Added stream parser for monitor stdout markers (`[AGENT_STATUS]`, etc.).
+  - Added full `agents:*` IPC handlers.
+  - Added graceful stop handling on app shutdown.
+
+- `electron-ui/renderer.js`
+  - Added Agents tab state management.
+  - Added start/stop/refresh/cleanup/ack handlers.
+  - Added realtime feed updates from agent channels.
+  - Added event detail rendering and graph drill-down action.
+
+### Python backend
+
+- `scripts/anomaly_rules.py` (new)
+  - Deterministic scoring logic (`z`, `MAD`, rate, drift trend, flatline).
+  - Quality/staleness helpers and dedup key generator.
+
+- `scripts/anomaly_monitor.py` (new)
+  - Long-running monitoring worker with CLI subcommands:
+    - `run`, `status`, `list-events`, `get-event`, `ack-event`, `cleanup`, `replay-fixtures`.
+  - Neo4j persistence + dedup + retention cleanup.
+  - Optional LLM triage with structured JSON fallback.
+
+- `scripts/ignition_api_client.py`
+  - Added `query_tag_history(...)` and local-time-to-UTC conversion helper.
+
+- `scripts/neo4j_ontology.py`
+  - Added monitoring schema constraints/indexes for `AgentRun` / `AnomalyEvent`.
+  - Added helper methods: list/get/cleanup anomaly events.
+  - Added CLI commands:
+    - `init-agent-schema`
+    - `list-anomaly-events`
+    - `get-anomaly-event`
+    - `cleanup-anomaly-events`
+
+- `scripts/graph_api.py`
+  - Added node groups/colors for `AgentRun` and `AnomalyEvent`.
+  - Extended neighbor center-node lookup to support `event_id` and `run_id`.
+
+### Fixtures
+
+- `scripts/fixtures/anomaly_replay_cases.json` (new)
+  - Deterministic replay cases:
+    - normal baseline,
+    - sudden spike,
+    - slow drift,
+    - flatline/stuck.
+
+## Runtime Commands
+
+### Deterministic replay validation
+
+```bash
+python3 scripts/anomaly_monitor.py replay-fixtures --fixture-file scripts/fixtures/anomaly_replay_cases.json
+```
+
+### Monitor worker manual run
+
+```bash
+python3 scripts/anomaly_monitor.py run --run-id demo-run --config-json '{"pollIntervalMs":15000}'
+```
+
+### Event operations
+
+```bash
+python3 scripts/anomaly_monitor.py list-events --limit 50
+python3 scripts/anomaly_monitor.py get-event --event-id <event_id>
+python3 scripts/anomaly_monitor.py ack-event --event-id <event_id> --note "Reviewed by operator"
+python3 scripts/anomaly_monitor.py cleanup --retention-days 14
+```
+
+## Known Environment Requirements
+
+The Python environment must include the following packages from `requirements.txt`:
+
+- `neo4j`
+- `anthropic` (for LLM triage; deterministic fallback works without API key)
+- `python-dotenv`
+- `requests`
+
+If `ANTHROPIC_API_KEY` is absent, triage automatically falls back to deterministic explanations.
+
+## Validation Status
+
+- Syntax checks passed:
+  - Python (`py_compile`) for all modified scripts.
+  - JS syntax checks (`node --check`) for Electron files.
+- Fixture replay passed:
+  - `4/4` deterministic scenarios.
+
+Live end-to-end validation against actual Ignition + Neo4j + Anthropic is still pending; it requires connected runtime services.
+
diff --git a/electron-ui/index.html b/electron-ui/index.html
index 03b808e..7e5e8a7 100644
--- a/electron-ui/index.html
+++ b/electron-ui/index.html
@@ -36,6 +36,13 @@
           </svg>
           <span class="nav-label">Assist</span>
         </button>
+        <button class="nav-btn" data-tab="agents" title="Long-Running Agents">
+          <svg class="nav-icon" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
+            <path d="M4 5h7v7H4zM13 5h7v7h-7zM4 14h7v7H4z"/>
+            <path d="M16.5 14.5l1.5 1.5 3-3"/>
+          </svg>
+          <span class="nav-label">Agents</span>
+        </button>
         <button class="nav-btn" data-tab="database" title="Database">
           <svg class="nav-icon" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
             <ellipse cx="12" cy="5" rx="9" ry="3"/>
@@ -532,6 +539,91 @@ <h2>Troubleshooting Assistant</h2>
         </div>
       </section>
 
+      <!-- Agents Tab -->
+      <section class="tab-content" id="tab-agents">
+        <header class="tab-header">
+          <h2>Long-Running Agents</h2>
+          <p>Continuously monitor live process data and triage anomalies with ontology context</p>
+        </header>
+
+        <div class="agents-topbar">
+          <div class="agents-run-controls">
+            <button class="btn btn-primary" id="btn-agents-start">Start Monitoring</button>
+            <button class="btn btn-secondary" id="btn-agents-stop" disabled>Stop</button>
+            <button class="btn btn-ghost" id="btn-agents-refresh">Refresh Events</button>
+            <button class="btn btn-ghost" id="btn-agents-cleanup">Cleanup Old</button>
+          </div>
+          <div class="agents-run-status">
+            <span class="status-chip" id="agents-status-chip">Idle</span>
+            <span class="agents-status-text" id="agents-status-text">No active run</span>
+          </div>
+        </div>
+
+        <div class="agents-config-row">
+          <label>Poll (ms)</label>
+          <input class="input input-sm" id="agents-config-poll-ms" type="number" min="5000" step="1000" value="15000">
+          <label>History (min)</label>
+          <input class="input input-sm" id="agents-config-history-min" type="number" min="10" step="10" value="360">
+          <label>Min Points</label>
+          <input class="input input-sm" id="agents-config-min-points" type="number" min="10" step="5" value="30">
+          <label>Max LLM/Cycle</label>
+          <input class="input input-sm" id="agents-config-max-llm" type="number" min="0" step="1" value="5">
+          <label>Z</label>
+          <input class="input input-sm" id="agents-config-threshold-z" type="number" min="0.5" step="0.5" value="3">
+          <label>MAD</label>
+          <input class="input input-sm" id="agents-config-threshold-mad" type="number" min="0.5" step="0.5" value="3.5">
+          <label>Stale (sec)</label>
+          <input class="input input-sm" id="agents-config-staleness-sec" type="number" min="10" step="10" value="120">
+        </div>
+
+        <div class="agents-metrics-row">
+          <div class="metric-card"><span class="metric-label">Cycle (ms)</span><span class="metric-value" id="agents-metric-cycle">0</span></div>
+          <div class="metric-card"><span class="metric-label">Candidates</span><span class="metric-value" id="agents-metric-candidates">0</span></div>
+          <div class="metric-card"><span class="metric-label">Triaged</span><span class="metric-value" id="agents-metric-triaged">0</span></div>
+          <div class="metric-card"><span class="metric-label">Emitted</span><span class="metric-value" id="agents-metric-emitted">0</span></div>
+          <div class="metric-card"><span class="metric-label">Last heartbeat</span><span class="metric-value" id="agents-metric-heartbeat">n/a</span></div>
+        </div>
+
+        <div class="agents-main">
+          <aside class="agents-feed-panel">
+            <div class="agents-feed-header">
+              <h3>Anomaly Feed</h3>
+              <div class="agents-feed-filters">
+                <select class="input input-sm" id="agents-filter-state">
+                  <option value="">All states</option>
+                  <option value="open">Open</option>
+                  <option value="acknowledged">Acknowledged</option>
+                </select>
+                <select class="input input-sm" id="agents-filter-severity">
+                  <option value="">All severity</option>
+                  <option value="critical">Critical</option>
+                  <option value="high">High</option>
+                  <option value="medium">Medium</option>
+                  <option value="low">Low</option>
+                </select>
+                <input class="input input-sm" id="agents-filter-search" placeholder="Search tag/equipment">
+              </div>
+            </div>
+            <div class="agents-event-list" id="agents-event-list">
+              <div class="agents-empty">No anomaly events yet.</div>
+            </div>
+          </aside>
+
+          <section class="agents-detail-panel">
+            <div class="agents-detail-header">
+              <h3>Event Details</h3>
+              <div class="agents-detail-actions">
+                <button class="btn btn-sm btn-secondary" id="btn-agents-open-graph" disabled>Open in Graph</button>
+                <button class="btn btn-sm btn-ghost" id="btn-agents-ack" disabled>Acknowledge</button>
+              </div>
+            </div>
+            <div class="agents-detail-content" id="agents-event-detail">
+              <p class="text-muted">Select an anomaly event from the feed.</p>
+            </div>
+          </section>
+        </div>
+      </section>
+
       <!-- Database Tab -->
       <section class="tab-content" id="tab-database">
         <header class="tab-header">
@@ -630,6 +722,7 @@ <h2>Ontology Graph</h2>
                   <option value="siemens-hmi">Siemens HMI</option>
                   <option value="mes">MES Layer</option>
                   <option value="troubleshooting">Troubleshooting</option>
+                  <option value="anomaly">Anomalies</option>
                   <option value="flows">Flows</option>
                 </select>
               </div>
diff --git a/electron-ui/main.js b/electron-ui/main.js
index b5cdb4d..e215fb4 100644
--- a/electron-ui/main.js
+++ b/electron-ui/main.js
@@ -4,6 +4,7 @@ const fs = require('fs');
 const { spawn } = require('child_process');
 
 let mainWindow;
+let activeAgentRun = null;
 
 // ---------------------------------------------------------------------------
 // Python backend configuration  (works in both dev and packaged modes)
@@ -103,6 +104,16 @@ app.on('window-all-closed', () => {
   }
 });
 
+app.on('before-quit', () => { // Send SIGTERM to a still-running monitor worker before the app quits.
+  if (activeAgentRun && activeAgentRun.process && !activeAgentRun.process.killed) {
+    try {
+      activeAgentRun.process.kill('SIGTERM');
+    } catch (err) {
+      // Ignore termination errors during shutdown.
+    }
+  }
+});
+
 app.on('activate', () => {
   if (BrowserWindow.getAllWindows().length === 0) {
     createWindow();
@@ -185,6 +196,132 @@ function runPythonScript(scriptName, args = [], options = {}) {
   });
 }
 
+function normalizeAgentConfig(config = {}) { // Fill defaults and clamp caller-supplied monitor settings to minimum values.
+  const thresholds = (config && typeof config.thresholds === 'object' && config.thresholds) || {};
+  const scope = (config && typeof config.scope === 'object' && config.scope) || {};
+  return {
+    pollIntervalMs: Math.max(5000, Number(config.pollIntervalMs || 15000)),
+    historyWindowMinutes: Math.max(10, Number(config.historyWindowMinutes || 360)),
+    minHistoryPoints: Math.max(10, Number(config.minHistoryPoints || 30)),
+    maxMonitoredTags: Math.max(10, Number(config.maxMonitoredTags || 200)),
+    maxCandidatesPerCycle: Math.max(1, Number(config.maxCandidatesPerCycle || 25)),
+    maxLlmTriagesPerCycle: config.maxLlmTriagesPerCycle === 0 ? 0 : Math.max(0, Number(config.maxLlmTriagesPerCycle || 5)), // explicit 0 is valid (floor is 0); `|| 5` alone would turn it into 5
+    dedupCooldownMinutes: Math.max(1, Number(config.dedupCooldownMinutes || 10)),
+    retentionDays: Math.max(1, Number(config.retentionDays || 14)),
+    cleanupEveryCycles: Math.max(1, Number(config.cleanupEveryCycles || 40)),
+    thresholds: { // `??` keeps explicit zeros; only null/undefined fall back to the default
+      z: Number(thresholds.z ?? 3.0),
+      mad: Number(thresholds.mad ?? 3.5),
+      rate: Number(thresholds.rate ?? 0.0),
+      stalenessSec: Number(thresholds.stalenessSec ?? 120),
+      flatline_std_epsilon: Number(thresholds.flatline_std_epsilon ?? 1e-6),
+      stuck_window_size: Number(thresholds.stuck_window_size ?? 20),
+    },
+    scope: {
+      project: scope.project || null,
+      equipmentTags: Array.isArray(scope.equipmentTags) ? scope.equipmentTags : [],
+      tagRegex: scope.tagRegex || null,
+    },
+  };
+}
+
+function routeAgentMessage(channel, payload) { // Forward a worker message to the renderer on the given IPC channel.
+  if (mainWindow && !mainWindow.isDestroyed()) { // touching webContents of a destroyed window throws; the worker may still be emitting
+    mainWindow.webContents.send(channel, payload);
+  }
+}
+
+function parseAgentLine(line) { // Map one worker stdout line to { channel, payload }; returns null when no known marker prefixes it.
+  const trimmed = (line || '').trim();
+  if (!trimmed) return null;
+  const prefixes = [
+    { key: '[AGENT_STATUS]', channel: 'agent-status' },
+    { key: '[AGENT_EVENT]', channel: 'agent-event' },
+    { key: '[AGENT_ERROR]', channel: 'agent-error' },
+    { key: '[AGENT_COMPLETE]', channel: 'agent-complete' },
+  ];
+  for (const prefix of prefixes) {
+    if (!trimmed.startsWith(prefix.key)) continue;
+    const jsonText = trimmed.slice(prefix.key.length).trim(); // text after the marker is parsed as JSON
+    try {
+      const payload = JSON.parse(jsonText);
+      return { channel: prefix.channel, payload };
+    } catch (err) { // bad JSON after a known marker becomes an agent-error message instead of being dropped
+      return {
+        channel: 'agent-error',
+        payload: {
+          runId: activeAgentRun ? activeAgentRun.runId : null,
+          code: 'invalid_agent_json',
+          message: `Failed to parse agent stream line: ${trimmed.slice(0, 200)}`,
+          recoverable: true,
+          timestamp: new Date().toISOString(),
+        },
+      };
+    }
+  }
+  return null; // no recognized marker on this line
+}
+
+function handleAgentStdoutChunk(text) { // Buffer worker stdout, route each complete line, and mirror status payloads into activeAgentRun.
+  if (!activeAgentRun) return;
+  activeAgentRun.stdoutBuffer += text;
+  const lines = activeAgentRun.stdoutBuffer.split(/\r?\n/);
+  activeAgentRun.stdoutBuffer = lines.pop() || ''; // last piece may be a partial line; keep it for the next chunk
+  for (const line of lines) {
+    const parsed = parseAgentLine(line);
+    if (!parsed) continue;
+    if (parsed.channel === 'agent-status' && parsed.payload) { // cache latest state/metrics on the run object
+      activeAgentRun.status = parsed.payload.state || activeAgentRun.status;
+      activeAgentRun.metrics = {
+        cycleMs: parsed.payload.cycleMs || 0,
+        candidates: parsed.payload.candidates || 0,
+        triaged: parsed.payload.triaged || 0,
+        emitted: parsed.payload.emitted || 0,
+        timestamp: parsed.payload.timestamp || new Date().toISOString(),
+      };
+    }
+    routeAgentMessage(parsed.channel, parsed.payload);
+  }
+}
+
+async function stopActiveAgent(reason = 'stopped_by_user') { // SIGTERM the active worker; resolves when it closes, escalating to SIGKILL after 5 s.
+  if (!activeAgentRun || !activeAgentRun.process || activeAgentRun.process.killed) { // no run, or a stop signal was already sent
+    return { success: false, error: 'No active agent run' };
+  }
+  const runId = activeAgentRun.runId;
+  activeAgentRun.status = 'stopping';
+
+  return new Promise((resolve) => {
+    const proc = activeAgentRun.process;
+    let settled = false;
+    const done = (result) => { // resolve at most once (close vs. kill failure)
+      if (settled) return;
+      settled = true;
+      resolve(result);
+    };
+
+    proc.once('close', () => {
+      done({ success: true, runId, stoppedAt: new Date().toISOString(), reason });
+    });
+
+    try {
+      proc.kill('SIGTERM');
+    } catch (err) {
+      done({ success: false, error: err.message });
+      return;
+    }
+
+    setTimeout(() => {
+      if (proc.exitCode !== null || proc.signalCode !== null) return; // already exited; `killed` only says a signal was sent, so it cannot gate the escalation
+      try {
+        proc.kill('SIGKILL');
+      } catch (err) {
+        // Ignore forced termination errors.
+      }
+    }, 5000);
+  });
+}
+
 // IPC Handlers
 
 // Select file dialog
@@ -1304,7 +1441,9 @@ function readDbCredentials() {
   if (!fs.existsSync(credPath)) return {};
   try {
     return JSON.parse(fs.readFileSync(credPath, 'utf-8'));
-  } catch { return {}; }
+  } catch {
+    return {};
+  }
 }
 
 // Get database connections from Neo4j + credential status from db_credentials.json
@@ -1314,10 +1453,8 @@ ipcMain.handle('get-db-connections', async () => {
       const proc = spawnPythonProcess('neo4j_ontology.py', ['db-connections', '--json']);
 
       let stdout = '';
-      let stderr = '';
 
       proc.stdout.on('data', (data) => { stdout += data.toString(); });
-      proc.stderr.on('data', (data) => { stderr += data.toString(); });
 
       proc.on('close', (code) => {
         if (code !== 0) {
@@ -1335,7 +1472,7 @@ ipcMain.handle('get-db-connections', async () => {
           }));
 
           resolve({ success: true, connections: enriched });
-        } catch (e) {
+        } catch {
           resolve({ success: true, connections: [] });
         }
       });
@@ -1349,7 +1486,7 @@ ipcMain.handle('get-db-connections', async () => {
 ipcMain.handle('save-db-credentials', async (event, credentials) => {
   try {
     const credPath = getDbCredentialsPath();
-    let existing = readDbCredentials();
+    const existing = readDbCredentials();
 
     for (const [name, cred] of Object.entries(credentials)) {
       existing[name] = {
@@ -1392,4 +1529,173 @@ ipcMain.handle('test-db-connection', async (event, connectionName) => {
   } catch (error) {
     return { success: false, error: error.message };
   }
+});
+
+// ============================================
+// Long-running Agent Monitoring IPC Handlers
+// ============================================
+
+ipcMain.handle('agents:start', async (event, rawConfig = {}) => { // Spawn the monitor worker, track it in activeAgentRun, and wire its streams to the renderer.
+  if (activeAgentRun && activeAgentRun.process && activeAgentRun.process.exitCode === null && activeAgentRun.process.signalCode === null) { // still running (incl. mid-shutdown); `killed` would allow a second worker
+    return { success: false, error: `Agent run already active: ${activeAgentRun.runId}`, runId: activeAgentRun.runId };
+  }
+
+  const runId = `agent-${Date.now()}`;
+  const config = normalizeAgentConfig(rawConfig);
+
+  try {
+    const proc = spawnPythonProcess('anomaly_monitor.py', [
+      'run',
+      '--run-id',
+      runId,
+      '--config-json',
+      JSON.stringify(config),
+    ]);
+
+    activeAgentRun = {
+      runId,
+      process: proc,
+      status: 'starting',
+      startedAt: new Date().toISOString(),
+      metrics: {
+        cycleMs: 0,
+        candidates: 0,
+        triaged: 0,
+        emitted: 0,
+        timestamp: new Date().toISOString(),
+      },
+      stdoutBuffer: '',
+      config,
+    };
+
+    proc.stdout.on('data', (data) => {
+      handleAgentStdoutChunk(data.toString());
+    });
+
+    proc.stderr.on('data', (data) => { // stderr output is surfaced as a recoverable agent-error
+      const text = data.toString().trim();
+      if (!text) return;
+      routeAgentMessage('agent-error', {
+        runId,
+        code: 'worker_stderr',
+        message: text,
+        recoverable: true,
+        timestamp: new Date().toISOString(),
+      });
+    });
+
+    proc.on('close', (code) => {
+      const hadActive = activeAgentRun && activeAgentRun.runId === runId; // only act if this run is still the tracked one
+      if (hadActive) {
+        routeAgentMessage('agent-complete', {
+          runId,
+          success: code === 0,
+          reason: code === 0 ? 'completed' : 'worker_exit_error',
+          stoppedAt: new Date().toISOString(),
+        });
+        activeAgentRun = null;
+      }
+    });
+
+    proc.on('error', (err) => {
+      routeAgentMessage('agent-error', {
+        runId,
+        code: 'worker_spawn_error',
+        message: err.message,
+        recoverable: false,
+        timestamp: new Date().toISOString(),
+      });
+      if (activeAgentRun && activeAgentRun.runId === runId) activeAgentRun = null; // same ownership check as 'close': never clear a different run
+    });
+
+    return { success: true, runId, startedAt: activeAgentRun.startedAt, config };
+  } catch (error) {
+    activeAgentRun = null;
+    return { success: false, error: error.message, runId };
+  }
+});
+
+ipcMain.handle('agents:status', async (event, runId) => { // Answer from the in-memory run when it matches; otherwise query anomaly_monitor.py `status` for that run id.
+  if (activeAgentRun && (!runId || runId === activeAgentRun.runId)) {
+    return {
+      success: true,
+      runId: activeAgentRun.runId,
+      status: activeAgentRun.status,
+      metrics: activeAgentRun.metrics,
+      lastHeartbeatAt: activeAgentRun.metrics.timestamp,
+      startedAt: activeAgentRun.startedAt,
+      config: activeAgentRun.config,
+      active: true,
+    };
+  }
+
+  if (!runId) { // no run id requested and no matching active run: report idle without spawning Python
+    return { success: true, active: false, status: 'idle' };
+  }
+
+  try {
+    const output = await runPythonScript('anomaly_monitor.py', ['status', '--run-id', runId]);
+    const parsed = JSON.parse(output || '{}');
+    return parsed;
+  } catch (error) {
+    return { success: false, error: error.message };
+  }
+});
+
+ipcMain.handle('agents:stop', async (event, runId = null) => { // Stop the active run; when a runId is supplied it must match the active one.
+  if (!activeAgentRun) {
+    return { success: false, error: 'No active agent run' };
+  }
+  if (runId && runId !== activeAgentRun.runId) {
+    return { success: false, error: `Requested run ${runId} does not match active run ${activeAgentRun.runId}` };
+  }
+  return stopActiveAgent('stopped_by_user');
+});
+
+ipcMain.handle('agents:list-events', async (event, filters = {}) => { // Run anomaly_monitor.py `list-events`, passing only the filters that are set.
+  const args = ['list-events'];
+  if (filters.limit) args.push('--limit', String(filters.limit));
+  if (filters.state) args.push('--state', String(filters.state));
+  if (filters.severity) args.push('--severity', String(filters.severity));
+  if (filters.runId) args.push('--run-id', String(filters.runId));
+
+  try {
+    const output = await runPythonScript('anomaly_monitor.py', args);
+    return JSON.parse(output || '{"success":true,"events":[]}'); // empty output is treated as a successful empty list
+  } catch (error) {
+    return { success: false, error: error.message, events: [] };
+  }
+});
+
+ipcMain.handle('agents:get-event', async (event, eventId) => { // Run anomaly_monitor.py `get-event` for the given id and return its parsed JSON output.
+  try {
+    const output = await runPythonScript('anomaly_monitor.py', ['get-event', '--event-id', String(eventId)]);
+    return JSON.parse(output || '{}');
+  } catch (error) {
+    return { success: false, error: error.message };
+  }
+});
+
+ipcMain.handle('agents:ack-event', async (event, eventId, note = '') => { // Run anomaly_monitor.py `ack-event`; --note is passed only when a note is given.
+  try {
+    const args = ['ack-event', '--event-id', String(eventId)];
+    if (note) args.push('--note', String(note));
+    const output = await runPythonScript('anomaly_monitor.py', args);
+    return JSON.parse(output || '{}');
+  } catch (error) {
+    return { success: false, error: error.message };
+  }
+});
+
+ipcMain.handle('agents:cleanup', async (event, retentionDays = 14) => { // Run anomaly_monitor.py `cleanup` with the given --retention-days value.
+  try {
+    const output = await runPythonScript('anomaly_monitor.py', [
+      'cleanup',
+      '--retention-days',
+      String(retentionDays),
+    ]);
+    return JSON.parse(output || '{}');
+  } catch (error) {
+    return { success: false, error: error.message };
+  }
 });
\ No newline at end of file
diff --git a/electron-ui/preload.js b/electron-ui/preload.js
index d3c8171..1e0930c 100644
--- a/electron-ui/preload.js
+++ b/electron-ui/preload.js
@@ -70,6 +70,15 @@ contextBridge.exposeInMainWorld('api', {
   getSettings: () => ipcRenderer.invoke('get-settings'),
   saveSettings: (settings) => ipcRenderer.invoke('save-settings', settings),
   testIgnitionConnection: (options) => ipcRenderer.invoke('test-ignition-connection', options),
+
+  // Long-running agents monitoring
+  agentsStart: (config) => ipcRenderer.invoke('agents:start', config),
+  agentsStatus: (runId) => ipcRenderer.invoke('agents:status', runId),
+  agentsStop: (runId) => ipcRenderer.invoke('agents:stop', runId),
+  agentsListEvents: (filters) => ipcRenderer.invoke('agents:list-events', filters),
+  agentsGetEvent: (eventId) => ipcRenderer.invoke('agents:get-event', eventId),
+  agentsAckEvent: (eventId, note) => ipcRenderer.invoke('agents:ack-event', eventId, note),
+  agentsCleanup: (retentionDays) => ipcRenderer.invoke('agents:cleanup', retentionDays),
   
   // Database connections
   getDbConnections: () => ipcRenderer.invoke('get-db-connections'),
@@ -91,6 +100,26 @@ contextBridge.exposeInMainWorld('api', {
     const handler = (event, data) => callback(data);
     ipcRenderer.on('stream-complete', handler);
     return () => ipcRenderer.removeListener('stream-complete', handler);
+  },
+  onAgentStatus: (callback) => {
+    const handler = (event, data) => callback(data);
+    ipcRenderer.on('agent-status', handler);
+    return () => ipcRenderer.removeListener('agent-status', handler);
+  },
+  onAgentEvent: (callback) => {
+    const handler = (event, data) => callback(data);
+    ipcRenderer.on('agent-event', handler);
+    return () => ipcRenderer.removeListener('agent-event', handler);
+  },
+  onAgentError: (callback) => {
+    const handler = (event, data) => callback(data);
+    ipcRenderer.on('agent-error', handler);
+    return () => ipcRenderer.removeListener('agent-error', handler);
+  },
+  onAgentComplete: (callback) => {
+    const handler = (event, data) => callback(data);
+    ipcRenderer.on('agent-complete', handler);
+    return () => ipcRenderer.removeListener('agent-complete', handler);
   }
 });
 
diff --git a/electron-ui/renderer.js b/electron-ui/renderer.js
index 53974f5..cab7e8b 100644
--- a/electron-ui/renderer.js
+++ b/electron-ui/renderer.js
@@ -3536,6 +3536,381 @@ btnSaveDbCreds?.addEventListener('click', async () => {
     btnSaveDbCreds.disabled = false;
   }
 });
+// Agents Tab - Long-running monitoring
+// ============================================
+
+const agentsState = { // renderer-side state shared by the Agents tab functions below
+  runId: null, // id of the run being tracked; set from start/status responses and agent-status pushes
+  status: 'idle',
+  events: [], // events held for the feed (replaced by loadAgentEvents, merged by realtime pushes)
+  selectedEventId: null, // event_id whose details are shown, if any
+  listenersReady: false, // flipped once by ensureAgentListeners() so IPC handlers are registered a single time
+};
+
+function getAgentsElements() { // Looks up every Agents-tab element by id on each call; an absent element yields null.
+  return {
+    btnStart: document.getElementById('btn-agents-start'),
+    btnStop: document.getElementById('btn-agents-stop'),
+    btnRefresh: document.getElementById('btn-agents-refresh'),
+    btnCleanup: document.getElementById('btn-agents-cleanup'),
+    btnOpenGraph: document.getElementById('btn-agents-open-graph'),
+    btnAck: document.getElementById('btn-agents-ack'),
+    statusChip: document.getElementById('agents-status-chip'),
+    statusText: document.getElementById('agents-status-text'),
+    list: document.getElementById('agents-event-list'),
+    detail: document.getElementById('agents-event-detail'),
+    filterState: document.getElementById('agents-filter-state'), // feed filters, read by getFilteredAgentEvents()
+    filterSeverity: document.getElementById('agents-filter-severity'),
+    filterSearch: document.getElementById('agents-filter-search'),
+    metricCycle: document.getElementById('agents-metric-cycle'), // metric cards, written by updateAgentMetrics()
+    metricCandidates: document.getElementById('agents-metric-candidates'),
+    metricTriaged: document.getElementById('agents-metric-triaged'),
+    metricEmitted: document.getElementById('agents-metric-emitted'),
+    metricHeartbeat: document.getElementById('agents-metric-heartbeat'),
+    cfgPoll: document.getElementById('agents-config-poll-ms'), // config inputs, read by getAgentsConfigFromUI()
+    cfgHist: document.getElementById('agents-config-history-min'),
+    cfgPoints: document.getElementById('agents-config-min-points'),
+    cfgMaxLlm: document.getElementById('agents-config-max-llm'),
+    cfgZ: document.getElementById('agents-config-threshold-z'),
+    cfgMad: document.getElementById('agents-config-threshold-mad'),
+    cfgStale: document.getElementById('agents-config-staleness-sec'),
+  };
+}
+
+function getAgentsConfigFromUI() {
+  // Build the agents:start config from the form inputs. A missing, blank, or non-numeric
+  // field falls back to its default so NaN/Infinity is never sent on to the worker.
+  const el = getAgentsElements();
+  const finiteOr = (value, fallback) => (Number.isFinite(value) ? value : fallback);
+  const num = (input, fallback) => finiteOr(Number(input?.value || fallback), fallback);
+  return {
+    pollIntervalMs: num(el.cfgPoll, 15000),
+    historyWindowMinutes: num(el.cfgHist, 360),
+    minHistoryPoints: num(el.cfgPoints, 30),
+    maxLlmTriagesPerCycle: num(el.cfgMaxLlm, 5),
+    thresholds: { z: num(el.cfgZ, 3), mad: num(el.cfgMad, 3.5), stalenessSec: num(el.cfgStale, 120) },
+  };
+}
+
+function formatAgentTime(ts) {
+  // Falsy input -> 'n/a'; unparseable input -> echoed back as a string; otherwise locale date/time.
+  if (!ts) return 'n/a';
+  const parsed = new Date(ts);
+  return Number.isNaN(parsed.getTime()) ? String(ts) : parsed.toLocaleString();
+}
+
+function updateAgentStatusUi(status, text) {
+  // Paint the status chip and text, then enable/disable Start and Stop for the lower-cased run state.
+  const { statusChip, statusText, btnStart, btnStop } = getAgentsElements();
+  if (!statusChip || !statusText) return;
+  statusChip.className = 'status-chip';
+  const state = (status || 'idle').toLowerCase();
+  if (['running', 'failed', 'error'].includes(state)) statusChip.classList.add(state === 'running' ? 'running' : 'error');
+  statusChip.textContent = state;
+  statusText.textContent = text || state;
+  if (btnStart) btnStart.disabled = ['running', 'starting'].includes(state);
+  if (btnStop) btnStop.disabled = !['running', 'starting', 'stopping'].includes(state);
+}
+
+function updateAgentMetrics(metrics = {}, heartbeatTs = null) {
+  // Each counter card shows the live field, else its last* variant, else 0; the heartbeat card shows a time.
+  const el = getAgentsElements();
+  const counters = [['metricCycle', 'cycleMs', 'lastCycleMs'], ['metricCandidates', 'candidates', 'lastCandidates'],
+    ['metricTriaged', 'triaged', 'lastTriaged'], ['metricEmitted', 'emitted', 'lastEmitted']];
+  counters.forEach(([id, live, last]) => { if (el[id]) el[id].textContent = String(metrics[live] ?? metrics[last] ?? 0); });
+  if (el.metricHeartbeat) el.metricHeartbeat.textContent = formatAgentTime(heartbeatTs || metrics.timestamp);
+}
+
+function getFilteredAgentEvents() {
+  // Narrow the cached events by the feed's state, severity, and free-text filters.
+  // An empty filter matches everything; all comparisons are case-insensitive.
+  const el = getAgentsElements();
+  const wantedState = (el.filterState?.value || '').toLowerCase();
+  const wantedSeverity = (el.filterSeverity?.value || '').toLowerCase();
+  const needle = (el.filterSearch?.value || '').trim().toLowerCase();
+
+  const matchesField = (wanted, actual) => !wanted || String(actual || '').toLowerCase() === wanted;
+  const matchesSearch = (event) => {
+    if (!needle) return true;
+    // Search across the summary, the tag identifiers, and any linked equipment/tags.
+    const parts = [event.summary, event.source_tag, event.tag_name];
+    parts.push(...(event.equipment || []), ...(event.tags || []));
+    return parts.filter(Boolean).join(' ').toLowerCase().includes(needle);
+  };
+
+  // Equality checks run first; the text search only runs for events that pass them.
+  return agentsState.events.filter(
+    (event) => matchesField(wantedState, event.state)
+      && matchesField(wantedSeverity, event.severity)
+      && matchesSearch(event),
+  );
+}
+
+function renderAgentEventList() {
+  // Rebuild the feed from the filtered events and wire each card to open its details on click.
+  const { list } = getAgentsElements();
+  if (!list) return;
+  const visible = getFilteredAgentEvents();
+  if (visible.length === 0) {
+    list.innerHTML = '<div class="agents-empty">No anomaly events match the current filters.</div>';
+    return;
+  }
+  list.innerHTML = visible
+    .map((item) => {
+      const activeClass = item.event_id === agentsState.selectedEventId ? ' active' : '';
+      const sev = String(item.severity || 'low').toLowerCase();
+      const equipment = (item.equipment || []).slice(0, 2).join(', ');
+      return `
+        <div class="agents-event-card${activeClass}" data-event-id="${escapeHtml(item.event_id || '')}">
+          <div class="agents-event-line-top">
+            <span class="agents-severity sev-${escapeHtml(sev)}">${escapeHtml(sev)}</span>
+            <span class="agents-event-time">${escapeHtml(formatAgentTime(item.created_at))}</span>
+          </div>
+          <div class="agents-event-summary">${escapeHtml(item.summary || 'Untitled anomaly')}</div>
+          <div class="agents-event-meta">${escapeHtml(item.tag_name || item.source_tag || '')}${equipment ? ` • ${escapeHtml(equipment)}` : ''}</div>
+        </div>
+      `;
+    })
+    .join('');
+
+  for (const card of list.querySelectorAll('.agents-event-card')) {
+    card.addEventListener('click', () => {
+      const { eventId } = card.dataset;
+      if (eventId) selectAgentEvent(eventId);
+    });
+  }
+}
+
+function resolveAgentGraphTarget(event) {
+  // Prefer the first linked equipment, then a tag identifier; null when the event names neither.
+  const [equipment] = (event.equipment || []).filter(Boolean);
+  if (equipment) return { name: equipment, type: 'Equipment' };
+  const tagName = event.tag_name || (event.tags || []).filter(Boolean)[0] || event.source_tag;
+  return tagName ? { name: tagName, type: 'ScadaTag' } : null;
+}
+
+function renderAgentEventDetails(event) {
+  // Render the detail pane for `event`; a falsy event shows the placeholder and disables both actions.
+  const el = getAgentsElements();
+  if (!el.detail) return;
+  if (!event) {
+    el.detail.innerHTML = '<p class="text-muted">Select an anomaly event from the feed.</p>';
+    if (el.btnOpenGraph) el.btnOpenGraph.disabled = true;
+    if (el.btnAck) el.btnAck.disabled = true;
+    return;
+  }
+  // The *_json fields carry JSON text. Missing, malformed, or non-array values render as an
+  // empty list ("n/a") instead of throwing when .map() is called inside the template below.
+  const parseList = (raw) => {
+    try { const parsed = JSON.parse(raw || '[]'); return Array.isArray(parsed) ? parsed : []; } catch (e) { return []; }
+  };
+  const checks = parseList(event.recommended_checks_json);
+  const causes = parseList(event.probable_causes_json);
+  const safety = parseList(event.safety_notes_json);
+  el.detail.innerHTML = `
+    <div class="agents-detail-grid">
+      <div class="agents-detail-item"><span class="agents-detail-label">Event ID</span><span class="agents-detail-value">${escapeHtml(event.event_id || '')}</span></div>
+      <div class="agents-detail-item"><span class="agents-detail-label">State</span><span class="agents-detail-value">${escapeHtml(event.state || '')}</span></div>
+      <div class="agents-detail-item"><span class="agents-detail-label">Severity</span><span class="agents-detail-value">${escapeHtml(event.severity || '')}</span></div>
+      <div class="agents-detail-item"><span class="agents-detail-label">Confidence</span><span class="agents-detail-value">${escapeHtml(String(event.confidence ?? ''))}</span></div>
+      <div class="agents-detail-item"><span class="agents-detail-label">Category</span><span class="agents-detail-value">${escapeHtml(event.category || '')}</span></div>
+      <div class="agents-detail-item"><span class="agents-detail-label">Timestamp</span><span class="agents-detail-value">${escapeHtml(formatAgentTime(event.created_at))}</span></div>
+      <div class="agents-detail-item"><span class="agents-detail-label">Source Tag</span><span class="agents-detail-value">${escapeHtml(event.source_tag || '')}</span></div>
+      <div class="agents-detail-item"><span class="agents-detail-label">Tag Name</span><span class="agents-detail-value">${escapeHtml(event.tag_name || '')}</span></div>
+      <div class="agents-detail-item"><span class="agents-detail-label">z-score</span><span class="agents-detail-value">${escapeHtml(String(event.z_score ?? '0'))}</span></div>
+      <div class="agents-detail-item"><span class="agents-detail-label">MAD score</span><span class="agents-detail-value">${escapeHtml(String(event.mad_score ?? '0'))}</span></div>
+    </div>
+    <div>
+      <div class="agents-detail-label">Summary</div>
+      <div>${escapeHtml(event.summary || '')}</div>
+    </div>
+    <div>
+      <div class="agents-detail-label">Explanation</div>
+      <div>${escapeHtml(event.explanation || '')}</div>
+    </div>
+    <div>
+      <div class="agents-detail-label">Probable Causes</div>
+      <ul class="agents-list">${causes.map((x) => `<li>${escapeHtml(String(x))}</li>`).join('') || '<li>n/a</li>'}</ul>
+    </div>
+    <div>
+      <div class="agents-detail-label">Verification Checks</div>
+      <ul class="agents-list">${checks.map((x) => `<li>${escapeHtml(String(x))}</li>`).join('') || '<li>n/a</li>'}</ul>
+    </div>
+    <div>
+      <div class="agents-detail-label">Safety Notes</div>
+      <ul class="agents-list">${safety.map((x) => `<li>${escapeHtml(String(x))}</li>`).join('') || '<li>n/a</li>'}</ul>
+    </div>
+  `;
+  if (el.btnOpenGraph) el.btnOpenGraph.disabled = !resolveAgentGraphTarget(event);
+  if (el.btnAck) el.btnAck.disabled = event.state === 'acknowledged';
+}
+
+async function selectAgentEvent(eventId) {
+  // Select an event and show its details. The full record is fetched when the cached copy
+  // lacks triage fields; the card highlight moves immediately rather than after the fetch.
+  agentsState.selectedEventId = eventId;
+  renderAgentEventList();
+  const cached = agentsState.events.find((e) => e.event_id === eventId);
+  if (cached && cached.explanation && cached.recommended_checks_json) {
+    renderAgentEventDetails(cached);
+    return;
+  }
+  const detailResult = await window.api.agentsGetEvent(eventId);
+  if (!detailResult.success || !detailResult.event) return;
+  const idx = agentsState.events.findIndex((e) => e.event_id === eventId);
+  const merged = idx >= 0 ? { ...agentsState.events[idx], ...detailResult.event } : detailResult.event;
+  if (idx >= 0) agentsState.events[idx] = merged;
+  else agentsState.events.unshift(merged);
+  renderAgentEventList();
+  // Another card may have been selected while the request was in flight; keep its details on screen.
+  if (agentsState.selectedEventId === eventId) renderAgentEventDetails(merged);
+}
+
+async function loadAgentEvents() {
+  // Reload up to 200 events for the current filters and run, then refresh the feed and any open detail pane.
+  const { filterState, filterSeverity } = getAgentsElements();
+  const filters = {
+    limit: 200,
+    state: filterState?.value || undefined,
+    severity: filterSeverity?.value || undefined,
+    runId: agentsState.runId || undefined,
+  };
+  const result = await window.api.agentsListEvents(filters);
+  if (!result.success) return;
+  agentsState.events = Array.isArray(result.events) ? result.events : [];
+  renderAgentEventList();
+  const { selectedEventId } = agentsState;
+  if (!selectedEventId) return;
+  renderAgentEventDetails(agentsState.events.find((e) => e.event_id === selectedEventId) || null);
+}
+
+async function refreshAgentStatus() {
+  // Ask the main process for the run status and mirror it in the status chip and metric cards.
+  const status = await window.api.agentsStatus(agentsState.runId || undefined);
+  if (!status.success) {
+    updateAgentStatusUi('error', status.error || 'Failed to fetch status');
+    return;
+  }
+  if (!status.active) {
+    agentsState.status = 'idle';
+    return updateAgentStatusUi('idle', 'No active run');
+  }
+  agentsState.runId = status.runId || agentsState.runId;
+  agentsState.status = status.status || 'running';
+  updateAgentStatusUi(agentsState.status, `Run ${agentsState.runId}`);
+  updateAgentMetrics(status.metrics || {}, status.lastHeartbeatAt);
+}
+
+async function startAgentsMonitoring() {
+  // Start a run with the form's config; on success track the returned run id and load its events.
+  const { success, error, runId } = await window.api.agentsStart(getAgentsConfigFromUI());
+  if (!success) {
+    updateAgentStatusUi('error', error || 'Failed to start monitoring');
+    return;
+  }
+  agentsState.runId = runId;
+  agentsState.status = 'running';
+  updateAgentStatusUi('running', `Run ${runId}`);
+  await loadAgentEvents();
+}
+
+async function stopAgentsMonitoring() {
+  // Ask the main process to stop the tracked run (undefined is sent when no run id is known).
+  // A failed request surfaces its error text in the status chip and leaves the state untouched.
+  const { success, error } = await window.api.agentsStop(agentsState.runId || undefined);
+  if (!success) return updateAgentStatusUi('error', error || 'Failed to stop monitoring');
+
+  agentsState.status = 'stopped';
+  updateAgentStatusUi('stopped', 'Monitoring stopped');
+}
+
+async function acknowledgeSelectedAgentEvent() {
+  // Acknowledge the selected event with an empty note, then reload so the feed and details show the new state.
+  const eventId = agentsState.selectedEventId;
+  if (!eventId) return;
+  if (!(await window.api.agentsAckEvent(eventId, '')).success) return;
+  await loadAgentEvents();
+  renderAgentEventDetails(agentsState.events.find((e) => e.event_id === agentsState.selectedEventId) || null);
+}
+
+function upsertRealtimeAgentEvent(payload) {
+  // Merge a pushed agent-event into the feed. Defaults only fill fields the cached record lacks,
+  // so a repeat push for a known event cannot reopen it after acknowledgement or reset its timestamp.
+  if (!payload || !payload.eventId) return;
+  const idx = agentsState.events.findIndex((e) => e.event_id === payload.eventId);
+  const prev = idx >= 0 ? agentsState.events[idx] : {};
+  const next = {
+    event_id: payload.eventId,
+    severity: payload.severity || prev.severity || 'medium',
+    summary: payload.summary || prev.summary || 'Anomaly detected',
+    category: payload.category || prev.category || 'deviation',
+    created_at: payload.createdAt || prev.created_at || new Date().toISOString(),
+    source_tag: payload.entityRefs?.sourceTag || payload.entityRefs?.tag || prev.source_tag || '',
+    tag_name: payload.entityRefs?.tag || prev.tag_name || '',
+    state: payload.state || prev.state || 'open',
+  };
+  if (idx >= 0) agentsState.events[idx] = { ...prev, ...next };
+  else agentsState.events.unshift(next);
+  renderAgentEventList();
+}
+
+function ensureAgentListeners() {
+  // Subscribe to the agent-status / agent-event / agent-error / agent-complete pushes.
+  // The listenersReady flag makes repeat calls a no-op so handlers are never stacked.
+  if (agentsState.listenersReady) return;
+  agentsState.listenersReady = true;
+
+  const onStatus = (payload) => {
+    if (!payload) return;
+    if (payload.runId) agentsState.runId = payload.runId;
+    agentsState.status = payload.state || agentsState.status;
+    updateAgentStatusUi(agentsState.status, `Run ${agentsState.runId || 'n/a'}`);
+    updateAgentMetrics(payload, payload.timestamp);
+  };
+  const onError = (payload) => {
+    if (payload) updateAgentStatusUi('error', payload.message || 'Agent runtime error');
+  };
+  const onComplete = (payload) => {
+    if (!payload) return;
+    agentsState.status = payload.success ? 'stopped' : 'failed';
+    updateAgentStatusUi(agentsState.status, payload.reason || 'Run complete');
+    refreshAgentStatus();
+  };
+
+  window.api.onAgentStatus(onStatus);
+  window.api.onAgentEvent((payload) => upsertRealtimeAgentEvent(payload));
+  window.api.onAgentError(onError);
+  window.api.onAgentComplete(onComplete);
+}
+
+function initAgentsTab() {
+  // Entry point used when the Agents tab is shown: make sure the IPC listeners exist,
+  // bind the tab's controls once, then refresh the run status and the event feed.
+  ensureAgentListeners();
+  const el = getAgentsElements();
+  if (!el.btnStart) return;
+  if (!el.btnStart.dataset.bound) {
+    el.btnStart.dataset.bound = '1';
+    // The data-bound marker on the Start button keeps later calls from re-adding these handlers.
+    const openGraphForSelection = () => {
+      const event = agentsState.events.find((e) => e.event_id === agentsState.selectedEventId);
+      const target = event ? resolveAgentGraphTarget(event) : null;
+      if (target) openGraphModal(target.name, target.type, event.summary || target.name);
+    };
+    const cleanupAndReload = async () => { await window.api.agentsCleanup(14); await loadAgentEvents(); };
+    el.btnStart.addEventListener('click', startAgentsMonitoring);
+    el.btnStop?.addEventListener('click', stopAgentsMonitoring);
+    el.btnRefresh?.addEventListener('click', loadAgentEvents);
+    el.btnCleanup?.addEventListener('click', cleanupAndReload);
+    el.btnAck?.addEventListener('click', acknowledgeSelectedAgentEvent);
+    el.btnOpenGraph?.addEventListener('click', openGraphForSelection);
+    el.filterState?.addEventListener('change', loadAgentEvents);
+    el.filterSeverity?.addEventListener('change', loadAgentEvents);
+    el.filterSearch?.addEventListener('input', renderAgentEventList);
+  }
+  refreshAgentStatus();
+  loadAgentEvents();
+}
 
 // Initialize graph tab when it's first shown
 navButtons.forEach(btn => {
@@ -3558,6 +3933,9 @@ navButtons.forEach(btn => {
       loadSettings();
       loadDbConnections();
     }
+    if (btn.dataset.tab === 'agents') {
+      setTimeout(initAgentsTab, 100);
+    }
   });
 });
 
@@ -3569,5 +3947,6 @@ setTimeout(() => {
   loadTiaProjects();
   loadSettings();
   loadDbConnections();
+  ensureAgentListeners();
 }, 500);
 
diff --git a/electron-ui/styles.css b/electron-ui/styles.css
index 5ba9186..f1e066e 100644
--- a/electron-ui/styles.css
+++ b/electron-ui/styles.css
@@ -2979,3 +2979,278 @@ select.input,
 .connection-status .status-dot {
   flex-shrink: 0;
 }
+
+/* ============================================
+   AGENTS TAB
+   ============================================ */
+
+.agents-topbar {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+  gap: var(--space-4);
+  margin-bottom: var(--space-3);
+  flex-wrap: wrap;
+}
+
+.agents-run-controls {
+  display: flex;
+  gap: var(--space-2);
+  flex-wrap: wrap;
+}
+
+.agents-run-status {
+  display: flex;
+  align-items: center;
+  gap: var(--space-2);
+  color: var(--color-text-secondary);
+  font-size: var(--text-sm);
+}
+
+.status-chip {
+  display: inline-flex;
+  align-items: center;
+  justify-content: center;
+  padding: 2px 8px;
+  border-radius: 999px;
+  border: 1px solid var(--color-border);
+  background: var(--color-bg-panel-2);
+  color: var(--color-text-secondary);
+  font-size: var(--text-xs);
+  font-weight: 600;
+  text-transform: uppercase;
+  letter-spacing: 0.4px;
+}
+
+.status-chip.running {
+  color: var(--color-success);
+  border-color: rgba(34, 197, 94, 0.35);
+  background: rgba(34, 197, 94, 0.12);
+}
+
+.status-chip.error {
+  color: var(--color-danger);
+  border-color: rgba(239, 68, 68, 0.35);
+  background: rgba(239, 68, 68, 0.12);
+}
+
+.agents-config-row {
+  display: grid;
+  grid-template-columns: repeat(14, minmax(0, 1fr));
+  gap: var(--space-2);
+  margin-bottom: var(--space-4);
+  align-items: center;
+}
+
+.agents-config-row label {
+  font-size: var(--text-xs);
+  color: var(--color-text-secondary);
+  text-transform: uppercase;
+  letter-spacing: 0.35px;
+}
+
+.agents-config-row .input {
+  min-width: 0;
+}
+
+.agents-metrics-row {
+  display: grid;
+  grid-template-columns: repeat(5, minmax(0, 1fr));
+  gap: var(--space-2);
+  margin-bottom: var(--space-4);
+}
+
+.metric-card {
+  border: 1px solid var(--color-border);
+  background: var(--color-bg-panel);
+  border-radius: var(--radius-md);
+  padding: var(--space-2) var(--space-3);
+  display: flex;
+  flex-direction: column;
+  gap: 2px;
+}
+
+.metric-label {
+  font-size: var(--text-xs);
+  color: var(--color-text-muted);
+}
+
+.metric-value {
+  font-family: var(--font-mono);
+  font-size: var(--text-sm);
+  color: var(--color-text);
+}
+
+.agents-main {
+  display: grid;
+  grid-template-columns: minmax(300px, 38%) minmax(0, 1fr);
+  gap: var(--space-3);
+  min-height: 480px;
+}
+
+.agents-feed-panel,
+.agents-detail-panel {
+  border: 1px solid var(--color-border);
+  background: var(--color-bg-panel);
+  border-radius: var(--radius-lg);
+  overflow: hidden;
+  display: flex;
+  flex-direction: column;
+}
+
+.agents-feed-header,
+.agents-detail-header {
+  padding: var(--space-3);
+  border-bottom: 1px solid var(--color-border-subtle);
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+  gap: var(--space-2);
+}
+
+.agents-feed-header h3,
+.agents-detail-header h3 {
+  font-size: var(--text-md);
+  font-weight: 600;
+}
+
+.agents-feed-filters {
+  display: flex;
+  gap: var(--space-2);
+  flex-wrap: wrap;
+}
+
+.agents-feed-filters .input {
+  min-width: 120px;
+}
+
+.agents-event-list {
+  overflow-y: auto;
+  padding: var(--space-2);
+  display: flex;
+  flex-direction: column;
+  gap: var(--space-2);
+  flex: 1;
+}
+
+.agents-empty {
+  color: var(--color-text-muted);
+  font-size: var(--text-sm);
+  padding: var(--space-4);
+  text-align: center;
+}
+
+.agents-event-card {
+  border: 1px solid var(--color-border);
+  background: var(--color-bg-panel-2);
+  border-radius: var(--radius-md);
+  padding: var(--space-2) var(--space-3);
+  cursor: pointer;
+  transition: border-color var(--transition-fast), transform var(--transition-fast);
+}
+
+.agents-event-card:hover {
+  border-color: var(--color-border-active);
+  transform: translateY(-1px);
+}
+
+.agents-event-card.active {
+  border-color: var(--color-accent);
+  box-shadow: 0 0 0 1px rgba(34, 211, 238, 0.35) inset;
+}
+
+.agents-event-line-top {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+  margin-bottom: 4px;
+  gap: var(--space-2);
+}
+
+.agents-severity {
+  font-size: var(--text-xs);
+  text-transform: uppercase;
+  letter-spacing: 0.4px;
+  padding: 2px 6px;
+  border-radius: 999px;
+  border: 1px solid transparent;
+}
+
+.agents-severity.sev-critical {
+  color: #fecaca;
+  background: rgba(239, 68, 68, 0.2);
+  border-color: rgba(239, 68, 68, 0.4);
+}
+
+.agents-severity.sev-high {
+  color: #fdba74;
+  background: rgba(249, 115, 22, 0.18);
+  border-color: rgba(249, 115, 22, 0.35);
+}
+
+.agents-severity.sev-medium {
+  color: #fde68a;
+  background: rgba(245, 158, 11, 0.15);
+  border-color: rgba(245, 158, 11, 0.35);
+}
+
+.agents-severity.sev-low {
+  color: #bfdbfe;
+  background: rgba(59, 130, 246, 0.15);
+  border-color: rgba(59, 130, 246, 0.35);
+}
+
+.agents-event-time {
+  font-size: var(--text-xs);
+  color: var(--color-text-muted);
+  font-family: var(--font-mono);
+}
+
+.agents-event-summary {
+  font-size: var(--text-sm);
+  color: var(--color-text);
+  margin-bottom: 4px;
+}
+
+.agents-event-meta {
+  font-size: var(--text-xs);
+  color: var(--color-text-muted);
+}
+
+.agents-detail-content {
+  padding: var(--space-3);
+  overflow-y: auto;
+  font-size: var(--text-sm);
+  display: flex;
+  flex-direction: column;
+  gap: var(--space-3);
+}
+
+.agents-detail-grid {
+  display: grid;
+  grid-template-columns: 1fr 1fr;
+  gap: var(--space-2) var(--space-3);
+}
+
+.agents-detail-item {
+  display: flex;
+  flex-direction: column;
+  gap: 2px;
+}
+
+.agents-detail-label {
+  font-size: var(--text-xs);
+  color: var(--color-text-muted);
+  text-transform: uppercase;
+  letter-spacing: 0.3px;
+}
+
+.agents-detail-value {
+  font-family: var(--font-mono);
+  color: var(--color-text);
+}
+
+.agents-list {
+  margin-left: var(--space-4);
+  color: var(--color-text-secondary);
+}
diff --git a/scripts/anomaly_monitor.py b/scripts/anomaly_monitor.py
new file mode 100644
index 0000000..70a0f4b
--- /dev/null
+++ b/scripts/anomaly_monitor.py
@@ -0,0 +1,912 @@
+#!/usr/bin/env python3
+"""
+Long-running anomaly monitor worker.
+
+Modes:
+  - run: start continuous monitoring loop
+  - status: get run status
+  - list-events: list persisted anomaly events
+  - get-event: fetch one anomaly event
+  - ack-event: mark event as acknowledged
+  - cleanup: delete old events by retention policy
+  - replay-fixtures: run deterministic fixture validation
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import signal
+import sys
+import time
+import uuid
+from datetime import datetime, timedelta, timezone
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+try:
+    from dotenv import load_dotenv
+except ImportError:  # pragma: no cover - optional fallback for minimal environments
+    def load_dotenv(*_args, **_kwargs):
+        return False
+
+from anomaly_rules import (
+    compute_deviation_scores,
+    dedup_key,
+    is_quality_good,
+    is_stale,
+    parse_timestamp,
+    safe_float,
+)
+
+
+load_dotenv()
+
+
+def utc_now_iso() -> str:
+    return datetime.now(timezone.utc).isoformat()  # timezone-aware UTC "now" as an ISO 8601 string
+
+
+def emit(prefix: str, payload: Dict[str, Any]) -> None:
+    """Print one flushed ``[PREFIX] {json}`` line on stdout for the Electron main process to parse."""
+    print("[{}] {}".format(prefix, json.dumps(payload, default=str)), flush=True)
+
+
+def merge_defaults(config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
+    """Return the built-in defaults overlaid with ``config``.
+
+    Unknown top-level keys and ``None`` values are ignored. ``thresholds`` and ``scope`` are
+    merged per key into fresh dicts, and a non-dict value for either is ignored.
+    """
+    raw = dict(config or {})
+    cfg: Dict[str, Any] = {
+        "pollIntervalMs": 15000,
+        "historyWindowMinutes": 360,
+        "minHistoryPoints": 30,
+        "maxMonitoredTags": 200,
+        "maxCandidatesPerCycle": 25,
+        "maxLlmTriagesPerCycle": 5,
+        "dedupCooldownMinutes": 10,
+        "retentionDays": 14,
+        "cleanupEveryCycles": 40,
+        "runMode": "live",
+        "scope": {"project": None, "equipmentTags": [], "tagRegex": None},
+        "thresholds": {
+            "z": 3.0,
+            "mad": 3.5,
+            "rate": 0.0,
+            "stalenessSec": 120,
+            "flatline_std_epsilon": 1e-6,
+            "stuck_window_size": 20,
+        },
+    }
+    cfg.update({k: v for k, v in raw.items() if k in cfg and k not in ("thresholds", "scope") and v is not None})
+    thresholds = raw.get("thresholds") if isinstance(raw.get("thresholds"), dict) else {}
+    cfg["thresholds"].update({k: v for k, v in thresholds.items() if v is not None})
+    scope = raw.get("scope") if isinstance(raw.get("scope"), dict) else {}
+    cfg["scope"].update(scope)  # merged into the default dict, never replaced by the caller's object
+    return cfg
+
+
+class AnomalyMonitor:
+    def __init__(self, config: Dict[str, Any], run_id: Optional[str] = None) -> None:  # builds the graph, gateway and optional LLM clients
+        self.config = merge_defaults(config)
+        self.run_id = run_id or f"agent-run-{uuid.uuid4()}"  # generate an id when the caller supplies none
+        from ignition_api_client import IgnitionApiClient  # project modules are imported here rather than at module top
+        from neo4j_ontology import get_ontology_graph
+        # Graph handle used by the session() calls in the methods of this class.
+        self.graph = get_ontology_graph()
+        # NOTE(review): merge_defaults() keeps only keys present in its defaults, so config-supplied ignitionApiUrl/ignitionApiToken look dropped before these lookups - confirm.
+        self.api = IgnitionApiClient(
+            base_url=self.config.get("ignitionApiUrl") or os.getenv("IGNITION_API_URL"),
+            api_token=self.config.get("ignitionApiToken") or os.getenv("IGNITION_API_TOKEN"),
+            timeout=15.0,
+        )
+        # The LLM client is optional: only attempted when ANTHROPIC_API_KEY is set, and switched off again if construction fails.
+        self.llm = None
+        self._llm_enabled = bool(os.getenv("ANTHROPIC_API_KEY"))
+        if self._llm_enabled:
+            try:
+                from claude_client import ClaudeClient
+
+                self.llm = ClaudeClient(
+                    enable_tools=False,
+                    ignition_api_url=self.config.get("ignitionApiUrl"),  # no environment fallback here, unlike self.api above
+                    ignition_api_token=self.config.get("ignitionApiToken"),
+                )
+            except Exception as exc:
+                self._llm_enabled = False
+                emit("AGENT_ERROR", {
+                    "runId": self.run_id,
+                    "code": "llm_init_failed",
+                    "message": str(exc),
+                    "recoverable": True,  # reported as recoverable; construction continues without LLM triage
+                    "timestamp": utc_now_iso(),
+                })
+        # Loop state: run flag, cycle counter (persisted by upsert_run/heartbeat), and previous values (presumably keyed by tag).
+        self._running = True
+        self._cycle_count = 0
+        self._prev_values: Dict[str, float] = {}
+
+    # -----------------------------
+    # Schema / run lifecycle
+    # -----------------------------
+    def init_schema(self) -> None:  # delegates agent-monitoring schema setup to the graph helper
+        self.graph.init_agent_monitoring_schema()
+
+    def upsert_run(self, status: str, reason: Optional[str] = None) -> None:
+        """Upsert this run's AgentRun node: status, heartbeat, config snapshot, cycle count and stop info."""
+        config_json = json.dumps(self.config, default=str)
+        with self.graph.session() as session:
+            session.run(
+                """
+                MERGE (r:AgentRun {run_id: $run_id})
+                SET r.status = $status,
+                    r.updated_at = datetime(),
+                    r.last_heartbeat_at = datetime(),
+                    r.config_json = $config_json,
+                    r.cycle_count = $cycle_count,
+                    r.started_at = coalesce(r.started_at, datetime()),
+                    r.stopped_at = CASE WHEN $status IN ['stopped', 'failed'] THEN datetime() ELSE r.stopped_at END,
+                    r.stop_reason = CASE WHEN $reason IS NULL THEN r.stop_reason ELSE $reason END
+                """,
+                run_id=self.run_id, status=status, reason=reason,
+                config_json=config_json, cycle_count=self._cycle_count,
+            )
+
+    def heartbeat(self, metrics: Dict[str, Any]) -> None:
+        """Stamp a heartbeat and the last cycle's counters on this run's AgentRun node, if it exists."""
+        counters = {name: metrics.get(key, 0) for name, key in (
+            ("cycle_ms", "cycleMs"), ("candidates", "candidates"), ("triaged", "triaged"), ("emitted", "emitted"))}
+        with self.graph.session() as session:
+            session.run(
+                """
+                MATCH (r:AgentRun {run_id: $run_id})
+                SET r.last_heartbeat_at = datetime(),
+                    r.cycle_count = $cycle_count,
+                    r.last_cycle_ms = $cycle_ms,
+                    r.last_candidates = $candidates,
+                    r.last_triaged = $triaged,
+                    r.last_emitted = $emitted
+                """,
+                run_id=self.run_id,
+                cycle_count=self._cycle_count,
+                **counters,
+            )
+
+    # -----------------------------
+    # Tag and context collection
+    # -----------------------------
+    def get_monitored_tags(self) -> List[Dict[str, str]]:  # returns [{"path", "name"}, ...], at most maxMonitoredTags entries
+        max_tags = int(self.config.get("maxMonitoredTags", 200))
+        scope = self.config.get("scope", {})
+        tag_regex = scope.get("tagRegex")
+        equipment_tags = set(scope.get("equipmentTags") or [])
+        # NOTE(review): LIMIT (3x the cap) is applied before the scope filters and the query has no ORDER BY, so in-scope tags can be cut off - confirm.
+        with self.graph.session() as session:
+            result = session.run(
+                """
+                MATCH (t:ScadaTag)
+                WHERE coalesce(t.opc_item_path, t.name) IS NOT NULL
+                  AND coalesce(t.opc_item_path, t.name) <> ''
+                RETURN DISTINCT coalesce(t.opc_item_path, t.name) AS tag_path,
+                                coalesce(t.name, t.opc_item_path) AS tag_name
+                LIMIT $limit
+                """,
+                limit=max_tags * 3,
+            )
+            tags = [{"path": r["tag_path"], "name": r["tag_name"]} for r in result if r["tag_path"]]
+        # Optional filter 1: case-insensitive regex on path or name; an invalid pattern is reported and the filter skipped.
+        if tag_regex:
+            import re
+            try:
+                pattern = re.compile(tag_regex, re.IGNORECASE)
+                tags = [t for t in tags if pattern.search(t["path"]) or pattern.search(t["name"])]
+            except re.error:
+                emit("AGENT_ERROR", {
+                    "runId": self.run_id,
+                    "code": "invalid_tag_regex",
+                    "message": f"Invalid regex: {tag_regex}",
+                    "recoverable": True,
+                    "timestamp": utc_now_iso(),
+                })
+        # Optional filter 2: keep only tags whose name or path is listed in scope.equipmentTags.
+        if equipment_tags:
+            tags = [t for t in tags if t["name"] in equipment_tags or t["path"] in equipment_tags]
+        # Enforce the configured cap after filtering.
+        return tags[:max_tags]
+
+    def _extract_history_values(self, history_data: Any, tag_path: str) -> List[float]:
+        """Flatten a gateway history response into the numeric samples for ``tag_path``.
+
+        ``history_data`` may be a bare list of rows or a dict wrapping the list under
+        ``rows``/``data``/``results``/``values``/``history`` (or ``tagHistory`` as a fallback).
+        Rows may be scalars or dicts; a dict row is read from its ``value`` key, else its
+        ``tag_path`` column, else its first non-timestamp column. ``None``, a dict with a
+        truthy ``error``, and values for which ``safe_float`` returns ``None`` add nothing.
+        """
+        if history_data is None:
+            return []
+
+        rows: List[Any] = []
+        if isinstance(history_data, list):
+            rows = history_data
+        elif isinstance(history_data, dict):
+            if history_data.get("error"):
+                return []
+            wrapped = (history_data.get(key) for key in ("rows", "data", "results", "values", "history"))
+            rows = next((chunk for chunk in wrapped if isinstance(chunk, list)), [])
+            if not rows and isinstance(history_data.get("tagHistory"), list):
+                rows = history_data["tagHistory"]
+
+        time_keys = {"timestamp", "ts", "t", "time"}
+        samples: List[float] = []
+
+        for row in rows:
+            if isinstance(row, dict):
+                if "value" in row:
+                    raw = row["value"]
+                elif tag_path in row:
+                    raw = row[tag_path]
+                else:
+                    # Wide format often has timestamp + one tag column: take the first non-timestamp one.
+                    raw = next((v for k, v in row.items() if k.lower() not in time_keys), None)
+            elif isinstance(row, (int, float, str)):
+                raw = row
+            else:
+                continue
+            number = safe_float(raw)
+            if number is not None:
+                samples.append(number)
+
+        return samples
+
+    def fetch_history_values(self, tag_path: str) -> List[float]:
+        """Query the averaged history window of one tag and return its numeric samples."""
+        span = timedelta(minutes=int(self.config.get("historyWindowMinutes", 360)))
+        window_end = datetime.now(timezone.utc)
+        # Ask for at least 100 points, or four times the configured minimum history.
+        sample_cap = max(100, 4 * int(self.config.get("minHistoryPoints", 30)))
+        payload = self.api.query_tag_history(
+            [tag_path],
+            (window_end - span).isoformat(),
+            window_end.isoformat(),
+            return_size=sample_cap, aggregation_mode="Average", return_format="Wide",
+        )
+        return self._extract_history_values(payload, tag_path)
+
+    def get_context(self, tag_path: str) -> Dict[str, Any]:
+        """Collect graph context (equipment, symptoms, causes, patterns, safety) for a tag.
+
+        The result always has the same keys. ``tag_name`` falls back to
+        ``tag_path`` when no ScadaTag matches or the matched node has no ``name``,
+        so callers never format ``None`` into an event summary. Empty entries
+        are dropped from every collected list.
+        """
+        list_keys = ("equipment", "symptoms", "causes", "patterns", "safety")
+        with self.graph.session() as session:
+            result = session.run(
+                """
+                MATCH (t:ScadaTag)
+                WHERE t.name = $tag OR t.opc_item_path = $tag
+                OPTIONAL MATCH (eq:Equipment)-[*1..2]-(t)
+                OPTIONAL MATCH (eq)-[:HAS_SYMPTOM]->(s:FaultSymptom)
+                OPTIONAL MATCH (s)-[:CAUSED_BY]->(c:FaultCause)
+                OPTIONAL MATCH (eq)-[:HAS_PATTERN]->(p:ControlPattern)
+                OPTIONAL MATCH (eq)-[:SAFETY_CRITICAL]->(se:SafetyElement)
+                RETURN t,
+                       collect(DISTINCT eq.name) AS equipment,
+                       collect(DISTINCT s.symptom) AS symptoms,
+                       collect(DISTINCT c.cause) AS causes,
+                       collect(DISTINCT p.pattern_name) AS patterns,
+                       collect(DISTINCT se.name) AS safety
+                LIMIT 1
+                """,
+                tag=tag_path,
+            )
+            record = result.single()
+            if not record:
+                return {"tag_path": tag_path, "tag_name": tag_path, **{key: [] for key in list_keys}}
+            node = record["t"]
+            context: Dict[str, Any] = {
+                "tag_path": tag_path,
+                # A tag matched only by opc_item_path may have no name property.
+                "tag_name": (node.get("name") if node else None) or tag_path,
+            }
+            for key in list_keys:
+                context[key] = [x for x in record[key] if x]
+            return context
+
+    # -----------------------------
+    # Triage and persistence
+    # -----------------------------
+    def run_llm_triage(
+        self,
+        context: Dict[str, Any],
+        deterministic: Dict[str, Any],
+        live_sample: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """Ask the LLM to triage a candidate, falling back to a deterministic verdict.
+
+        The fallback is returned unchanged when no LLM is configured, when the
+        reply has no dict under ``data``, or when the call raises (an AGENT_ERROR
+        is emitted in that case). Otherwise non-null reply fields override it.
+        """
+        tag_path = context["tag_path"]
+        # Deterministic verdict, used verbatim whenever the LLM cannot contribute.
+        baseline: Dict[str, Any] = {
+            "summary": f"Deterministic anomaly on {context.get('tag_name', tag_path)}",
+            "category": deterministic.get("category", "deviation"),
+            "severity": "medium",
+            "confidence": 0.55,
+            "probable_causes": ["Signal deviates from historical baseline."],
+            "verification_checks": [
+                f"Check live quality/timestamp for {tag_path}",
+                "Inspect upstream interlocks and communication health.",
+            ],
+            "safety_notes": context.get("safety", []),
+            "rationale": "LLM triage unavailable; using deterministic fallback.",
+            "related_entities": [{"label": "Equipment", "name": e} for e in context.get("equipment", [])[:3]],
+        }
+        if not self.llm:
+            return baseline
+
+        instructions = (
+            "You are an industrial anomaly triage assistant. "
+            "Return ONLY valid JSON with keys: summary, category, severity, confidence, "
+            "probable_causes, verification_checks, safety_notes, rationale, related_entities. "
+            "Severity must be one of critical/high/medium/low. "
+            "Category must be one of spike/drift/stuck/state-conflict/quality-issue/deviation. "
+            "related_entities is a list of objects: {label,name}."
+        )
+        payload = {"context": context, "deterministic": deterministic, "live_sample": live_sample}
+        request_body = json.dumps(payload, default=str)
+
+        try:
+            reply = self.llm.query_json(
+                system_prompt=instructions,
+                user_prompt=request_body,
+                max_tokens=900,
+                use_tools=False,
+            )
+            data = reply.get("data")
+            if isinstance(data, dict):
+                # Null fields in the reply keep their baseline value.
+                return {**baseline, **{k: v for k, v in data.items() if v is not None}}
+            return baseline
+        except Exception as exc:
+            emit("AGENT_ERROR", {
+                "runId": self.run_id,
+                "code": "llm_triage_failed",
+                "message": str(exc),
+                "recoverable": True,
+                "timestamp": utc_now_iso(),
+            })
+            return baseline
+
+    def _severity_from_scores(self, deterministic: Dict[str, Any], llm_out: Dict[str, Any]) -> str:
+        """Return the LLM severity when it is a known level, else grade by |z-score|."""
+        claimed = str(llm_out.get("severity", "")).lower()
+        if claimed in ("critical", "high", "medium", "low"):
+            return claimed
+        magnitude = abs(float(deterministic.get("z_score", 0.0)))
+        # Bands are checked from the most to the least severe; the first floor reached wins.
+        bands = ((8, "critical"), (5, "high"), (3, "medium"))
+        for floor, level in bands:
+            if magnitude >= floor:
+                return level
+        return "low"
+
+    def is_duplicate_recent(self, dedup_sig: str) -> bool:
+        """Return True when an event with this dedup key was created within the cooldown."""
+        window_minutes = max(1, int(self.config.get("dedupCooldownMinutes", 10)))
+        with self.graph.session() as session:
+            # The query converts created_at with datetime() before comparing it.
+            hit = session.run(
+                """
+                MATCH (e:AnomalyEvent {dedup_key: $dedup_key})
+                WHERE e.created_at IS NOT NULL
+                  AND datetime(e.created_at) > datetime() - duration({minutes: $minutes})
+                RETURN count(e) AS cnt
+                """,
+                dedup_key=dedup_sig, minutes=window_minutes,
+            ).single()
+            return bool(hit and hit["cnt"] > 0)
+
+    def persist_event(
+        self,
+        context: Dict[str, Any],
+        deterministic: Dict[str, Any],
+        live_sample: Dict[str, Any],
+        triage: Dict[str, Any],
+    ) -> Optional[Dict[str, Any]]:
+        """Store one anomaly as an AnomalyEvent node and link it into the graph.
+
+        Returns the stored properties, or ``None`` when a same-key event exists
+        inside the cooldown. ``triage`` may be LLM output, so its types are checked.
+        """
+        category = triage.get("category") or deterministic.get("category", "deviation")
+        dedup_sig = dedup_key(context["tag_path"], category, int(self.config.get("dedupCooldownMinutes", 10)))
+        if self.is_duplicate_recent(dedup_sig):
+            return None
+
+        event_id = f"ae-{uuid.uuid4()}"
+        severity = self._severity_from_scores(deterministic, triage)
+        # A non-numeric confidence (e.g. "high") must not abort the whole cycle.
+        reported = safe_float(triage.get("confidence"))
+        confidence = 0.5 if reported is None else max(0.0, min(1.0, reported))
+        event_data = {
+            "event_id": event_id,
+            "run_id": self.run_id,
+            "event_schema_version": 1,
+            "state": "open",
+            "severity": severity,
+            "confidence": confidence,
+            "category": category,
+            "summary": triage.get("summary", f"Anomaly on {context['tag_path']}"),
+            "explanation": triage.get("rationale", ""),
+            "recommended_checks_json": json.dumps(triage.get("verification_checks", []), default=str),
+            "probable_causes_json": json.dumps(triage.get("probable_causes", []), default=str),
+            "safety_notes_json": json.dumps(triage.get("safety_notes", []), default=str),
+            "deterministic_reasons_json": json.dumps(deterministic.get("reasons", []), default=str),
+            "z_score": float(deterministic.get("z_score", 0.0)),
+            "mad_score": float(deterministic.get("mad_score", 0.0)),
+            "delta_rate": float(deterministic.get("delta_rate", 0.0)),
+            "window_volatility": float(deterministic.get("window_volatility", 0.0)),
+            "source_tag": context["tag_path"],
+            "tag_name": context.get("tag_name") or context["tag_path"],
+            "live_quality": live_sample.get("quality"),
+            "live_timestamp": live_sample.get("timestamp"),
+            "live_value": str(live_sample.get("value")),
+            "dedup_key": dedup_sig,
+            "created_at": utc_now_iso(),
+            "updated_at": utc_now_iso(),
+        }
+
+        with self.graph.session() as session:
+            session.run(
+                """
+                MATCH (r:AgentRun {run_id: $run_id})
+                CREATE (e:AnomalyEvent $props)
+                MERGE (r)-[:EMITTED]->(e)
+                """,
+                run_id=self.run_id, props=event_data,
+            )
+            session.run(
+                """
+                MATCH (e:AnomalyEvent {event_id: $event_id})
+                MATCH (t:ScadaTag)
+                WHERE t.name = $tag OR t.opc_item_path = $tag
+                MERGE (e)-[:OBSERVED_ON]->(t)
+                """,
+                event_id=event_id, tag=context["tag_path"],
+            )
+            for equipment_name in context.get("equipment", [])[:5]:
+                session.run(
+                    """
+                    MATCH (e:AnomalyEvent {event_id: $event_id})
+                    MATCH (eq:Equipment {name: $name})
+                    MERGE (e)-[:AFFECTS]->(eq)
+                    """,
+                    event_id=event_id, name=equipment_name,
+                )
+            suggested = triage.get("related_entities")
+            related_inputs: List[Dict[str, str]] = [
+                {"label": str(item["label"]), "name": str(item["name"])}
+                for item in (suggested if isinstance(suggested, list) else [])
+                if isinstance(item, dict) and item.get("label") and item.get("name")
+            ]
+            related_inputs += [{"label": "FaultSymptom", "name": n} for n in context.get("symptoms", [])[:3]]
+            related_inputs += [{"label": "FaultCause", "name": n} for n in context.get("causes", [])[:3]]
+            linkable = {"FaultSymptom", "FaultCause", "ControlPattern", "SafetyElement", "Equipment", "ScadaTag"}
+            for rel in related_inputs[:8]:
+                label = rel["label"]
+                # Labels are interpolated into the query text, so only whitelisted ones are accepted.
+                if label not in linkable:
+                    continue
+                session.run(
+                    f"""
+                    MATCH (e:AnomalyEvent {{event_id: $event_id}})
+                    MATCH (n:{label})
+                    WHERE n.name = $name OR n.symptom = $name OR n.cause = $name
+                    MERGE (e)-[:RELATED_TO]->(n)
+                    """,
+                    event_id=event_id, name=rel["name"],
+                )
+        return event_data
+
+    # -----------------------------
+    # Monitoring loop
+    # -----------------------------
+    def run_cycle(self) -> Dict[str, Any]:  # One polling pass: read, score, triage, persist; returns cycle metrics.
+        cycle_start = time.time()
+        metrics = {"candidates": 0, "triaged": 0, "emitted": 0, "cycleMs": 0}
+        thresholds = self.config.get("thresholds", {})
+        min_history = int(self.config.get("minHistoryPoints", 30))
+
+        if not self.api.is_configured:
+            emit("AGENT_ERROR", {
+                "runId": self.run_id,
+                "code": "ignition_not_configured",
+                "message": "Ignition API URL/token not configured.",
+                "recoverable": True,
+                "timestamp": utc_now_iso(),
+            })
+            metrics["cycleMs"] = int((time.time() - cycle_start) * 1000)
+            return metrics
+
+        tags = self.get_monitored_tags()
+        if not tags:
+            emit("AGENT_ERROR", {
+                "runId": self.run_id,
+                "code": "no_tags_found",
+                "message": "No ScadaTag nodes with readable tag paths found.",
+                "recoverable": True,
+                "timestamp": utc_now_iso(),
+            })
+            metrics["cycleMs"] = int((time.time() - cycle_start) * 1000)
+            return metrics
+
+        tag_paths = [t["path"] for t in tags]
+        live_values = self.api.read_tags(tag_paths)
+        candidates: List[Dict[str, Any]] = []
+        now = datetime.now(timezone.utc)
+
+        for tv in live_values:
+            if tv.error:
+                continue
+            if not is_quality_good(tv.quality):
+                # Quality gate: samples without good quality are skipped, not reported.
+                continue
+            if is_stale(tv.timestamp, int(thresholds.get("stalenessSec", 120)), now=now):
+                continue
+
+            history = self.fetch_history_values(tv.path)  # one history query per live tag
+            if len(history) < min_history:
+                continue
+
+            prev_val = self._prev_values.get(tv.path)
+            deterministic = compute_deviation_scores(
+                current_value=tv.value,
+                history_values=history,
+                prev_value=prev_val,
+                thresholds=thresholds,
+            )
+            curr_num = safe_float(tv.value)
+            if curr_num is not None:
+                self._prev_values[tv.path] = curr_num  # becomes prev_value on the next cycle
+
+            if deterministic.get("candidate"):
+                context = self.get_context(tv.path)
+                candidates.append(
+                    {
+                        "context": context,
+                        "deterministic": deterministic,
+                        "live_sample": {
+                            "path": tv.path,
+                            "value": tv.value,
+                            "quality": tv.quality,
+                            "timestamp": tv.timestamp,
+                            "data_type": tv.data_type,
+                        },
+                    }
+                )
+
+        metrics["candidates"] = len(candidates)
+        max_candidates = int(self.config.get("maxCandidatesPerCycle", 25))
+        max_triage = int(self.config.get("maxLlmTriagesPerCycle", 5))
+        shortlisted = candidates[:max_candidates]  # keeps the first N in read order
+
+        for idx, candidate in enumerate(shortlisted):
+            use_llm = idx < max_triage  # only the first max_triage candidates go to the LLM
+            triage = (
+                self.run_llm_triage(
+                    candidate["context"],
+                    candidate["deterministic"],
+                    candidate["live_sample"],
+                )
+                if use_llm
+                else {
+                    "summary": f"Deviation on {candidate['context'].get('tag_name', candidate['context']['tag_path'])}",
+                    "category": candidate["deterministic"].get("category", "deviation"),
+                    "severity": "medium",
+                    "confidence": 0.5,
+                    "verification_checks": [],
+                    "probable_causes": [],
+                    "safety_notes": [],
+                    "rationale": "Triaged in deterministic-only mode due per-cycle LLM cap.",
+                    "related_entities": [],
+                }
+            )
+            metrics["triaged"] += 1  # counts deterministic-only verdicts too
+            persisted = self.persist_event(
+                candidate["context"],
+                candidate["deterministic"],
+                candidate["live_sample"],
+                triage,
+            )
+            if persisted:  # None means the event was suppressed as a recent duplicate
+                metrics["emitted"] += 1
+                emit("AGENT_EVENT", {
+                    "runId": self.run_id,
+                    "eventId": persisted["event_id"],
+                    "severity": persisted["severity"],
+                    "summary": persisted["summary"],
+                    "category": persisted.get("category"),
+                    "entityRefs": {
+                        "tag": persisted.get("tag_name") or persisted.get("source_tag"),
+                        "sourceTag": persisted.get("source_tag"),
+                    },
+                    "createdAt": persisted.get("created_at"),
+                })
+
+        metrics["cycleMs"] = int((time.time() - cycle_start) * 1000)
+        return metrics
+
+    def cleanup_retention(self) -> int:
+        """Purge anomaly events older than ``retentionDays`` (default 14); return what the graph reports."""
+        return self.graph.cleanup_anomaly_events(retention_days=int(self.config.get("retentionDays", 14)))
+
+    def run_forever(self) -> int:
+        """Run monitoring cycles until ``_running`` is cleared; return the process exit code.
+
+        Every cycle emits AGENT_STATUS, and every ``cleanupEveryCycles`` cycles old
+        events are purged. A cycle error is reported and the loop keeps going, but
+        the run is still recorded as failed (exit code 1) once it stops. The wait
+        between cycles is sliced so a stop request is honoured within ~0.25 s
+        instead of after a full poll interval.
+        """
+        def report(state: str, cycle_ms: int = 0, candidates: int = 0, triaged: int = 0, emitted: int = 0) -> None:
+            emit("AGENT_STATUS", {
+                "runId": self.run_id,
+                "state": state,
+                "cycleMs": cycle_ms,
+                "candidates": candidates,
+                "triaged": triaged,
+                "emitted": emitted,
+                "timestamp": utc_now_iso(),
+            })
+
+        self.init_schema()
+        self.upsert_run("running")
+        report("running")
+        poll_ms = int(self.config.get("pollIntervalMs", 15000))
+        cleanup_every = max(1, int(self.config.get("cleanupEveryCycles", 40)))
+        exit_code = 0
+        reason = "stopped"
+
+        while self._running:
+            self._cycle_count += 1
+            cycle_started = time.monotonic()
+            try:
+                metrics = self.run_cycle()
+                self.heartbeat(metrics)
+                report("running", metrics["cycleMs"], metrics["candidates"], metrics["triaged"], metrics["emitted"])
+                if self._cycle_count % cleanup_every == 0:
+                    deleted = self.cleanup_retention()
+                    if deleted > 0:
+                        # The deleted count travels in the "emitted" field of this status.
+                        report("retention_cleanup", emitted=deleted)
+            except Exception as exc:
+                reason = "failed"
+                exit_code = 1
+                emit("AGENT_ERROR", {
+                    "runId": self.run_id,
+                    "code": "cycle_error",
+                    "message": str(exc),
+                    "recoverable": True,
+                    "timestamp": utc_now_iso(),
+                })
+
+            # time.sleep() resumes after a signal handler returns (PEP 475), so one long
+            # sleep would delay SIGTERM/SIGINT shutdown by up to poll_ms. Sleep in short
+            # slices and re-check the stop flag between them.
+            wake_at = cycle_started + poll_ms / 1000.0
+            while self._running:
+                remaining = wake_at - time.monotonic()
+                if remaining <= 0:
+                    break
+                time.sleep(min(remaining, 0.25))
+
+        self.upsert_run("stopped" if reason != "failed" else "failed", reason=reason)
+        emit("AGENT_COMPLETE", {
+            "runId": self.run_id,
+            "success": exit_code == 0,
+            "reason": reason,
+            "stoppedAt": utc_now_iso(),
+        })
+        return exit_code
+
+    # -----------------------------
+    # Single-operation helpers
+    # -----------------------------
+    def list_events(self, limit: int, state: Optional[str], severity: Optional[str], run_id: Optional[str]) -> Dict[str, Any]:
+        """Return stored anomaly events matching the filters, wrapped in a success envelope."""
+        return {"success": True, "events": self.graph.list_anomaly_events(limit=limit, state=state, severity=severity, run_id=run_id)}
+
+    def get_event(self, event_id: str) -> Dict[str, Any]:
+        """Look up one anomaly event by id; report failure when the graph has none."""
+        found = self.graph.get_anomaly_event(event_id)
+        missing = {"success": False, "error": f"Event not found: {event_id}"}
+        return {"success": True, "event": found} if found else missing
+
+    def ack_event(self, event_id: str, note: Optional[str]) -> Dict[str, Any]:
+        """Mark an anomaly event as acknowledged, storing an optional operator note.
+
+        Timestamps are stored as ISO strings, matching what ``persist_event`` writes.
+        """
+        with self.graph.session() as session:
+            record = session.run(
+                """
+                MATCH (e:AnomalyEvent {event_id: $event_id})
+                SET e.state = 'acknowledged', e.ack_note = $note,
+                    e.acknowledged_at = $now, e.updated_at = $now
+                RETURN count(e) AS cnt
+                """,
+                event_id=event_id, note=note or "", now=utc_now_iso(),
+            ).single()
+        if not record or record["cnt"] == 0:
+            return {"success": False, "error": f"Event not found: {event_id}"}
+        return {"success": True, "eventId": event_id}
+
+    def get_status(self, run_id: str) -> Dict[str, Any]:
+        """Return the stored status, last-cycle metrics and raw properties of one AgentRun."""
+        with self.graph.session() as session:
+            result = session.run(
+                """
+                MATCH (r:AgentRun {run_id: $run_id})
+                RETURN r
+                LIMIT 1
+                """,
+                run_id=run_id,
+            )
+            row = result.single()
+            if not row:
+                return {"success": False, "error": f"Run not found: {run_id}"}
+            props = dict(row["r"])
+        # Response key -> node property; each metric defaults to 0 when absent.
+        metric_fields = (
+            ("cycleCount", "cycle_count"), ("lastCycleMs", "last_cycle_ms"), ("lastCandidates", "last_candidates"),
+            ("lastTriaged", "last_triaged"), ("lastEmitted", "last_emitted"),
+        )
+        return {
+            "success": True,
+            "status": props.get("status"),
+            "metrics": {out_key: props.get(prop, 0) for out_key, prop in metric_fields},
+            "lastHeartbeatAt": props.get("last_heartbeat_at"),
+            "run": props,
+        }
+
+
+def _load_fixture_cases(path: Path) -> List[Dict[str, Any]]:
+    """Read replay cases from a JSON file: a bare list, or a dict with a ``cases`` key."""
+    payload = json.loads(path.read_text(encoding="utf-8"))
+    if isinstance(payload, list):
+        return payload
+    # Any other top-level JSON type carries no cases.
+    return payload.get("cases", []) if isinstance(payload, dict) else []
+
+
+def replay_fixtures(config_json: Optional[str], fixture_path: str) -> Dict[str, Any]:
+    """Score every fixture case and compare the candidate flag with its expectation.
+
+    Returns a summary with ``success`` (no mismatches), the ``total``/``passed``/
+    ``failed`` counts and one ``failures`` entry per mismatching case.
+    """
+    config = merge_defaults(json.loads(config_json) if config_json else {})
+    cases = _load_fixture_cases(Path(fixture_path))
+    thresholds = config.get("thresholds", {})
+    failures: List[Dict[str, Any]] = []
+
+    for case in cases:
+        expected = bool(case.get("expected_candidate", False))
+        scores = compute_deviation_scores(
+            current_value=case.get("current_value"),
+            history_values=case.get("history_values", []),
+            prev_value=case.get("prev_value"),
+            thresholds=thresholds,
+        )
+        if scores.get("candidate") == expected:
+            continue
+        failures.append({
+            "id": case.get("id"),
+            "expected_candidate": expected,
+            "actual_candidate": scores.get("candidate"),
+            "category": scores.get("category"),
+            "reasons": scores.get("reasons", []),
+        })
+
+    return {
+        "success": len(failures) == 0,
+        "total": len(cases),
+        "passed": len(cases) - len(failures),
+        "failed": len(failures),
+        "failures": failures,
+    }
+
+
+def main() -> int:
+    """Parse the CLI, run the requested sub-command and return the process exit code.
+
+    ``run`` loops until SIGTERM/SIGINT; every other command prints one JSON
+    document. Failures in those one-shot commands (unreadable fixture file,
+    invalid JSON, graph errors) are reported as ``{"success": false, ...}`` with
+    exit code 1 rather than as a traceback, so callers can always parse stdout.
+    """
+    parser = argparse.ArgumentParser(description="Anomaly monitor worker")
+    sub = parser.add_subparsers(dest="command", required=True)
+
+    p_run = sub.add_parser("run", help="Run continuous anomaly monitoring")
+    p_run.add_argument("--run-id", help="Optional run id")
+    p_run.add_argument("--config-json", default="{}", help="JSON config string")
+    sub.add_parser("status", help="Get status for one run").add_argument("--run-id", required=True)
+    p_list = sub.add_parser("list-events", help="List anomaly events")
+    p_list.add_argument("--limit", type=int, default=100)
+    p_list.add_argument("--state")
+    p_list.add_argument("--severity")
+    p_list.add_argument("--run-id")
+    sub.add_parser("get-event", help="Get one anomaly event").add_argument("--event-id", required=True)
+    p_ack = sub.add_parser("ack-event", help="Acknowledge one anomaly event")
+    p_ack.add_argument("--event-id", required=True)
+    p_ack.add_argument("--note")
+    sub.add_parser("cleanup", help="Delete old anomaly events").add_argument("--retention-days", type=int, default=14)
+    p_replay = sub.add_parser("replay-fixtures", help="Validate deterministic scoring against fixtures")
+    p_replay.add_argument("--fixture-file", required=True)
+    p_replay.add_argument("--config-json", default="{}")
+
+    args = parser.parse_args()
+
+    def fail(exc: Exception) -> int:
+        """Print a JSON error envelope and return exit code 1."""
+        print(json.dumps({"success": False, "error": str(exc)}))
+        return 1
+
+    if args.command == "replay-fixtures":
+        try:
+            result = replay_fixtures(args.config_json, args.fixture_file)
+        except Exception as exc:
+            return fail(exc)
+        print(json.dumps(result))
+        return 0 if result["success"] else 1
+
+    try:
+        monitor = AnomalyMonitor(
+            config=json.loads(getattr(args, "config_json", "{}") or "{}"),
+            run_id=getattr(args, "run_id", None),
+        )
+    except Exception as exc:
+        return fail(exc)
+
+    if args.command == "run":
+        def _signal_handler(_signum, _frame):
+            monitor._running = False
+
+        signal.signal(signal.SIGTERM, _signal_handler)
+        if hasattr(signal, "SIGINT"):
+            signal.signal(signal.SIGINT, _signal_handler)
+        return monitor.run_forever()
+
+    # One-shot commands: each handler returns the JSON-serialisable reply.
+    handlers = {
+        "status": lambda: monitor.get_status(args.run_id),
+        "list-events": lambda: monitor.list_events(args.limit, args.state, args.severity, args.run_id),
+        "get-event": lambda: monitor.get_event(args.event_id),
+        "ack-event": lambda: monitor.ack_event(args.event_id, args.note),
+        "cleanup": lambda: {"success": True, "deleted": monitor.graph.cleanup_anomaly_events(args.retention_days)},
+    }
+    handler = handlers.get(args.command)
+    if handler is None:
+        return 1
+    try:
+        reply = handler()
+    except Exception as exc:
+        return fail(exc)
+    print(json.dumps(reply, default=str))
+    return 0
+
+
+if __name__ == "__main__":  # Run as a script: exit with main()'s return code.
+    sys.exit(main())
+
diff --git a/scripts/anomaly_rules.py b/scripts/anomaly_rules.py
new file mode 100644
index 0000000..2aa274d
--- /dev/null
+++ b/scripts/anomaly_rules.py
@@ -0,0 +1,217 @@
+#!/usr/bin/env python3
+"""
+Deterministic anomaly scoring primitives for monitoring agents.
+
+This module intentionally avoids external dependencies so it can run in
+packaged/offline environments.
+"""
+
+from __future__ import annotations
+
+import hashlib
+import math
+from datetime import datetime, timezone
+from statistics import mean, median, pstdev
+from typing import Any, Dict, List, Optional
+
+
+def safe_float(value: Any) -> Optional[float]:
+    """Convert ``value`` to a finite float, or return ``None`` when that is impossible.
+
+    Booleans map to 0.0/1.0, numbers are passed through and anything else is
+    parsed from its stripped string form. ``None``, blank or non-numeric text,
+    NaN, infinities and integers too large for a float all yield ``None``;
+    this function never raises for such input.
+    """
+    if value is None:
+        return None
+    try:
+        if isinstance(value, (bool, int, float)):
+            result = float(value)
+        else:
+            text = str(value).strip()
+            result = float(text)
+    except (ValueError, OverflowError):
+        # OverflowError: an int such as 10**400 has no float representation.
+        return None
+    return result if math.isfinite(result) else None
+
+
+def parse_timestamp(ts: Optional[str]) -> Optional[datetime]:
+    """Turn an ISO-8601-style string into a UTC datetime, or ``None`` if unparseable.
+
+    A trailing ``Z`` is read as UTC and naive timestamps are assumed to be UTC.
+    """
+    text = str(ts).strip() if ts else ""
+    if not text:
+        return None
+    # Rewrite the "Z" suffix as an explicit offset before calling fromisoformat.
+    normalized = f"{text[:-1]}+00:00" if text.endswith("Z") else text
+    try:
+        parsed = datetime.fromisoformat(normalized)
+    except ValueError:
+        return None
+    aware = parsed if parsed.tzinfo is not None else parsed.replace(tzinfo=timezone.utc)
+    return aware.astimezone(timezone.utc)
+
+
+def is_quality_good(quality: Optional[str]) -> bool:
+    """Report whether a quality label counts as good.
+
+    Matching is case-insensitive: any label containing ``good`` or ``ok``, or
+    the exact code ``192``, passes. ``None`` and blank labels fail.
+    """
+    if quality is None:
+        return False
+    label = str(quality).strip().lower()
+    return label == "192" or any(token in label for token in ("good", "ok"))
+
+
+def is_stale(timestamp: Optional[str], staleness_sec: int, now: Optional[datetime] = None) -> bool:
+    """Tell whether a sample is older than ``staleness_sec`` seconds relative to ``now``.
+
+    A non-positive limit disables the check; an unparseable timestamp is stale.
+    """
+    if staleness_sec <= 0:
+        return False
+    sampled_at = parse_timestamp(timestamp)
+    reference = now or datetime.now(timezone.utc)
+    return sampled_at is None or (reference - sampled_at).total_seconds() > staleness_sec
+
+
+def _mad(values: List[float]) -> float:
+    """Return the median absolute deviation of ``values`` (0.0 for an empty list)."""
+    if not values:
+        return 0.0
+    center = median(values)
+    # values is non-empty here, so the deviations list is too.
+    return median([abs(sample - center) for sample in values])
+
+
+def _percentile_rank(values: List[float], current: float) -> float:
+    """Return the fraction of ``values`` at or below ``current`` (0.0 when empty)."""
+    if not values:
+        return 0.0
+    at_or_below = [sample for sample in values if sample <= current]
+    return len(at_or_below) / len(values)
+
+
+def compute_deviation_scores(
+    current_value: Any,
+    history_values: List[Any],
+    prev_value: Any = None,
+    thresholds: Optional[Dict[str, float]] = None,
+) -> Dict[str, Any]:
+    """
+    Compute deterministic anomaly scores and candidate flags.
+
+    Threshold defaults are intentionally conservative and should be configured
+    per process during rollout.
+
+    Returns a dict with ``candidate``, the triggering ``reasons``, a ``category``
+    and the individual scores. For threshold-only candidates the category is
+    ``spike`` when the value jumped by at least ``z`` standard deviations since
+    ``prev_value`` and ``drift`` when it sits in a historical tail without
+    such a jump.
+    """
+    cfg = {
+        "z": 3.0,
+        "mad": 3.5,
+        "rate": 0.0,
+        "flatline_std_epsilon": 1e-6,
+        "stuck_window_size": 20,
+    }
+    if thresholds:
+        cfg.update({k: v for k, v in thresholds.items() if v is not None})
+
+    current = safe_float(current_value)
+    hist = [v for v in (safe_float(x) for x in history_values) if v is not None]
+    previous = safe_float(prev_value)
+
+    result: Dict[str, Any] = {
+        "candidate": False,
+        "reasons": [],
+        "category": "normal",
+        "z_score": 0.0,
+        "mad_score": 0.0,
+        "delta_rate": 0.0,
+        "window_volatility": 0.0,
+        "percentile_rank": 0.0,
+        "drift_score": 0.0,
+        "history_points": len(hist),
+    }
+    if current is None:
+        result["category"] = "invalid_value"
+        result["reasons"].append("current_value_not_numeric")
+        return result
+    if not hist:
+        result["category"] = "insufficient_history"
+        result["reasons"].append("history_empty")
+        return result
+
+    mu = mean(hist)
+    # Floor sigma so a perfectly flat history cannot divide by zero.
+    sigma = max(pstdev(hist) if len(hist) > 1 else 0.0, 1e-9)
+    z_score = (current - mu) / sigma
+    result["z_score"] = z_score
+    result["window_volatility"] = sigma
+    result["percentile_rank"] = _percentile_rank(hist, current)
+    # 1.4826 scales the MAD to be comparable with a standard deviation.
+    mad_denom = max(_mad(hist) * 1.4826, 1e-9)
+    mad_score = abs(current - median(hist)) / mad_denom
+    result["mad_score"] = mad_score
+    if previous is not None:
+        result["delta_rate"] = abs(current - previous)
+
+    if abs(z_score) >= float(cfg["z"]):
+        result["candidate"] = True
+        result["reasons"].append("z_score_threshold")
+    if mad_score >= float(cfg["mad"]):
+        result["candidate"] = True
+        result["reasons"].append("mad_score_threshold")
+    if float(cfg["rate"]) > 0 and result["delta_rate"] >= float(cfg["rate"]):
+        result["candidate"] = True
+        result["reasons"].append("delta_rate_threshold")
+
+    if len(hist) >= 20:
+        midpoint = len(hist) // 2
+        trend_delta = abs(mean(hist[midpoint:]) - mean(hist[:midpoint]))
+        trend_score = trend_delta / sigma
+        result["drift_score"] = trend_score
+        if trend_score >= 1.25 and (result["percentile_rank"] >= 0.85 or result["percentile_rank"] <= 0.15):
+            result["candidate"] = True
+            result["reasons"].append("drift_trend")
+
+    recent = hist[-int(max(3, cfg["stuck_window_size"])) :]
+    recent_std = pstdev(recent) if len(recent) > 1 else 0.0
+    if recent_std <= float(cfg["flatline_std_epsilon"]):
+        if previous is not None and abs(current - previous) <= float(cfg["flatline_std_epsilon"]):
+            result["candidate"] = True
+            result["reasons"].append("flatline_detected")
+            result["category"] = "stuck"
+
+    if result["category"] == "normal" and result["candidate"]:
+        # A flatline already set "stuck" above, so only threshold/drift reasons get here.
+        in_tail = result["percentile_rank"] >= 0.95 or result["percentile_rank"] <= 0.05
+        jumped = previous is not None and result["delta_rate"] >= float(cfg["z"]) * sigma
+        if result["delta_rate"] > 0 and "delta_rate_threshold" in result["reasons"]:
+            result["category"] = "spike"
+        elif "drift_trend" in result["reasons"]:
+            result["category"] = "drift"
+        elif abs(z_score) > 0 and len(hist) >= 20:
+            # Tail position counts as drift only if the value crept there; an abrupt
+            # step from the previous sample is a spike even when it lands in a tail.
+            result["category"] = "drift" if in_tail and not jumped else "spike"
+        else:
+            result["category"] = "deviation"
+
+    return result
+
+
+def dedup_key(tag_path: str, category: str, bucket_minutes: int = 10) -> str:
+    """Hash tag, category and the current UTC time bucket into a dedup signature."""
+    # Clamp the minutes (not the seconds) so 0 or a negative value means a 1-minute bucket.
+    bucket_seconds = max(1, bucket_minutes) * 60
+    bucket = int(datetime.now(timezone.utc).timestamp() // bucket_seconds)
+    return hashlib.sha1(f"{tag_path}|{category}|{bucket}".encode("utf-8")).hexdigest()
+
diff --git a/scripts/fixtures/anomaly_replay_cases.json b/scripts/fixtures/anomaly_replay_cases.json
new file mode 100644
index 0000000..544cd3f
--- /dev/null
+++ b/scripts/fixtures/anomaly_replay_cases.json
@@ -0,0 +1,32 @@
+{
+  "cases": [
+    {
+      "id": "normal-baseline",
+      "current_value": 50.3,
+      "prev_value": 50.1,
+      "history_values": [49.9, 50.1, 50.0, 50.2, 50.1, 49.8, 50.3, 50.0, 49.9, 50.2, 50.1, 50.0, 49.9, 50.2, 50.1, 50.0, 50.2, 49.8, 50.0, 50.1, 50.0, 49.9, 50.1, 50.2, 50.0, 50.1, 49.9, 50.0, 50.1, 50.0],
+      "expected_candidate": false
+    },
+    {
+      "id": "sudden-spike",
+      "current_value": 91.0,
+      "prev_value": 49.8,
+      "history_values": [49.9, 50.1, 50.0, 50.2, 50.1, 49.8, 50.3, 50.0, 49.9, 50.2, 50.1, 50.0, 49.9, 50.2, 50.1, 50.0, 50.2, 49.8, 50.0, 50.1, 50.0, 49.9, 50.1, 50.2, 50.0, 50.1, 49.9, 50.0, 50.1, 50.0],
+      "expected_candidate": true
+    },
+    {
+      "id": "slow-drift-tail",
+      "current_value": 61.5,
+      "prev_value": 61.0,
+      "history_values": [50.0, 50.2, 50.3, 50.5, 50.7, 50.9, 51.1, 51.4, 51.8, 52.1, 52.6, 53.0, 53.5, 54.0, 54.5, 55.1, 55.6, 56.0, 56.6, 57.0, 57.5, 58.0, 58.4, 58.9, 59.4, 59.9, 60.2, 60.6, 60.9, 61.2],
+      "expected_candidate": true
+    },
+    {
+      "id": "flatline-stuck",
+      "current_value": 72.0,
+      "prev_value": 72.0,
+      "history_values": [72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0, 72.0],
+      "expected_candidate": true
+    }
+  ]
+}
diff --git a/scripts/graph_api.py b/scripts/graph_api.py
index 8e36e7c..e3bff45 100644
--- a/scripts/graph_api.py
+++ b/scripts/graph_api.py
@@ -77,6 +77,8 @@ class GraphAPI:
         "processdeviation": "mes",
         "functionallocation": "mes",
         "vendor": "mes",
+        "agentrun": "anomaly",
+        "anomalyevent": "anomaly",
     }
 
     # Color palette for node types
@@ -91,6 +93,7 @@ class GraphAPI:
         "flows": "#E91E63",
         "overview": "#607D8B",
         "mes": "#00897B",
+        "anomaly": "#F44336",
         "other": "#9E9E9E",
     }
 
@@ -252,9 +255,11 @@ def get_neighbors(
                     WHERE center.name = $node_id 
                        OR center.name ENDS WITH $node_id
                        OR center.name CONTAINS $node_id
+                       OR center.event_id = $node_id
+                       OR center.run_id = $node_id
                     RETURN elementId(center) as id,
                            labels(center)[0] as type,
-                           center.name as label,
+                           coalesce(center.name, center.event_id, center.run_id, center.symptom, center.phrase, 'unknown') as label,
                            properties(center) as props
                     LIMIT 1
                 """
@@ -264,9 +269,11 @@ def get_neighbors(
                     WHERE center.name = $node_id 
                        OR center.name ENDS WITH $node_id
                        OR center.name CONTAINS $node_id
+                       OR center.event_id = $node_id
+                       OR center.run_id = $node_id
                     RETURN elementId(center) as id,
                            labels(center)[0] as type,
-                           center.name as label,
+                           coalesce(center.name, center.event_id, center.run_id, center.symptom, center.phrase, 'unknown') as label,
                            properties(center) as props
                     LIMIT 1
                 """
diff --git a/scripts/ignition_api_client.py b/scripts/ignition_api_client.py
index d0d7e41..e8fbccf 100644
--- a/scripts/ignition_api_client.py
+++ b/scripts/ignition_api_client.py
@@ -22,7 +22,11 @@
 from urllib.parse import urljoin, quote
 
 import requests
-from dotenv import load_dotenv
+try:
+    from dotenv import load_dotenv
+except ImportError:  # pragma: no cover - optional fallback for minimal envs
+    def load_dotenv(*_args, **_kwargs):
+        return False
 
 load_dotenv()
 
@@ -243,60 +247,55 @@ def read_tags(self, paths: List[str]) -> List[TagValue]:
 
     @staticmethod
     def _local_iso_to_utc(dt_str: str) -> str:
-        """Convert a bare ISO datetime string (assumed local) to UTC.
+        """
+        Convert a bare ISO datetime string (assumed local time) to UTC.
 
-        If the string already has a timezone indicator (Z, +, -)
-        or looks like epoch milliseconds, it is returned unchanged.
+        If the input already contains timezone info or appears to be epoch
+        milliseconds, it is returned unchanged.
         """
         from datetime import datetime, timezone
 
-        s = str(dt_str).strip()
+        text = str(dt_str).strip()
+        if not text:
+            return text
 
-        # Epoch ms – pass through
-        if s.isdigit():
-            return s
+        # Epoch millis (or seconds) should pass through unchanged.
+        if text.isdigit():
+            return text
 
-        # Already has TZ info – pass through
-        if s.endswith("Z") or "+" in s[10:] or s[10:].count("-") > 0:
-            return s
+        # Already timezone-aware.
+        if text.endswith("Z") or "+" in text[10:] or text[10:].count("-") > 0:
+            return text
 
         try:
-            naive = datetime.fromisoformat(s)
-            local_dt = naive.astimezone()          # attach local TZ
+            naive = datetime.fromisoformat(text)
+            local_dt = naive.astimezone()
             utc_dt = local_dt.astimezone(timezone.utc)
             return utc_dt.strftime("%Y-%m-%dT%H:%M:%S")
         except (ValueError, TypeError):
-            return s
+            return text
 
     def query_tag_history(
         self,
         tag_paths: List[str],
         start_date: str,
         end_date: str,
-        return_size: int = 100,
+        return_size: int = 200,
         aggregation_mode: str = "Average",
         return_format: str = "Wide",
         interval_minutes: Optional[int] = None,
         include_bounding_values: bool = False,
     ) -> Optional[Any]:
-        """Query historical tag values via the WebDev queryTagHistory endpoint.
-
-        Bare ISO datetime strings (no timezone suffix) are assumed to be in
-        the server's local timezone and are converted to UTC before sending
-        to the gateway (which interprets all times as UTC).
-
-        Args:
-            tag_paths: Tag paths with provider prefix, e.g. ['[default]Folder/Tag'].
-            start_date: ISO datetime string (local) or epoch ms.
-            end_date: ISO datetime string (local) or epoch ms.
-            return_size: Max rows to return (default 100).
-            aggregation_mode: Average, MinMax, LastValue, Sum, Minimum, Maximum.
-            return_format: Wide or Tall.
-            interval_minutes: Aggregation interval in minutes.
-            include_bounding_values: Include values at boundaries.
         """
-        normalised = [self._ensure_provider_prefix(p) for p in tag_paths]
+        Query historical tag values from the WebDev queryTagHistory endpoint.
+
+        Dates may be passed as local ISO strings; they are converted to UTC
+        to match Ignition endpoint expectations.
+        """
+        if not tag_paths:
+            return {"error": "No tag paths provided", "tagPaths": []}
 
+        normalised = [self._ensure_provider_prefix(p) for p in tag_paths]
         utc_start = self._local_iso_to_utc(start_date)
         utc_end = self._local_iso_to_utc(end_date)
 
@@ -304,19 +303,17 @@ def query_tag_history(
             "tagPaths": ",".join(normalised),
             "startDate": utc_start,
             "endDate": utc_end,
-            "returnSize": return_size,
+            "returnSize": int(return_size),
             "aggregationMode": aggregation_mode,
             "returnFormat": return_format,
-            "includeBoundingValues": str(include_bounding_values).lower(),
+            "includeBoundingValues": str(bool(include_bounding_values)).lower(),
         }
         if interval_minutes is not None:
-            params["intervalMinutes"] = interval_minutes
+            params["intervalMinutes"] = int(interval_minutes)
 
         data = self._get("system/webdev/Axilon/queryTagHistory", params=params)
-
         if data is None:
             return {"error": "API request failed or not configured", "tagPaths": normalised}
-
         return data
 
     # --------------------------------------------------------------------- #
diff --git a/scripts/neo4j_ontology.py b/scripts/neo4j_ontology.py
index 110719f..380e3cb 100644
--- a/scripts/neo4j_ontology.py
+++ b/scripts/neo4j_ontology.py
@@ -9,7 +9,11 @@
 from typing import Dict, List, Optional, Any, Union
 from dataclasses import dataclass, field
 from contextlib import contextmanager
-from dotenv import load_dotenv
+try:
+    from dotenv import load_dotenv
+except ImportError:  # pragma: no cover - optional fallback for minimal envs
+    def load_dotenv(*_args, **_kwargs):
+        return False
 from neo4j import GraphDatabase, Driver, Session
 
 
@@ -147,6 +151,8 @@ def create_indexes(self) -> None:
                 "CREATE CONSTRAINT project_name IF NOT EXISTS FOR (p:Project) REQUIRE p.name IS UNIQUE",
                 "CREATE CONSTRAINT script_name IF NOT EXISTS FOR (s:Script) REQUIRE s.name IS UNIQUE",
                 "CREATE CONSTRAINT namedquery_name IF NOT EXISTS FOR (q:NamedQuery) REQUIRE q.name IS UNIQUE",
+                "CREATE CONSTRAINT agentrun_id IF NOT EXISTS FOR (r:AgentRun) REQUIRE r.run_id IS UNIQUE",
+                "CREATE CONSTRAINT anomalyevent_id IF NOT EXISTS FOR (e:AnomalyEvent) REQUIRE e.event_id IS UNIQUE",
             ]
 
             # Regular indexes
@@ -186,6 +192,11 @@ def create_indexes(self) -> None:
                 "CREATE INDEX hmitextlist_name IF NOT EXISTS FOR (htl:HMITextList) ON (htl.name)",
                 "CREATE INDEX plctagtable_name IF NOT EXISTS FOR (pt:PLCTagTable) ON (pt.name)",
                 "CREATE INDEX plctag_name IF NOT EXISTS FOR (ptg:PLCTag) ON (ptg.name)",
+                # Agent monitoring indexes
+                "CREATE INDEX anomalyevent_created IF NOT EXISTS FOR (e:AnomalyEvent) ON (e.created_at)",
+                "CREATE INDEX anomalyevent_state IF NOT EXISTS FOR (e:AnomalyEvent) ON (e.state)",
+                "CREATE INDEX anomalyevent_severity IF NOT EXISTS FOR (e:AnomalyEvent) ON (e.severity)",
+                "CREATE INDEX anomalyevent_dedup_key IF NOT EXISTS FOR (e:AnomalyEvent) ON (e.dedup_key)",
             ]
 
             for constraint in constraints:
@@ -202,6 +213,95 @@ def create_indexes(self) -> None:
                     if "already exists" not in str(e).lower():
                         print(f"[WARNING] Index error: {e}")
 
+    def init_agent_monitoring_schema(self) -> None:
+        """Ensure agent monitoring constraints and indexes exist by delegating to create_indexes()."""
+        self.create_indexes()
+
+    def list_anomaly_events(
+        self,
+        limit: int = 100,
+        state: Optional[str] = None,
+        severity: Optional[str] = None,
+        run_id: Optional[str] = None,
+    ) -> List[Dict[str, Any]]:
+        """List anomaly events newest-first with linked tag/equipment names; given filters are ANDed."""
+        with self.session() as session:
+            clauses = []
+            params: Dict[str, Any] = {"limit": max(1, min(limit, 500))}  # clamp limit to 1..500
+            if state:
+                clauses.append("e.state = $state")
+                params["state"] = state
+            if severity:
+                clauses.append("e.severity = $severity")
+                params["severity"] = severity
+            if run_id:
+                clauses.append("e.run_id = $run_id")
+                params["run_id"] = run_id
+            where = f"WHERE {' AND '.join(clauses)}" if clauses else ""  # fixed clause text; values are bound params
+            query = f"""
+                MATCH (e:AnomalyEvent)
+                {where}
+                OPTIONAL MATCH (e)-[:OBSERVED_ON]->(t:ScadaTag)
+                OPTIONAL MATCH (e)-[:AFFECTS]->(eq:Equipment)
+                RETURN e, collect(DISTINCT t.name) AS tags, collect(DISTINCT eq.name) AS equipment
+                ORDER BY e.created_at DESC
+                LIMIT $limit
+            """
+            result = session.run(query, **params)
+            events: List[Dict[str, Any]] = []
+            for record in result:
+                node = record["e"]
+                props = dict(node)  # copy node properties into a plain dict
+                props["tags"] = [x for x in record["tags"] if x]  # drop empty/None names
+                props["equipment"] = [x for x in record["equipment"] if x]
+                events.append(props)
+            return events
+
+    def get_anomaly_event(self, event_id: str) -> Optional[Dict[str, Any]]:
+        """Return one anomaly event with its tags, equipment, and RELATED_TO entries, or None if not found."""
+        with self.session() as session:
+            result = session.run(
+                """
+                MATCH (e:AnomalyEvent {event_id: $event_id})
+                OPTIONAL MATCH (e)-[:OBSERVED_ON]->(t:ScadaTag)
+                OPTIONAL MATCH (e)-[:AFFECTS]->(eq:Equipment)
+                OPTIONAL MATCH (e)-[r:RELATED_TO]->(n)
+                RETURN e,
+                       collect(DISTINCT t.name) AS tags,
+                       collect(DISTINCT eq.name) AS equipment,
+                       collect(DISTINCT {type: type(r), label: labels(n)[0], name: coalesce(n.name, n.symptom, n.phrase)}) AS related
+                LIMIT 1
+                """,
+                event_id=event_id,
+            )
+            record = result.single()
+            if not record:
+                return None
+            data = dict(record["e"])  # copy node properties into a plain dict
+            data["tags"] = [x for x in record["tags"] if x]  # drop empty/None names
+            data["equipment"] = [x for x in record["equipment"] if x]
+            data["related"] = [
+                x for x in record["related"] if x and x.get("name")  # skip entries without a name
+            ]
+            return data
+
+    def cleanup_anomaly_events(self, retention_days: int = 14) -> int:
+        """Detach-delete anomaly events whose created_at is older than retention_days; return the count deleted."""
+        with self.session() as session:
+            result = session.run(
+                """
+                MATCH (e:AnomalyEvent)
+                WHERE e.created_at IS NOT NULL
+                  AND datetime(e.created_at) < datetime() - duration({days: $days})
+                WITH collect(e) AS old_events
+                FOREACH (n IN old_events | DETACH DELETE n)
+                RETURN size(old_events) AS deleted
+                """,
+                days=max(1, retention_days),  # window is never shorter than one day
+            )
+            record = result.single()
+            return int(record["deleted"]) if record else 0  # 0 when the query returns no row
+
     def clear_all(self) -> None:
         """Clear all nodes and relationships. USE WITH CAUTION."""
         with self.session() as session:
@@ -4192,12 +4292,22 @@ def main():
             "tia-projects",
             "tia-project-resources",
             "db-connections",
+            "init-agent-schema",
+            "list-anomaly-events",
+            "get-anomaly-event",
+            "cleanup-anomaly-events",
         ],
         help="Command to execute",
     )
     parser.add_argument("--file", "-f", help="JSON file for import/export")
     parser.add_argument("--query", "-q", help="Query string for search")
     parser.add_argument("--project", "-p", help="Project name for project-resources")
+    parser.add_argument("--event-id", help="Event ID for get-anomaly-event")
+    parser.add_argument("--state", help="Filter anomaly events by state")
+    parser.add_argument("--severity", help="Filter anomaly events by severity")
+    parser.add_argument("--run-id", help="Filter anomaly events by run_id")
+    parser.add_argument("--limit", type=int, default=100, help="Limit results for list commands")
+    parser.add_argument("--retention-days", type=int, default=14, help="Retention window in days")
     parser.add_argument("--json", action="store_true", help="Output in JSON format")
     parser.add_argument(
         "--enrichment-status",
@@ -4437,7 +4547,43 @@ def main():
                         f"  {c['name']} ({c['database_type']}) "
                         f"- {c['url']} [{enabled}]"
                     )
+        elif args.command == "init-agent-schema":
+            graph.init_agent_monitoring_schema()
+            print("[OK] Initialized agent monitoring schema")
+
+        elif args.command == "list-anomaly-events":
+            events = graph.list_anomaly_events(
+                limit=args.limit,
+                state=args.state,
+                severity=args.severity,
+                run_id=args.run_id,
+            )
+            if args.json:
+                print(json_module.dumps(events))
+            else:
+                print(f"Anomaly events: {len(events)}")
+                for event in events:
+                    print(
+                        f"- {event.get('event_id')} {event.get('severity')} "
+                        f"{event.get('summary', '')[:80]}"
+                    )
+
+        elif args.command == "get-anomaly-event":
+            if not args.event_id:
+                print("[ERROR] --event-id required for get-anomaly-event")
+                return
+            event = graph.get_anomaly_event(args.event_id)
+            if args.json:
+                print(json_module.dumps(event or {}))
+            else:
+                if not event:
+                    print(f"[ERROR] Event not found: {args.event_id}")
+                    return
+                print(json_module.dumps(event, indent=2))
 
+        elif args.command == "cleanup-anomaly-events":
+            deleted = graph.cleanup_anomaly_events(args.retention_days)
+            print(f"[OK] Deleted {deleted} anomaly events older than {args.retention_days} days")
 
 if __name__ == "__main__":
     main()

From 1f1f6b4dc1f7005d8d144d21a10ff2dd77cce070 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Mon, 2 Mar 2026 18:06:40 +0000
Subject: [PATCH 02/16] Emit provider failures as anomaly feed events

Co-authored-by: leor <leor@fortresslabs.com>
---
 scripts/anomaly_monitor.py | 183 ++++++++++++++++++++++++++++++++-----
 1 file changed, 161 insertions(+), 22 deletions(-)

diff --git a/scripts/anomaly_monitor.py b/scripts/anomaly_monitor.py
index 70a0f4b..9049108 100644
--- a/scripts/anomaly_monitor.py
+++ b/scripts/anomaly_monitor.py
@@ -263,7 +263,7 @@ def _extract_history_values(self, history_data: Any, tag_path: str) -> List[floa
                 values.append(val)
         return values
 
-    def fetch_history_values(self, tag_path: str) -> List[float]:
+    def fetch_history_values(self, tag_path: str) -> tuple[List[float], Optional[str]]:
         minutes = int(self.config.get("historyWindowMinutes", 360))
         end_dt = datetime.now(timezone.utc)
         start_dt = end_dt - timedelta(minutes=minutes)
@@ -275,7 +275,9 @@ def fetch_history_values(self, tag_path: str) -> List[float]:
             aggregation_mode="Average",
             return_format="Wide",
         )
-        return self._extract_history_values(data, tag_path)
+        if isinstance(data, dict) and data.get("error"):
+            return [], str(data.get("error"))
+        return self._extract_history_values(data, tag_path), None
 
     def get_context(self, tag_path: str) -> Dict[str, Any]:
         with self.graph.session() as session:
@@ -517,6 +519,97 @@ def persist_event(
 
         return event_data
 
+    def _emit_persisted_event(self, persisted: Dict[str, Any]) -> None:
+        """Emit an AGENT_EVENT for a persisted event dict; event_id, severity, and summary keys are required."""
+        emit("AGENT_EVENT", {
+            "runId": self.run_id,
+            "eventId": persisted["event_id"],
+            "severity": persisted["severity"],
+            "summary": persisted["summary"],
+            "category": persisted.get("category"),  # optional keys fall back to None
+            "entityRefs": {
+                "tag": persisted.get("tag_name") or persisted.get("source_tag"),  # prefer tag_name, else source_tag
+                "sourceTag": persisted.get("source_tag"),
+            },
+            "createdAt": persisted.get("created_at"),
+        })
+
+    def emit_provider_failure_event(
+        self,
+        code: str,
+        message: str,
+        *,
+        severity: str = "high",
+        category: str = "quality-issue",
+        source_tag: Optional[str] = None,
+        details: Optional[Dict[str, Any]] = None,
+    ) -> bool:
+        """
+        Emit AGENT_ERROR, then persist and stream a provider-health anomaly event.
+
+        Returns:
+            True if persist_event returned an event (which is then streamed), else False.
+        """
+        emit("AGENT_ERROR", {
+            "runId": self.run_id,
+            "code": code,
+            "message": message,
+            "recoverable": True,
+            "timestamp": utc_now_iso(),
+        })
+
+        tag = source_tag or f"provider://{code}"  # synthetic tag path when no source tag is given
+        detail_blob = json.dumps(details or {}, default=str)  # default=str stringifies non-JSON values
+        context = {
+            "tag_path": tag,
+            "tag_name": source_tag or "ProviderHealth",
+            "equipment": [],
+            "symptoms": [],
+            "causes": [],
+            "patterns": [],
+            "safety": [],
+        }
+        deterministic = {
+            "candidate": True,
+            "reasons": [code],
+            "category": category,
+            "z_score": 0.0,
+            "mad_score": 0.0,
+            "delta_rate": 0.0,
+            "window_volatility": 0.0,
+            "history_points": 0,
+        }
+        triage = {
+            "summary": message,
+            "category": category,
+            "severity": severity,
+            "confidence": 0.9,
+            "probable_causes": [message],
+            "verification_checks": [
+                "Check Ignition gateway connectivity and credentials.",
+                "Validate tag provider availability and endpoint health.",
+            ],
+            "safety_notes": [],
+            "rationale": f"Provider health event ({code}). Details: {detail_blob}",
+            "related_entities": [],
+        }
+        persisted = self.persist_event(
+            context=context,
+            deterministic=deterministic,
+            live_sample={
+                "path": tag,
+                "value": "",
+                "quality": "Bad",
+                "timestamp": utc_now_iso(),
+                "data_type": "provider_health",
+            },
+            triage=triage,
+        )
+        if persisted:  # falsy result means nothing new was persisted, so nothing is streamed
+            self._emit_persisted_event(persisted)
+            return True
+        return False
+
     # -----------------------------
     # Monitoring loop
     # -----------------------------
@@ -527,13 +620,14 @@ def run_cycle(self) -> Dict[str, Any]:
         min_history = int(self.config.get("minHistoryPoints", 30))
 
         if not self.api.is_configured:
-            emit("AGENT_ERROR", {
-                "runId": self.run_id,
-                "code": "ignition_not_configured",
-                "message": "Ignition API URL/token not configured.",
-                "recoverable": True,
-                "timestamp": utc_now_iso(),
-            })
+            emitted = self.emit_provider_failure_event(
+                "ignition_not_configured",
+                "Ignition API URL/token not configured.",
+                severity="critical",
+                category="state-conflict",
+            )
+            if emitted:
+                metrics["emitted"] += 1
             metrics["cycleMs"] = int((time.time() - cycle_start) * 1000)
             return metrics
 
@@ -553,17 +647,31 @@ def run_cycle(self) -> Dict[str, Any]:
         live_values = self.api.read_tags(tag_paths)
         candidates: List[Dict[str, Any]] = []
         now = datetime.now(timezone.utc)
+        live_error_count = 0
+        live_error_samples: List[str] = []
+        history_error_count = 0
+        history_error_samples: List[str] = []
+        valid_live_count = 0
 
         for tv in live_values:
             if tv.error:
+                live_error_count += 1
+                if len(live_error_samples) < 5:
+                    live_error_samples.append(f"{tv.path}: {tv.error}")
                 continue
+            valid_live_count += 1
             if not is_quality_good(tv.quality):
                 # quality gate: only emit quality anomalies if this persists via triage.
                 continue
             if is_stale(tv.timestamp, int(thresholds.get("stalenessSec", 120)), now=now):
                 continue
 
-            history = self.fetch_history_values(tv.path)
+            history, history_error = self.fetch_history_values(tv.path)
+            if history_error:
+                history_error_count += 1
+                if len(history_error_samples) < 5:
+                    history_error_samples.append(f"{tv.path}: {history_error}")
+                continue
             if len(history) < min_history:
                 continue
 
@@ -594,6 +702,48 @@ def run_cycle(self) -> Dict[str, Any]:
                     }
                 )
 
+        if live_values and live_error_count == len(live_values):
+            emitted = self.emit_provider_failure_event(
+                "live_tag_provider_failed",
+                f"Live tag provider failed for all reads ({live_error_count}/{len(live_values)}).",
+                severity="high",
+                category="quality-issue",
+                details={"samples": live_error_samples},
+            )
+            if emitted:
+                metrics["emitted"] += 1
+        elif live_error_count > 0:
+            emitted = self.emit_provider_failure_event(
+                "live_tag_provider_partial_failure",
+                f"Live tag provider partially failed ({live_error_count}/{len(live_values)} reads).",
+                severity="medium",
+                category="quality-issue",
+                details={"samples": live_error_samples},
+            )
+            if emitted:
+                metrics["emitted"] += 1
+
+        if valid_live_count > 0 and history_error_count >= max(1, int(valid_live_count * 0.8)):
+            emitted = self.emit_provider_failure_event(
+                "history_provider_failed",
+                f"History provider failed for most queries ({history_error_count}/{valid_live_count}).",
+                severity="high",
+                category="quality-issue",
+                details={"samples": history_error_samples},
+            )
+            if emitted:
+                metrics["emitted"] += 1
+        elif history_error_count > 0:
+            emitted = self.emit_provider_failure_event(
+                "history_provider_partial_failure",
+                f"History provider partially failed ({history_error_count}/{valid_live_count}).",
+                severity="medium",
+                category="quality-issue",
+                details={"samples": history_error_samples},
+            )
+            if emitted:
+                metrics["emitted"] += 1
+
         metrics["candidates"] = len(candidates)
         max_candidates = int(self.config.get("maxCandidatesPerCycle", 25))
         max_triage = int(self.config.get("maxLlmTriagesPerCycle", 5))
@@ -629,18 +779,7 @@ def run_cycle(self) -> Dict[str, Any]:
             )
             if persisted:
                 metrics["emitted"] += 1
-                emit("AGENT_EVENT", {
-                    "runId": self.run_id,
-                    "eventId": persisted["event_id"],
-                    "severity": persisted["severity"],
-                    "summary": persisted["summary"],
-                    "category": persisted.get("category"),
-                    "entityRefs": {
-                        "tag": persisted.get("tag_name") or persisted.get("source_tag"),
-                        "sourceTag": persisted.get("source_tag"),
-                    },
-                    "createdAt": persisted.get("created_at"),
-                })
+                self._emit_persisted_event(persisted)
 
         metrics["cycleMs"] = int((time.time() - cycle_start) * 1000)
         return metrics

From 1a17e651432ab28c7109ef46bc5fc05987e4be53 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Mon, 2 Mar 2026 18:19:17 +0000
Subject: [PATCH 03/16] Migrate tests to pytest with ingest coverage

Co-authored-by: leor <leor@fortresslabs.com>
---
 .gitignore                                    |   3 +-
 pytest.ini                                    |   4 +
 requirements-dev.txt                          |   1 +
 tests/README.md                               |  46 +++++
 tests/conftest.py                             |  35 ++++
 .../integration/simulated_ignition_server.py  | 170 ++++++++++++++++++
 .../integration/test_live_value_sim_server.py |  75 ++++++++
 tests/unit/test_anomaly_rules.py              |  64 +++++++
 tests/unit/test_ingest_siemens_parser.py      |  72 ++++++++
 tests/unit/test_ingest_workbench_parser.py    | 119 ++++++++++++
 10 files changed, 587 insertions(+), 2 deletions(-)
 create mode 100644 pytest.ini
 create mode 100644 requirements-dev.txt
 create mode 100644 tests/README.md
 create mode 100644 tests/conftest.py
 create mode 100644 tests/integration/simulated_ignition_server.py
 create mode 100644 tests/integration/test_live_value_sim_server.py
 create mode 100644 tests/unit/test_anomaly_rules.py
 create mode 100644 tests/unit/test_ingest_siemens_parser.py
 create mode 100644 tests/unit/test_ingest_workbench_parser.py

diff --git a/.gitignore b/.gitignore
index 085a6d7..28f5878 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,8 +9,7 @@ venv/
 ENV/
 .venv
 
-# Test files and outputs
-tests/
+# Test outputs
 *_updated*.xml
 *_applied*.xml
 *_diffs/
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..3b2c446
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,4 @@
+[pytest]
+testpaths = tests
+python_files = test_*.py
+addopts = -q
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..e079f8a
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1 @@
+pytest
diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 0000000..350a8d4
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1,46 @@
+# Test Flow: Agents Monitoring + Ingest
+
+This repository now includes a lightweight test scaffold using `pytest`.
+
+## Layout
+
+- `tests/unit/`
+  - `test_anomaly_rules.py`  
+    Unit tests for deterministic anomaly scoring and quality/staleness gates.
+  - `test_ingest_workbench_parser.py`  
+    Unit tests for workbench ingest parsing.
+  - `test_ingest_siemens_parser.py`  
+    Unit tests for Siemens `.st` ingest parsing.
+
+- `tests/integration/`
+  - `simulated_ignition_server.py`  
+    Local simulated live/history webserver implementing:
+    - `/system/webdev/Axilon/getTags`
+    - `/system/webdev/Axilon/queryTagHistory`
+  - `test_live_value_sim_server.py`  
+    Integration tests for `IgnitionApiClient` + anomaly scoring with simulated live values.
+
+## Run all tests
+
+```bash
+python3 -m pytest
+```
+
+## Run only unit tests
+
+```bash
+python3 -m pytest tests/unit
+```
+
+## Run only integration tests
+
+```bash
+python3 -m pytest tests/integration
+```
+
+## Notes
+
+- Integration tests are fully local and do **not** require a real Ignition gateway.
+- LLM services are not required for these tests.
+- Neo4j is not required for this test suite.
+
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..5b51088
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,35 @@
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+import pytest
+
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+SCRIPTS_DIR = REPO_ROOT / "scripts"
+INTEGRATION_DIR = REPO_ROOT / "tests" / "integration"
+
+for path in (SCRIPTS_DIR, INTEGRATION_DIR):
+    path_str = str(path)
+    if path_str not in sys.path:
+        sys.path.insert(0, path_str)
+
+
+@pytest.fixture
+def sim_ignition():  # yields the simulated Ignition server handles; the server is stopped on teardown
+    from simulated_ignition_server import (
+        start_simulated_ignition_server,
+        stop_simulated_ignition_server,
+    )
+
+    server, thread, state, base_url = start_simulated_ignition_server()
+    try:
+        yield {
+            "server": server,
+            "thread": thread,
+            "state": state,
+            "base_url": base_url,
+        }
+    finally:  # runs after the test finishes, even if it failed
+        stop_simulated_ignition_server(server, thread)
diff --git a/tests/integration/simulated_ignition_server.py b/tests/integration/simulated_ignition_server.py
new file mode 100644
index 0000000..607f316
--- /dev/null
+++ b/tests/integration/simulated_ignition_server.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python3
+"""
+Simulated Ignition WebDev endpoints for local integration tests.
+"""
+
+from __future__ import annotations
+
+import json
+import threading
+from dataclasses import dataclass, field
+from datetime import datetime, timedelta, timezone
+from http.server import BaseHTTPRequestHandler, HTTPServer
+from typing import Dict, List, Tuple
+from urllib.parse import parse_qs, urlparse
+
+
+def _utc_iso(offset_minutes: int = 0) -> str:
+    return (timedelta(minutes=offset_minutes) + datetime.now(tz=timezone.utc)).isoformat()
+
+
+@dataclass
+class SimulatedIgnitionState:
+    """Mutable state served by the simulated endpoints; empty maps get seed data."""
+
+    # Set either flag to make the matching endpoint answer with HTTP 503.
+    fail_live_reads: bool = False
+    fail_history_reads: bool = False
+    live_tags: Dict[str, Dict] = field(default_factory=dict)
+    tag_history: Dict[str, List[Tuple[str, float]]] = field(default_factory=dict)
+
+    def __post_init__(self) -> None:
+        """Seed two default tags (live values and history) where none were given."""
+        if not self.live_tags:
+            seeds = {"[default]Line/Throughput": 95.0, "[default]Line/Temperature": 42.0}
+            self.live_tags = {
+                path: {
+                    "value": value,
+                    "quality": "Good",
+                    "timestamp": _utc_iso(),
+                    "dataType": "Float8",
+                }
+                for path, value in seeds.items()
+            }
+        if not self.tag_history:
+            base = [49.9, 50.1, 50.0, 50.2, 50.1, 49.8, 50.3, 50.0, 49.9, 50.2]
+            # Build the timestamps once and share them between tags: separate
+            # _utc_iso() calls differ by microseconds, so the wide-history handler
+            # could never match a secondary tag's samples to the primary's rows.
+            stamps = [_utc_iso(offset_minutes=i - len(base)) for i in range(len(base))]
+            self.tag_history = {
+                "[default]Line/Throughput": list(zip(stamps, base)),
+                "[default]Line/Temperature": [
+                    (ts, 41.5 + (i * 0.1)) for i, ts in enumerate(stamps)
+                ],
+            }
+
+
+class _IgnitionHandler(BaseHTTPRequestHandler):
+    """Serve the two simulated WebDev GET endpoints from ``state``."""
+
+    # Supplied by start_simulated_ignition_server() via a per-server subclass.
+    state: SimulatedIgnitionState
+
+    def _send_json(self, payload, status: int = 200) -> None:
+        """Write ``payload`` as a JSON response with the given HTTP status."""
+        encoded = json.dumps(payload).encode("utf-8")
+        self.send_response(status)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(encoded)))
+        self.end_headers()
+        self.wfile.write(encoded)
+
+    @staticmethod
+    def _requested_paths(query) -> List[str]:
+        """Split the first ``tagPaths`` query value on commas, dropping blanks."""
+        pieces = query.get("tagPaths", [""])[0].split(",")
+        return [piece.strip() for piece in pieces if piece.strip()]
+
+    def _live_entry(self, tag_path: str) -> dict:
+        """Build the getTags record for one path (Bad quality when unknown)."""
+        data = self.state.live_tags.get(tag_path)
+        if not data:
+            return {
+                "tagPath": tag_path,
+                "value": None,
+                "quality": "Bad",
+                "isGood": False,
+                "timestamp": _utc_iso(),
+                "dataType": "Unknown",
+            }
+        quality = data.get("quality", "Good")
+        return {
+            "tagPath": tag_path,
+            "value": data.get("value"),
+            "quality": quality,
+            "isGood": str(quality).lower() == "good",
+            "timestamp": data.get("timestamp", _utc_iso()),
+            "dataType": data.get("dataType", "Unknown"),
+        }
+
+    def _history_rows(self, tag_paths: List[str]) -> List[dict]:
+        """Build wide rows keyed on the first requested tag's timestamps."""
+        history = self.state.tag_history
+        primary_path = tag_paths[0] if tag_paths else "[default]Line/Throughput"
+        rows = []
+        for ts, _ in history.get(primary_path, []):
+            row = {"timestamp": ts}
+            for tag_path in tag_paths:
+                values = history.get(tag_path, [])
+                # First sample carrying the same timestamp, else None.
+                match_val = next((val for hist_ts, val in values if hist_ts == ts), None)
+                if match_val is None and values:
+                    match_val = values[-1][1]  # no match: use the last sample in the list
+                row[tag_path] = match_val
+            rows.append(row)
+        return rows
+
+    def do_GET(self):  # noqa: N802 - BaseHTTPRequestHandler naming
+        """Route GET requests; failure flags yield 503, unknown paths 404."""
+        parsed = urlparse(self.path)
+        path = parsed.path
+        # Both endpoints read the same comma-separated ``tagPaths`` parameter.
+        tag_paths = self._requested_paths(parse_qs(parsed.query))
+
+        if path == "/system/webdev/Axilon/getTags":
+            if self.state.fail_live_reads:
+                self._send_json({"error": "simulated live provider failure"}, status=503)
+            else:
+                tags = [self._live_entry(p) for p in tag_paths]
+                self._send_json({"success": True, "count": len(tags), "tags": tags})
+        elif path == "/system/webdev/Axilon/queryTagHistory":
+            if self.state.fail_history_reads:
+                self._send_json({"error": "simulated history provider failure"}, status=503)
+            else:
+                self._send_json(
+                    {
+                        "success": True,
+                        "rows": self._history_rows(tag_paths),
+                        "tagPaths": tag_paths,
+                        "returnFormat": "Wide",
+                    }
+                )
+        else:
+            self._send_json({"error": f"unsupported endpoint: {path}"}, status=404)
+
+    def log_message(self, format, *args):  # noqa: A003 - stdlib signature
+        """Discard the default per-request log lines to keep test output quiet."""
+        return
+
+
+def start_simulated_ignition_server() -> tuple[HTTPServer, threading.Thread, SimulatedIgnitionState, str]:
+    """Start the simulator on 127.0.0.1 (port 0) in a daemon thread.
+
+    Returns ``(server, thread, state, base_url)``.
+    """
+    sim_state = SimulatedIgnitionState()
+    # Per-server handler subclass, so each server instance carries its own state.
+    bound_handler = type("IgnitionHandlerWithState", (_IgnitionHandler,), {"state": sim_state})
+    httpd = HTTPServer(("127.0.0.1", 0), bound_handler)
+    worker = threading.Thread(target=httpd.serve_forever, daemon=True)
+    worker.start()
+    host, port = httpd.server_address
+    return httpd, worker, sim_state, f"http://{host}:{port}"
+
+
+def stop_simulated_ignition_server(server: HTTPServer, thread: threading.Thread) -> None:
+    server.shutdown()  # ask the serve_forever() loop running in `thread` to return
+    server.server_close()  # release the listening socket
+    thread.join(timeout=3)  # wait at most 3 seconds for the serving thread to finish
+
diff --git a/tests/integration/test_live_value_sim_server.py b/tests/integration/test_live_value_sim_server.py
new file mode 100644
index 0000000..d6feeea
--- /dev/null
+++ b/tests/integration/test_live_value_sim_server.py
@@ -0,0 +1,75 @@
+from datetime import datetime, timedelta, timezone
+
+from anomaly_rules import compute_deviation_scores
+from ignition_api_client import IgnitionApiClient
+
+def test_read_tags_history_and_detect_spike(sim_ignition):
+    """Read a live tag and its history from the simulator, then score the live value."""
+    sim_state = sim_ignition["state"]
+    sim_state.fail_live_reads = False
+    sim_state.fail_history_reads = False
+    tag_path = "[default]Line/Throughput"
+
+    client = IgnitionApiClient(base_url=sim_ignition["base_url"], api_token="token")
+    try:
+        live = client.read_tag(tag_path)
+        assert live.error is None
+        assert live.quality == "Good"
+        assert float(live.value) == 95.0
+
+        window_start = (datetime.now(timezone.utc).replace(microsecond=0) - timedelta(hours=1)).isoformat()
+        window_end = datetime.now(timezone.utc).replace(microsecond=0).isoformat()
+        history = client.query_tag_history([tag_path], window_start, window_end, return_size=100)
+        assert isinstance(history, dict)
+        assert "rows" in history
+
+        history_values = []
+        for row in history["rows"]:
+            if isinstance(row, dict) and row.get(tag_path) is not None:
+                history_values.append(row[tag_path])
+        assert len(history_values) > 5
+
+        score = compute_deviation_scores(
+            current_value=live.value,
+            history_values=history_values,
+            prev_value=55.0,
+            thresholds={"z": 3.0, "mad": 3.5, "rate": 10.0},
+        )
+        assert score["candidate"]
+        assert score["category"] in {"spike", "deviation", "drift"}
+    finally:
+        client.close()
+
+
+def test_live_provider_failure_surfaces_as_read_error(sim_ignition):
+    """A failing live endpoint must surface as an error on the returned value."""
+    sim_ignition["state"].fail_live_reads = True
+    client = IgnitionApiClient(base_url=sim_ignition["base_url"], api_token="token")
+    try:
+        result = client.read_tag("[default]Line/Throughput")
+        error_text = result.error
+        assert error_text is not None
+        assert "failed" in error_text.lower()
+    finally:
+        client.close()
+
+
+def test_history_provider_failure_surfaces_error_payload(sim_ignition):
+    """A failing history endpoint must yield a dict payload carrying ``error``."""
+    sim_ignition["state"].fail_history_reads = True
+    tag_paths = ["[default]Line/Throughput"]
+
+    client = IgnitionApiClient(base_url=sim_ignition["base_url"], api_token="token")
+    try:
+        # Trailing one-hour window, truncated to whole seconds.
+        window_start = (datetime.now(timezone.utc).replace(microsecond=0) - timedelta(hours=1)).isoformat()
+        window_end = datetime.now(timezone.utc).replace(microsecond=0).isoformat()
+        history = client.query_tag_history(
+            tag_paths, window_start, window_end, return_size=100
+        )
+
+        assert isinstance(history, dict)
+        assert "error" in history
+    finally:
+        client.close()
+
diff --git a/tests/unit/test_anomaly_rules.py b/tests/unit/test_anomaly_rules.py
new file mode 100644
index 0000000..e5f2af1
--- /dev/null
+++ b/tests/unit/test_anomaly_rules.py
@@ -0,0 +1,64 @@
+from datetime import datetime, timedelta, timezone
+
+import pytest
+
+from anomaly_rules import compute_deviation_scores, is_quality_good, is_stale
+
+
+def test_detects_sharp_rise_and_sharp_drop():
+    """Values far above and far below a tight ~50 baseline are both candidates."""
+    baseline = [50.0, 49.9, 50.1, 50.2, 49.8, 50.0, 50.1, 49.9, 50.0, 50.2] * 3
+    thresholds = {"z": 3.0, "mad": 3.5, "rate": 10.0}
+    samples = [(95.0, 52.0), (12.0, 49.0)]  # (current, previous): rise, then drop
+
+    results = [
+        compute_deviation_scores(
+            current_value=current,
+            history_values=baseline,
+            prev_value=previous,
+            thresholds=dict(thresholds),
+        )
+        for current, previous in samples
+    ]
+    rise, drop = results
+    assert rise["candidate"]
+    assert drop["candidate"]
+
+
+def test_detects_flatline_stuck_pattern():
+    """Thirty identical samples must be categorised as a stuck signal."""
+    outcome = compute_deviation_scores(
+        current_value=72.0,
+        history_values=[72.0 for _ in range(30)],
+        prev_value=72.0,
+        thresholds={"z": 3.0, "mad": 3.5, "rate": 1.0, "stuck_window_size": 20},
+    )
+    assert outcome["candidate"]
+    assert "flatline_detected" in outcome["reasons"]
+    assert outcome["category"] == "stuck"
+
+
+@pytest.mark.parametrize(
+    "quality,expected",
+    [("Good", True), ("OK", True), ("Bad", False), (None, False)],
+)
+def test_quality_helper(quality, expected):
+    assert is_quality_good(quality) is expected  # identity check: a truthy non-bool would fail
+
+
+def test_staleness_helper():
+    """A fresh timestamp passes a 300 s limit; one 15 minutes old does not."""
+    now = datetime.now(timezone.utc)
+    assert not is_stale(now.isoformat(), staleness_sec=300)
+    assert is_stale((now - timedelta(minutes=15)).isoformat(), staleness_sec=300)
+
+
+def test_non_numeric_current_value_is_rejected():
+    """A non-numeric current value is reported as ``invalid_value``, not a candidate."""
+    result = compute_deviation_scores(
+        current_value="not-a-number", history_values=[1, 2, 3, 4, 5], prev_value=3
+    )
+
+    assert not result["candidate"]
+    assert result["category"] == "invalid_value"
+
diff --git a/tests/unit/test_ingest_siemens_parser.py b/tests/unit/test_ingest_siemens_parser.py
new file mode 100644
index 0000000..935bf71
--- /dev/null
+++ b/tests/unit/test_ingest_siemens_parser.py
@@ -0,0 +1,72 @@
+from pathlib import Path
+
+from siemens_parser import SiemensSTParser
+
+
+SAMPLE_ST = """
+NAMESPACE Plant.Process
+
+TYPE MotorData : STRUCT
+    Speed : REAL;
+END_STRUCT
+END_TYPE
+
+CLASS MotorFB
+VAR_INPUT
+    StartCmd : BOOL; // start command
+END_VAR
+VAR_OUTPUT
+    Running : BOOL;
+END_VAR
+METHOD PUBLIC Execute : BOOL
+VAR
+    tempVar : INT := 1;
+END_VAR
+Running := StartCmd;
+END_METHOD
+END_CLASS
+
+PROGRAM MainProgram
+VAR
+    Counter : INT := 0;
+END_VAR
+Counter := Counter + 1;
+END_PROGRAM
+
+CONFIGURATION Config1
+TASK MainTask(INTERVAL := T#100MS, PRIORITY := 1);
+PROGRAM PLC_PRG WITH MainTask: MainProgram;
+END_CONFIGURATION
+
+END_NAMESPACE
+"""
+
+
+def test_parse_structured_text_blocks(tmp_path):
+    st_path = Path(tmp_path) / "sample.st"
+    st_path.write_text(SAMPLE_ST, encoding="utf-8")
+
+    parser = SiemensSTParser()
+    blocks = parser.parse_file(str(st_path))
+    assert len(blocks) >= 4  # SAMPLE_ST declares a TYPE, a CLASS, a PROGRAM and a CONFIGURATION
+
+    by_name = {b.name: b for b in blocks}  # index by name so each block can be checked directly
+    assert "MotorData" in by_name
+    assert by_name["MotorData"].type == "UDT"
+    assert by_name["MotorData"].local_tags[0].name == "Speed"
+
+    assert "MotorFB" in by_name
+    fb = by_name["MotorFB"]
+    assert fb.type == "FB"
+    assert any(t.name == "StartCmd" and t.direction == "INPUT" for t in fb.input_tags)
+    assert any(t.name == "Running" and t.direction == "OUTPUT" for t in fb.output_tags)
+    assert any(r["name"] == "Execute" for r in fb.routines)  # the METHOD inside the CLASS
+
+    assert "MainProgram" in by_name
+    assert by_name["MainProgram"].type == "PROGRAM"
+    assert "Counter := Counter + 1" in by_name["MainProgram"].raw_implementation
+
+    assert "Config1" in by_name
+    assert by_name["Config1"].type == "CONFIGURATION"
+    assert "MainTask" in by_name["Config1"].description  # the TASK name must appear in the description
+
diff --git a/tests/unit/test_ingest_workbench_parser.py b/tests/unit/test_ingest_workbench_parser.py
new file mode 100644
index 0000000..7609490
--- /dev/null
+++ b/tests/unit/test_ingest_workbench_parser.py
@@ -0,0 +1,119 @@
+import json
+from pathlib import Path
+
+from workbench_parser import WorkbenchParser
+
+
+def test_parse_workbench_project_json_with_inline_resources(tmp_path):
+    root = Path(tmp_path)
+
+    # Script file expected by WorkbenchParser._read_script_file
+    script_file = root / "scripts" / "PlantA" / "utility" / "tags" / "code.py"
+    script_file.parent.mkdir(parents=True, exist_ok=True)
+    script_file.write_text("def read_tag():\n    return 42\n", encoding="utf-8")
+
+    data = {
+        "__typeName": "WorkbenchState",
+        "version": "1.2.3",
+        "root": {
+            "windows": [
+                {
+                    "projectName": "PlantA",
+                    "title": "MainView",
+                    "path": "main/view",
+                    "windowType": "perspective",
+                    "rootContainer": {
+                        "meta": {"name": "Root"},
+                        "type": "ia.container",
+                        "propConfig": {
+                            "props.value": {
+                                "binding": {
+                                    "type": "tag",
+                                    "config": {
+                                        "tagPath": "[default]Line/Speed",
+                                        "bidirectional": True,
+                                    },
+                                }
+                            }
+                        },
+                        "children": [],
+                    },
+                }
+            ],
+            "namedQueries": [
+                {
+                    "projectName": "PlantA",
+                    "queryName": "GetBatches",
+                    "folderPath": "Prod\\Ops",
+                    "query": "SELECT * FROM batches",
+                }
+            ],
+            "scripts": [
+                {
+                    "projectName": "PlantA",
+                    "path": ["utility", "tags"],
+                    "scope": "A",
+                }
+            ],
+            "tags": [
+                {
+                    "name": "LineSpeed",
+                    "type": "Opc",
+                    "dataType": "Float8",
+                    "opcItemPath": "[default]Line/Speed",
+                },
+                {
+                    "name": "BatchCount",
+                    "type": "Memory",
+                    "dataType": "Int4",
+                    "value": 7,
+                },
+            ],
+            "udtDefinitions": [
+                {
+                    "name": "MotorUDT",
+                    "id": "MotorUDT",
+                    "parameters": {
+                        "area": {"dataType": "String", "value": "A1"}
+                    },
+                    "members": [
+                        {
+                            "name": "Run",
+                            "type": "opc",
+                            "dataType": "Boolean",
+                            "opcItemPath": "[default]Motor/Run",
+                            "serverName": {"binding": "default"},
+                        }
+                    ],
+                }
+            ],
+        },
+    }
+
+    project_json = root / "project.json"
+    project_json.write_text(json.dumps(data), encoding="utf-8")
+
+    parser = WorkbenchParser()
+    backup = parser.parse_file(str(project_json))
+
+    assert "PlantA" in backup.projects
+    assert len(backup.windows) == 1
+    assert backup.windows[0].name == "MainView"
+    assert backup.windows[0].components[0].bindings[0].target == "[default]Line/Speed"
+
+    assert len(backup.named_queries) == 1
+    assert backup.named_queries[0].id == "Prod/Ops/GetBatches"
+    assert "SELECT" in backup.named_queries[0].query_text
+
+    assert len(backup.scripts) == 1
+    assert "return 42" in backup.scripts[0].script_text
+
+    tag_types = {t.name: t.tag_type for t in backup.tags}
+    assert tag_types["LineSpeed"] == "opc"
+    assert tag_types["BatchCount"] == "memory"
+
+    assert len(backup.udt_definitions) == 1
+    udt = backup.udt_definitions[0]
+    assert "area" in udt.parameters
+    assert udt.members[0].server_name == "default"
+

From e9ca37d4bbd907fd258af0ebca7e3122dcaa1d1c Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Mon, 2 Mar 2026 22:16:19 +0000
Subject: [PATCH 04/16] Improve anomaly visibility and clear acknowledged
 events

Co-authored-by: leor <leor@fortresslabs.com>
---
 electron-ui/index.html     |   1 +
 electron-ui/main.js        |  11 ++++
 electron-ui/preload.js     |   1 +
 electron-ui/renderer.js    |  27 ++++++++--
 scripts/anomaly_monitor.py | 101 ++++++++++++++++++++++++++++++++++++-
 5 files changed, 136 insertions(+), 5 deletions(-)

diff --git a/electron-ui/index.html b/electron-ui/index.html
index 7e5e8a7..99ba9a1 100644
--- a/electron-ui/index.html
+++ b/electron-ui/index.html
@@ -593,6 +593,7 @@ <h3>Anomaly Feed</h3>
                   <option value="">All states</option>
                   <option value="open">Open</option>
                   <option value="acknowledged">Acknowledged</option>
+                  <option value="cleared">Cleared</option>
                 </select>
                 <select class="input input-sm" id="agents-filter-severity">
                   <option value="">All severity</option>
diff --git a/electron-ui/main.js b/electron-ui/main.js
index e215fb4..8796380 100644
--- a/electron-ui/main.js
+++ b/electron-ui/main.js
@@ -1687,6 +1687,17 @@ ipcMain.handle('agents:ack-event', async (event, eventId, note = '') => {
   }
 });
 
+ipcMain.handle('agents:clear-event', async (event, eventId, note = '') => {
+  try {
+    const cliArgs = ['clear-event', '--event-id', String(eventId)]
+      .concat(note ? ['--note', String(note)] : []); // --note only when non-empty
+    const stdout = await runPythonScript('anomaly_monitor.py', cliArgs);
+    return JSON.parse(stdout || '{}'); // empty output parses as {}
+  } catch (err) {
+    return { success: false, error: err.message };
+  }
+});
+
 ipcMain.handle('agents:cleanup', async (event, retentionDays = 14) => {
   try {
     const output = await runPythonScript('anomaly_monitor.py', [
diff --git a/electron-ui/preload.js b/electron-ui/preload.js
index 1e0930c..e94b546 100644
--- a/electron-ui/preload.js
+++ b/electron-ui/preload.js
@@ -78,6 +78,7 @@ contextBridge.exposeInMainWorld('api', {
   agentsListEvents: (filters) => ipcRenderer.invoke('agents:list-events', filters),
   agentsGetEvent: (eventId) => ipcRenderer.invoke('agents:get-event', eventId),
   agentsAckEvent: (eventId, note) => ipcRenderer.invoke('agents:ack-event', eventId, note),
+  agentsClearEvent: (eventId, note) => ipcRenderer.invoke('agents:clear-event', eventId, note),
   agentsCleanup: (retentionDays) => ipcRenderer.invoke('agents:cleanup', retentionDays),
   
   // Database connections
diff --git a/electron-ui/renderer.js b/electron-ui/renderer.js
index cab7e8b..fef65a6 100644
--- a/electron-ui/renderer.js
+++ b/electron-ui/renderer.js
@@ -3742,7 +3742,19 @@ function renderAgentEventDetails(event) {
   `;
 
   if (el.btnOpenGraph) el.btnOpenGraph.disabled = !resolveAgentGraphTarget(event);
-  if (el.btnAck) el.btnAck.disabled = event.state === 'acknowledged';
+  if (el.btnAck) {
+    const state = String(event.state || '').toLowerCase();
+    if (state === 'acknowledged') {
+      el.btnAck.textContent = 'Clear';
+      el.btnAck.disabled = false;
+    } else if (state === 'cleared') {
+      el.btnAck.textContent = 'Cleared';
+      el.btnAck.disabled = true;
+    } else {
+      el.btnAck.textContent = 'Acknowledge';
+      el.btnAck.disabled = false;
+    }
+  }
 }
 
 async function selectAgentEvent(eventId) {
@@ -3826,11 +3838,15 @@ async function stopAgentsMonitoring() {
 
 async function acknowledgeSelectedAgentEvent() {
   if (!agentsState.selectedEventId) return;
-  const result = await window.api.agentsAckEvent(agentsState.selectedEventId, '');
+  const selected = agentsState.events.find((e) => e.event_id === agentsState.selectedEventId);
+  const state = String(selected?.state || '').toLowerCase();
+  const result = state === 'acknowledged'
+    ? await window.api.agentsClearEvent(agentsState.selectedEventId, '')
+    : await window.api.agentsAckEvent(agentsState.selectedEventId, '');
   if (!result.success) return;
   await loadAgentEvents();
-  const selected = agentsState.events.find((e) => e.event_id === agentsState.selectedEventId);
-  renderAgentEventDetails(selected || null);
+  const refreshed = agentsState.events.find((e) => e.event_id === agentsState.selectedEventId);
+  renderAgentEventDetails(refreshed || null);
 }
 
 function upsertRealtimeAgentEvent(payload) {
@@ -3864,6 +3880,9 @@ function ensureAgentListeners() {
     agentsState.status = payload.state || agentsState.status;
     updateAgentStatusUi(agentsState.status, `Run ${agentsState.runId || 'n/a'}`);
     updateAgentMetrics(payload, payload.timestamp);
+    if (payload.diagnostics) {
+      console.debug('[Agents diagnostics]', payload.diagnostics);
+    }
   });
 
   window.api.onAgentEvent((payload) => {
diff --git a/scripts/anomaly_monitor.py b/scripts/anomaly_monitor.py
index 9049108..46b8b2c 100644
--- a/scripts/anomaly_monitor.py
+++ b/scripts/anomaly_monitor.py
@@ -615,7 +615,7 @@ def emit_provider_failure_event(
     # -----------------------------
     def run_cycle(self) -> Dict[str, Any]:
         cycle_start = time.time()
-        metrics = {"candidates": 0, "triaged": 0, "emitted": 0, "cycleMs": 0}
+        metrics = {"candidates": 0, "triaged": 0, "emitted": 0, "cycleMs": 0, "diagnostics": {}}
         thresholds = self.config.get("thresholds", {})
         min_history = int(self.config.get("minHistoryPoints", 30))
 
@@ -652,6 +652,10 @@ def run_cycle(self) -> Dict[str, Any]:
         history_error_count = 0
         history_error_samples: List[str] = []
         valid_live_count = 0
+        quality_filtered_count = 0
+        stale_filtered_count = 0
+        insufficient_history_count = 0
+        low_history_candidate_count = 0
 
         for tv in live_values:
             if tv.error:
@@ -662,8 +666,10 @@ def run_cycle(self) -> Dict[str, Any]:
             valid_live_count += 1
             if not is_quality_good(tv.quality):
                 # quality gate: only emit quality anomalies if this persists via triage.
+                quality_filtered_count += 1
                 continue
             if is_stale(tv.timestamp, int(thresholds.get("stalenessSec", 120)), now=now):
+                stale_filtered_count += 1
                 continue
 
             history, history_error = self.fetch_history_values(tv.path)
@@ -673,6 +679,39 @@ def run_cycle(self) -> Dict[str, Any]:
                     history_error_samples.append(f"{tv.path}: {history_error}")
                 continue
             if len(history) < min_history:
+                insufficient_history_count += 1
+                # Low-history fallback: still score dramatic shifts when at least a
+                # small baseline exists, otherwise simulator users see no events.
+                if len(history) >= 5:
+                    prev_val = self._prev_values.get(tv.path)
+                    deterministic = compute_deviation_scores(
+                        current_value=tv.value,
+                        history_values=history,
+                        prev_value=prev_val,
+                        thresholds=thresholds,
+                    )
+                    curr_num = safe_float(tv.value)
+                    if curr_num is not None:
+                        self._prev_values[tv.path] = curr_num
+
+                    if deterministic.get("candidate"):
+                        deterministic["reasons"] = list(deterministic.get("reasons", [])) + ["low_history_override"]
+                        deterministic["history_quality"] = "low"
+                        context = self.get_context(tv.path)
+                        candidates.append(
+                            {
+                                "context": context,
+                                "deterministic": deterministic,
+                                "live_sample": {
+                                    "path": tv.path,
+                                    "value": tv.value,
+                                    "quality": tv.quality,
+                                    "timestamp": tv.timestamp,
+                                    "data_type": tv.data_type,
+                                },
+                            }
+                        )
+                        low_history_candidate_count += 1
                 continue
 
             prev_val = self._prev_values.get(tv.path)
@@ -744,6 +783,28 @@ def run_cycle(self) -> Dict[str, Any]:
             if emitted:
                 metrics["emitted"] += 1
 
+        if valid_live_count > 0 and stale_filtered_count >= max(1, int(valid_live_count * 0.8)):
+            emitted = self.emit_provider_failure_event(
+                "live_timestamp_stale",
+                f"Most live samples were stale ({stale_filtered_count}/{valid_live_count}).",
+                severity="medium",
+                category="quality-issue",
+                details={"staleCount": stale_filtered_count, "validLiveCount": valid_live_count},
+            )
+            if emitted:
+                metrics["emitted"] += 1
+
+        if valid_live_count > 0 and quality_filtered_count >= max(1, int(valid_live_count * 0.8)):
+            emitted = self.emit_provider_failure_event(
+                "live_quality_bad",
+                f"Most live samples had non-good quality ({quality_filtered_count}/{valid_live_count}).",
+                severity="medium",
+                category="quality-issue",
+                details={"qualityFilteredCount": quality_filtered_count, "validLiveCount": valid_live_count},
+            )
+            if emitted:
+                metrics["emitted"] += 1
+
         metrics["candidates"] = len(candidates)
         max_candidates = int(self.config.get("maxCandidatesPerCycle", 25))
         max_triage = int(self.config.get("maxLlmTriagesPerCycle", 5))
@@ -781,6 +842,16 @@ def run_cycle(self) -> Dict[str, Any]:
                 metrics["emitted"] += 1
                 self._emit_persisted_event(persisted)
 
+        metrics["diagnostics"] = {
+            "monitoredTags": len(tag_paths),
+            "validLiveCount": valid_live_count,
+            "liveErrorCount": live_error_count,
+            "qualityFilteredCount": quality_filtered_count,
+            "staleFilteredCount": stale_filtered_count,
+            "historyErrorCount": history_error_count,
+            "insufficientHistoryCount": insufficient_history_count,
+            "lowHistoryCandidateCount": low_history_candidate_count,
+        }
         metrics["cycleMs"] = int((time.time() - cycle_start) * 1000)
         return metrics
 
@@ -819,6 +890,7 @@ def run_forever(self) -> int:
                     "candidates": metrics["candidates"],
                     "triaged": metrics["triaged"],
                     "emitted": metrics["emitted"],
+                    "diagnostics": metrics.get("diagnostics", {}),
                     "timestamp": utc_now_iso(),
                 })
                 if self._cycle_count % cleanup_every == 0:
@@ -890,6 +962,25 @@ def ack_event(self, event_id: str, note: Optional[str]) -> Dict[str, Any]:
                 return {"success": False, "error": f"Event not found: {event_id}"}
         return {"success": True, "eventId": event_id}
 
+    def clear_event(self, event_id: str, note: Optional[str]) -> Dict[str, Any]:
+        """Mark one AnomalyEvent as cleared; report failure when no node matched."""
+        with self.graph.session() as session:
+            row = session.run(
+                """
+                MATCH (e:AnomalyEvent {event_id: $event_id})
+                SET e.state = 'cleared',
+                    e.cleared_at = datetime(),
+                    e.clear_note = $note,
+                    e.updated_at = datetime()
+                RETURN count(e) AS cnt
+                """,
+                event_id=event_id,
+                note=note or "",
+            ).single()
+            if row and row["cnt"] != 0:
+                return {"success": True, "eventId": event_id}
+        return {"success": False, "error": f"Event not found: {event_id}"}
+
     def get_status(self, run_id: str) -> Dict[str, Any]:
         with self.graph.session() as session:
             result = session.run(
@@ -990,6 +1081,10 @@ def main() -> int:
     p_ack.add_argument("--event-id", required=True)
     p_ack.add_argument("--note")
 
+    p_clear = sub.add_parser("clear-event", help="Clear one acknowledged anomaly event")
+    p_clear.add_argument("--event-id", required=True)
+    p_clear.add_argument("--note")
+
     p_cleanup = sub.add_parser("cleanup", help="Delete old anomaly events")
     p_cleanup.add_argument("--retention-days", type=int, default=14)
 
@@ -1038,6 +1133,10 @@ def _signal_handler(_signum, _frame):
         print(json.dumps(monitor.ack_event(args.event_id, args.note), default=str))
         return 0
 
+    if args.command == "clear-event":
+        print(json.dumps(monitor.clear_event(args.event_id, args.note), default=str))
+        return 0
+
     if args.command == "cleanup":
         deleted = monitor.graph.cleanup_anomaly_events(args.retention_days)
         print(json.dumps({"success": True, "deleted": deleted}))

From e51cc3b692710a5574bcd1ad50b3329b735c742f Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Mon, 2 Mar 2026 22:28:36 +0000
Subject: [PATCH 05/16] Add automatic subsystem-aware anomaly detection

Co-authored-by: leor <leor@fortresslabs.com>
---
 electron-ui/main.js                           |   7 +
 electron-ui/renderer.js                       |  26 +-
 scripts/anomaly_monitor.py                    | 309 ++++++++++++++++--
 tests/unit/test_anomaly_monitor_subsystems.py |  56 ++++
 4 files changed, 377 insertions(+), 21 deletions(-)
 create mode 100644 tests/unit/test_anomaly_monitor_subsystems.py

diff --git a/electron-ui/main.js b/electron-ui/main.js
index 8796380..5ebe28b 100644
--- a/electron-ui/main.js
+++ b/electron-ui/main.js
@@ -205,7 +205,9 @@ function normalizeAgentConfig(config = {}) {
     minHistoryPoints: Math.max(10, Number(config.minHistoryPoints || 30)),
     maxMonitoredTags: Math.max(10, Number(config.maxMonitoredTags || 200)),
     maxCandidatesPerCycle: Math.max(1, Number(config.maxCandidatesPerCycle || 25)),
+    maxCandidatesPerSubsystem: Math.max(1, Number(config.maxCandidatesPerSubsystem || 8)),
     maxLlmTriagesPerCycle: Math.max(0, Number(config.maxLlmTriagesPerCycle || 5)),
+    maxLlmTriagesPerSubsystem: Math.max(0, Number(config.maxLlmTriagesPerSubsystem || 2)),
     dedupCooldownMinutes: Math.max(1, Number(config.dedupCooldownMinutes || 10)),
     retentionDays: Math.max(1, Number(config.retentionDays || 14)),
     cleanupEveryCycles: Math.max(1, Number(config.cleanupEveryCycles || 40)),
@@ -221,6 +223,11 @@ function normalizeAgentConfig(config = {}) {
       project: scope.project || null,
       equipmentTags: Array.isArray(scope.equipmentTags) ? scope.equipmentTags : [],
       tagRegex: scope.tagRegex || null,
+      subsystemMode: String(scope.subsystemMode || 'auto').toLowerCase() === 'global' ? 'global' : 'auto',
+      subsystemPriority: Array.isArray(scope.subsystemPriority) && scope.subsystemPriority.length
+        ? scope.subsystemPriority.map(String)
+        : ['view', 'equipment', 'group', 'global'],
+      subsystemInclude: Array.isArray(scope.subsystemInclude) ? scope.subsystemInclude.map(String) : [],
     },
   };
 }
diff --git a/electron-ui/renderer.js b/electron-ui/renderer.js
index fef65a6..042666c 100644
--- a/electron-ui/renderer.js
+++ b/electron-ui/renderer.js
@@ -3583,12 +3583,18 @@ function getAgentsConfigFromUI() {
     pollIntervalMs: Number(el.cfgPoll?.value || 15000),
     historyWindowMinutes: Number(el.cfgHist?.value || 360),
     minHistoryPoints: Number(el.cfgPoints?.value || 30),
+    maxCandidatesPerSubsystem: 8,
     maxLlmTriagesPerCycle: Number(el.cfgMaxLlm?.value || 5),
+    maxLlmTriagesPerSubsystem: 2,
     thresholds: {
       z: Number(el.cfgZ?.value || 3),
       mad: Number(el.cfgMad?.value || 3.5),
       stalenessSec: Number(el.cfgStale?.value || 120),
     },
+    scope: {
+      subsystemMode: 'auto',
+      subsystemPriority: ['view', 'equipment', 'group', 'global'],
+    },
   };
 }
 
@@ -3634,6 +3640,8 @@ function getFilteredAgentEvents() {
         event.summary,
         event.source_tag,
         event.tag_name,
+        event.subsystem_name,
+        event.subsystem_type,
         ...(event.equipment || []),
         ...(event.tags || []),
       ]
@@ -3659,6 +3667,12 @@ function renderAgentEventList() {
       const active = event.event_id === agentsState.selectedEventId ? ' active' : '';
       const sev = String(event.severity || 'low').toLowerCase();
       const equipment = (event.equipment || []).slice(0, 2).join(', ');
+      const subsystemLabel = event.subsystem_name
+        ? `${event.subsystem_type || 'subsystem'}: ${event.subsystem_name}`
+        : '';
+      const baseMeta = [event.tag_name || event.source_tag || '', equipment, subsystemLabel]
+        .filter(Boolean)
+        .join(' • ');
       return `
         <div class="agents-event-card${active}" data-event-id="${escapeHtml(event.event_id || '')}">
           <div class="agents-event-line-top">
@@ -3666,7 +3680,7 @@ function renderAgentEventList() {
             <span class="agents-event-time">${escapeHtml(formatAgentTime(event.created_at))}</span>
           </div>
           <div class="agents-event-summary">${escapeHtml(event.summary || 'Untitled anomaly')}</div>
-          <div class="agents-event-meta">${escapeHtml(event.tag_name || event.source_tag || '')}${equipment ? ` • ${escapeHtml(equipment)}` : ''}</div>
+          <div class="agents-event-meta">${escapeHtml(baseMeta)}</div>
         </div>
       `;
     })
@@ -3682,8 +3696,14 @@ function renderAgentEventList() {
 }
 
 function resolveAgentGraphTarget(event) {
+  if (String(event.subsystem_type || '').toLowerCase() === 'view' && event.subsystem_name) {
+    return { name: event.subsystem_name, type: 'View' };
+  }
   const equipment = (event.equipment || []).find(Boolean);
   if (equipment) return { name: equipment, type: 'Equipment' };
+  if (String(event.subsystem_type || '').toLowerCase() === 'equipment' && event.subsystem_name) {
+    return { name: event.subsystem_name, type: 'Equipment' };
+  }
   const tagName = event.tag_name || (event.tags || []).find(Boolean) || event.source_tag;
   if (tagName) return { name: tagName, type: 'ScadaTag' };
   return null;
@@ -3714,6 +3734,8 @@ function renderAgentEventDetails(event) {
       <div class="agents-detail-item"><span class="agents-detail-label">Confidence</span><span class="agents-detail-value">${escapeHtml(String(event.confidence ?? ''))}</span></div>
       <div class="agents-detail-item"><span class="agents-detail-label">Category</span><span class="agents-detail-value">${escapeHtml(event.category || '')}</span></div>
       <div class="agents-detail-item"><span class="agents-detail-label">Timestamp</span><span class="agents-detail-value">${escapeHtml(formatAgentTime(event.created_at))}</span></div>
+      <div class="agents-detail-item"><span class="agents-detail-label">Subsystem Type</span><span class="agents-detail-value">${escapeHtml(event.subsystem_type || 'global')}</span></div>
+      <div class="agents-detail-item"><span class="agents-detail-label">Subsystem</span><span class="agents-detail-value">${escapeHtml(event.subsystem_name || 'all')}</span></div>
       <div class="agents-detail-item"><span class="agents-detail-label">Source Tag</span><span class="agents-detail-value">${escapeHtml(event.source_tag || '')}</span></div>
       <div class="agents-detail-item"><span class="agents-detail-label">Tag Name</span><span class="agents-detail-value">${escapeHtml(event.tag_name || '')}</span></div>
       <div class="agents-detail-item"><span class="agents-detail-label">z-score</span><span class="agents-detail-value">${escapeHtml(String(event.z_score ?? '0'))}</span></div>
@@ -3860,6 +3882,8 @@ function upsertRealtimeAgentEvent(payload) {
     created_at: payload.createdAt || new Date().toISOString(),
     source_tag: payload.entityRefs?.sourceTag || payload.entityRefs?.tag || '',
     tag_name: payload.entityRefs?.tag || '',
+    subsystem_type: payload.entityRefs?.subsystemType || '',
+    subsystem_name: payload.entityRefs?.subsystemName || '',
     state: 'open',
   };
   if (idx >= 0) {
diff --git a/scripts/anomaly_monitor.py b/scripts/anomaly_monitor.py
index 46b8b2c..beaaf86 100644
--- a/scripts/anomaly_monitor.py
+++ b/scripts/anomaly_monitor.py
@@ -23,7 +23,7 @@
 import uuid
 from datetime import datetime, timedelta, timezone
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Set, Tuple
 
 try:
     from dotenv import load_dotenv
@@ -53,6 +53,101 @@ def emit(prefix: str, payload: Dict[str, Any]) -> None:
     print(f"[{prefix}] {json.dumps(payload, default=str)}", flush=True)
 
 
+DEFAULT_SUBSYSTEM_PRIORITY = ["view", "equipment", "group", "global"]
+
+
+def _canonical_subsystem_type(kind: Any) -> str:
+    """Normalize a subsystem kind alias; unrecognized kinds map to group."""
+    token = str(kind or "").strip().lower()
+    alias_table = (
+        ("view", {"view", "views"}),
+        ("equipment", {"equipment", "equip", "asset"}),
+        ("group", {"group", "groups", "folder", "path", "prefix", "tag_group"}),
+        ("global", {"global", "all", "system"}),
+    )
+    hits = [canon for canon, aliases in alias_table if token in aliases]
+    return hits[0] if hits else "group"
+
+
+def _subsystem_ref(kind: Any, name: Any) -> Dict[str, str]:
+    """Build a type/name/id subsystem reference; a blank name yields global/all."""
+    canon = _canonical_subsystem_type(kind)
+    label = str(name or "").strip()
+    if not label:
+        # Nothing usable to name the subsystem: use the catch-all reference.
+        canon, label = "global", "all"
+    ref: Dict[str, str] = {"type": canon, "name": label}
+    # The id lower-cases only the name; the name itself keeps its casing.
+    ref["id"] = f"{canon}:{label.lower()}"
+    return ref
+
+
+def infer_tag_group(tag_path: Optional[str], folder_name: Optional[str] = None) -> Optional[str]:
+    """Return the top-level group for a tag, or None when none can be inferred.
+
+    The first segment of ``folder_name`` wins when it is non-blank; otherwise
+    the first segment of ``tag_path`` is used, after dropping a leading
+    ``[...]`` prefix, and only if the path has at least two segments.
+    """
+    folder_head = str(folder_name or "").strip().strip("/").split("/", 1)[0].strip()
+    if folder_head:
+        return folder_head
+
+    path_text = str(tag_path or "").strip()
+    if path_text.startswith("[") and "]" in path_text:
+        # Keep only what follows the first closing bracket.
+        path_text = path_text.split("]", 1)[1]
+    segments = [
+        seg.strip() for seg in path_text.strip("/").split("/") if seg.strip()
+    ]
+    # A flat tag (fewer than two segments) has no folder, hence no group.
+    return segments[0] if len(segments) >= 2 else None
+
+
+def derive_subsystems_for_tag(
+    tag_meta: Dict[str, Any],
+    subsystem_mode: str = "auto",
+    priority: Optional[List[str]] = None,
+) -> Tuple[List[Dict[str, str]], Dict[str, str]]:
+    """Return ``(refs, primary)``: all subsystems of one tag and the preferred one.
+
+    Views, equipment and the inferred tag group are collected in that order and
+    de-duplicated by id; ``primary`` is the first ref of the earliest matching
+    type in ``priority``, else the first ref. Modes "global", "off" and
+    "disabled", and tags with no usable names, yield only ``global:all``.
+    """
+    mode = str(subsystem_mode or "auto").strip().lower()
+    if mode in {"global", "off", "disabled"}:
+        global_ref = _subsystem_ref("global", "all")
+        return [global_ref], global_ref
+
+    refs: List[Dict[str, str]] = []
+    seen: Set[str] = set()
+
+    def add_ref(kind: str, name: Any) -> None:
+        # Skip None/blank names: str(None) would become a bogus "none" subsystem
+        # and a blank name would be coerced into a stray global:all ref.
+        text = "" if name is None else str(name).strip()
+        if not text:
+            return
+        ref = _subsystem_ref(kind, text)
+        if ref["id"] not in seen:
+            seen.add(ref["id"])
+            refs.append(ref)
+
+    for view_name in tag_meta.get("views") or []:
+        add_ref("view", view_name)
+    for equipment_name in tag_meta.get("equipment") or []:
+        add_ref("equipment", equipment_name)
+    add_ref("group", infer_tag_group(tag_meta.get("path"), tag_meta.get("folder_name")))
+
+    if not refs:
+        refs = [_subsystem_ref("global", "all")]
+
+    wanted = [_canonical_subsystem_type(x) for x in (priority or DEFAULT_SUBSYSTEM_PRIORITY)]
+    return refs, next((ref for kind in wanted for ref in refs if ref["type"] == kind), refs[0])
+
+
 def merge_defaults(config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
     raw = dict(config or {})
     thresholds = raw.get("thresholds", {}) if isinstance(raw.get("thresholds"), dict) else {}
@@ -62,7 +157,9 @@ def merge_defaults(config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
         "minHistoryPoints": 30,
         "maxMonitoredTags": 200,
         "maxCandidatesPerCycle": 25,
+        "maxCandidatesPerSubsystem": 8,
         "maxLlmTriagesPerCycle": 5,
+        "maxLlmTriagesPerSubsystem": 2,
         "dedupCooldownMinutes": 10,
         "retentionDays": 14,
         "cleanupEveryCycles": 40,
@@ -71,6 +168,9 @@ def merge_defaults(config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
             "project": None,
             "equipmentTags": [],
             "tagRegex": None,
+            "subsystemMode": "auto",
+            "subsystemPriority": list(DEFAULT_SUBSYSTEM_PRIORITY),
+            "subsystemInclude": [],
         },
         "thresholds": {
             "z": 3.0,
@@ -81,11 +181,32 @@ def merge_defaults(config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
             "stuck_window_size": 20,
         },
     }
-    cfg = defaults
+    cfg = dict(defaults)
+    cfg["scope"] = dict(defaults["scope"])
+    cfg["thresholds"] = dict(defaults["thresholds"])
     cfg.update({k: v for k, v in raw.items() if k in defaults and k != "thresholds"})
     cfg["thresholds"].update({k: v for k, v in thresholds.items() if v is not None})
     if isinstance(raw.get("scope"), dict):
         cfg["scope"].update(raw["scope"])
+    scope_cfg = cfg["scope"]
+    mode = str(scope_cfg.get("subsystemMode") or "auto").strip().lower()
+    if mode not in {"auto", "global", "off", "disabled"}:
+        mode = "auto"
+    scope_cfg["subsystemMode"] = mode
+    if not isinstance(scope_cfg.get("subsystemPriority"), list) or not scope_cfg.get("subsystemPriority"):
+        scope_cfg["subsystemPriority"] = list(DEFAULT_SUBSYSTEM_PRIORITY)
+    scope_cfg["subsystemPriority"] = [
+        str(x).strip()
+        for x in scope_cfg.get("subsystemPriority", [])
+        if str(x).strip()
+    ] or list(DEFAULT_SUBSYSTEM_PRIORITY)
+    if not isinstance(scope_cfg.get("subsystemInclude"), list):
+        scope_cfg["subsystemInclude"] = []
+    scope_cfg["subsystemInclude"] = [
+        str(x).strip().lower()
+        for x in scope_cfg.get("subsystemInclude", [])
+        if str(x).strip()
+    ]
     return cfg
 
 
@@ -179,11 +300,22 @@ def heartbeat(self, metrics: Dict[str, Any]) -> None:
     # -----------------------------
     # Tag and context collection
     # -----------------------------
-    def get_monitored_tags(self) -> List[Dict[str, str]]:
+    def get_monitored_tags(self) -> List[Dict[str, Any]]:
         max_tags = int(self.config.get("maxMonitoredTags", 200))
         scope = self.config.get("scope", {})
         tag_regex = scope.get("tagRegex")
-        equipment_tags = set(scope.get("equipmentTags") or [])
+        equipment_tags = {
+            str(x).strip().lower()
+            for x in (scope.get("equipmentTags") or [])
+            if str(x).strip()
+        }
+        subsystem_mode = str(scope.get("subsystemMode") or "auto").strip().lower()
+        subsystem_priority = scope.get("subsystemPriority") or list(DEFAULT_SUBSYSTEM_PRIORITY)
+        subsystem_include = {
+            str(x).strip().lower()
+            for x in (scope.get("subsystemInclude") or [])
+            if str(x).strip()
+        }
 
         with self.graph.session() as session:
             result = session.run(
@@ -191,13 +323,29 @@ def get_monitored_tags(self) -> List[Dict[str, str]]:
                 MATCH (t:ScadaTag)
                 WHERE coalesce(t.opc_item_path, t.name) IS NOT NULL
                   AND coalesce(t.opc_item_path, t.name) <> ''
+                OPTIONAL MATCH (c:ViewComponent)-[:BINDS_TO]->(t)
+                OPTIONAL MATCH (v:View)-[:HAS_COMPONENT]->(c)
+                OPTIONAL MATCH (eq:Equipment)-[*1..2]-(t)
                 RETURN DISTINCT coalesce(t.opc_item_path, t.name) AS tag_path,
-                                coalesce(t.name, t.opc_item_path) AS tag_name
+                                coalesce(t.name, t.opc_item_path) AS tag_name,
+                                coalesce(t.folder_name, '') AS folder_name,
+                                collect(DISTINCT v.name) AS views,
+                                collect(DISTINCT eq.name) AS equipment
                 LIMIT $limit
                 """,
                 limit=max_tags * 3,
             )
-            tags = [{"path": r["tag_path"], "name": r["tag_name"]} for r in result if r["tag_path"]]
+            tags = [
+                {
+                    "path": r["tag_path"],
+                    "name": r["tag_name"],
+                    "folder_name": r["folder_name"] or "",
+                    "views": [x for x in (r["views"] or []) if x],
+                    "equipment": [x for x in (r["equipment"] or []) if x],
+                }
+                for r in result
+                if r["tag_path"]
+            ]
 
         if tag_regex:
             import re
@@ -214,7 +362,32 @@ def get_monitored_tags(self) -> List[Dict[str, str]]:
                 })
 
         if equipment_tags:
-            tags = [t for t in tags if t["name"] in equipment_tags or t["path"] in equipment_tags]
+            tags = [
+                t for t in tags
+                if t["name"].lower() in equipment_tags
+                or t["path"].lower() in equipment_tags
+                or any(str(eq).strip().lower() in equipment_tags for eq in t.get("equipment", []))
+            ]
+
+        for tag in tags:
+            subsystems, primary = derive_subsystems_for_tag(
+                tag_meta=tag,
+                subsystem_mode=subsystem_mode,
+                priority=subsystem_priority,
+            )
+            tag["subsystems"] = subsystems
+            tag["primary_subsystem"] = primary
+
+        if subsystem_include:
+            tags = [
+                t
+                for t in tags
+                if any(
+                    s.get("id", "").lower() in subsystem_include
+                    or s.get("name", "").lower() in subsystem_include
+                    for s in (t.get("subsystems") or [])
+                )
+            ]
 
         return tags[:max_tags]
 
@@ -285,15 +458,18 @@ def get_context(self, tag_path: str) -> Dict[str, Any]:
                 """
                 MATCH (t:ScadaTag)
                 WHERE t.name = $tag OR t.opc_item_path = $tag
+                OPTIONAL MATCH (vc:ViewComponent)-[:BINDS_TO]->(t)
+                OPTIONAL MATCH (v:View)-[:HAS_COMPONENT]->(vc)
                 OPTIONAL MATCH (eq:Equipment)-[*1..2]-(t)
                 OPTIONAL MATCH (eq)-[:HAS_SYMPTOM]->(s:FaultSymptom)
-                OPTIONAL MATCH (s)-[:CAUSED_BY]->(c:FaultCause)
+                OPTIONAL MATCH (s)-[:CAUSED_BY]->(fc:FaultCause)
                 OPTIONAL MATCH (eq)-[:HAS_PATTERN]->(p:ControlPattern)
                 OPTIONAL MATCH (eq)-[:SAFETY_CRITICAL]->(se:SafetyElement)
                 RETURN t,
+                       collect(DISTINCT v.name) AS views,
                        collect(DISTINCT eq.name) AS equipment,
                        collect(DISTINCT s.symptom) AS symptoms,
-                       collect(DISTINCT c.cause) AS causes,
+                       collect(DISTINCT fc.cause) AS causes,
                        collect(DISTINCT p.pattern_name) AS patterns,
                        collect(DISTINCT se.name) AS safety
                 LIMIT 1
@@ -304,7 +480,9 @@ def get_context(self, tag_path: str) -> Dict[str, Any]:
             if not record:
                 return {
                     "tag_path": tag_path,
+                    "views": [],
                     "equipment": [],
+                    "group": infer_tag_group(tag_path),
                     "symptoms": [],
                     "causes": [],
                     "patterns": [],
@@ -314,7 +492,9 @@ def get_context(self, tag_path: str) -> Dict[str, Any]:
             return {
                 "tag_path": tag_path,
                 "tag_name": node.get("name") if node else tag_path,
+                "views": [x for x in record["views"] if x],
                 "equipment": [x for x in record["equipment"] if x],
+                "group": infer_tag_group(tag_path, node.get("folder_name") if node else None),
                 "symptoms": [x for x in record["symptoms"] if x],
                 "causes": [x for x in record["causes"] if x],
                 "patterns": [x for x in record["patterns"] if x],
@@ -344,7 +524,7 @@ def run_llm_triage(
             "rationale": "LLM triage unavailable; using deterministic fallback.",
             "related_entities": [
                 {"label": "Equipment", "name": e} for e in context.get("equipment", [])[:3]
-            ],
+            ] + [{"label": "View", "name": v} for v in context.get("views", [])[:2]],
         }
         if not self.llm:
             return fallback
@@ -423,9 +603,12 @@ def persist_event(
         deterministic: Dict[str, Any],
         live_sample: Dict[str, Any],
         triage: Dict[str, Any],
+        subsystem: Optional[Dict[str, str]] = None,
     ) -> Optional[Dict[str, Any]]:
         category = triage.get("category") or deterministic.get("category", "deviation")
-        dedup_sig = dedup_key(context["tag_path"], category, int(self.config.get("dedupCooldownMinutes", 10)))
+        subsystem_ref = subsystem or _subsystem_ref("global", "all")
+        dedup_source = f"{context['tag_path']}::{subsystem_ref.get('id', 'global:all')}"
+        dedup_sig = dedup_key(dedup_source, category, int(self.config.get("dedupCooldownMinutes", 10)))
         if self.is_duplicate_recent(dedup_sig):
             return None
 
@@ -452,6 +635,9 @@ def persist_event(
             "window_volatility": float(deterministic.get("window_volatility", 0.0)),
             "source_tag": context["tag_path"],
             "tag_name": context.get("tag_name") or context["tag_path"],
+            "subsystem_type": subsystem_ref.get("type"),
+            "subsystem_name": subsystem_ref.get("name"),
+            "subsystem_id": subsystem_ref.get("id"),
             "live_quality": live_sample.get("quality"),
             "live_timestamp": live_sample.get("timestamp"),
             "live_value": str(live_sample.get("value")),
@@ -493,6 +679,27 @@ def persist_event(
                     name=equipment_name,
                 )
 
+            if subsystem_ref.get("type") == "view":
+                session.run(
+                    """
+                    MATCH (e:AnomalyEvent {event_id: $event_id})
+                    MATCH (v:View {name: $name})
+                    MERGE (e)-[:SCOPED_TO]->(v)
+                    """,
+                    event_id=event_id,
+                    name=subsystem_ref.get("name"),
+                )
+            elif subsystem_ref.get("type") == "equipment":
+                session.run(
+                    """
+                    MATCH (e:AnomalyEvent {event_id: $event_id})
+                    MATCH (eq:Equipment {name: $name})
+                    MERGE (e)-[:SCOPED_TO]->(eq)
+                    """,
+                    event_id=event_id,
+                    name=subsystem_ref.get("name"),
+                )
+
             related_inputs: List[Dict[str, str]] = []
             for item in triage.get("related_entities", []) or []:
                 if isinstance(item, dict) and item.get("label") and item.get("name"):
@@ -504,7 +711,7 @@ def persist_event(
 
             for rel in related_inputs[:8]:
                 label = rel["label"]
-                if label not in {"FaultSymptom", "FaultCause", "ControlPattern", "SafetyElement", "Equipment", "ScadaTag"}:
+                if label not in {"FaultSymptom", "FaultCause", "ControlPattern", "SafetyElement", "Equipment", "ScadaTag", "View"}:
                     continue
                 session.run(
                     f"""
@@ -530,6 +737,8 @@ def _emit_persisted_event(self, persisted: Dict[str, Any]) -> None:
             "entityRefs": {
                 "tag": persisted.get("tag_name") or persisted.get("source_tag"),
                 "sourceTag": persisted.get("source_tag"),
+                "subsystemType": persisted.get("subsystem_type"),
+                "subsystemName": persisted.get("subsystem_name"),
             },
             "createdAt": persisted.get("created_at"),
         })
@@ -543,6 +752,7 @@ def emit_provider_failure_event(
         category: str = "quality-issue",
         source_tag: Optional[str] = None,
         details: Optional[Dict[str, Any]] = None,
+        subsystem: Optional[Dict[str, str]] = None,
     ) -> bool:
         """
         Persist and stream provider-health anomalies so failures appear in feed.
@@ -604,6 +814,7 @@ def emit_provider_failure_event(
                 "data_type": "provider_health",
             },
             triage=triage,
+            subsystem=subsystem,
         )
         if persisted:
             self._emit_persisted_event(persisted)
@@ -618,6 +829,10 @@ def run_cycle(self) -> Dict[str, Any]:
         metrics = {"candidates": 0, "triaged": 0, "emitted": 0, "cycleMs": 0, "diagnostics": {}}
         thresholds = self.config.get("thresholds", {})
         min_history = int(self.config.get("minHistoryPoints", 30))
+        max_candidates_total = max(1, int(self.config.get("maxCandidatesPerCycle", 25)))
+        max_candidates_per_subsystem = max(1, int(self.config.get("maxCandidatesPerSubsystem", 8)))
+        max_triage_total = max(0, int(self.config.get("maxLlmTriagesPerCycle", 5)))
+        max_triage_per_subsystem = max(0, int(self.config.get("maxLlmTriagesPerSubsystem", 2)))
 
         if not self.api.is_configured:
             emitted = self.emit_provider_failure_event(
@@ -644,6 +859,13 @@ def run_cycle(self) -> Dict[str, Any]:
             return metrics
 
         tag_paths = [t["path"] for t in tags]
+        tag_lookup = {t["path"]: t for t in tags}
+        detected_subsystems = sorted(
+            {
+                (t.get("primary_subsystem") or _subsystem_ref("global", "all")).get("id", "global:all")
+                for t in tags
+            }
+        )
         live_values = self.api.read_tags(tag_paths)
         candidates: List[Dict[str, Any]] = []
         now = datetime.now(timezone.utc)
@@ -656,8 +878,12 @@ def run_cycle(self) -> Dict[str, Any]:
         stale_filtered_count = 0
         insufficient_history_count = 0
         low_history_candidate_count = 0
+        candidate_subsystem_counts: Dict[str, int] = {}
 
         for tv in live_values:
+            tag_meta = tag_lookup.get(tv.path, {"path": tv.path, "name": tv.path})
+            subsystem = tag_meta.get("primary_subsystem") or _subsystem_ref("global", "all")
+
             if tv.error:
                 live_error_count += 1
                 if len(live_error_samples) < 5:
@@ -698,6 +924,8 @@ def run_cycle(self) -> Dict[str, Any]:
                         deterministic["reasons"] = list(deterministic.get("reasons", [])) + ["low_history_override"]
                         deterministic["history_quality"] = "low"
                         context = self.get_context(tv.path)
+                        context["subsystem"] = subsystem
+                        context["subsystems"] = tag_meta.get("subsystems") or [subsystem]
                         candidates.append(
                             {
                                 "context": context,
@@ -709,8 +937,11 @@ def run_cycle(self) -> Dict[str, Any]:
                                     "timestamp": tv.timestamp,
                                     "data_type": tv.data_type,
                                 },
+                                "subsystem": subsystem,
                             }
                         )
+                        sub_id = subsystem.get("id", "global:all")
+                        candidate_subsystem_counts[sub_id] = candidate_subsystem_counts.get(sub_id, 0) + 1
                         low_history_candidate_count += 1
                 continue
 
@@ -727,6 +958,8 @@ def run_cycle(self) -> Dict[str, Any]:
 
             if deterministic.get("candidate"):
                 context = self.get_context(tv.path)
+                context["subsystem"] = subsystem
+                context["subsystems"] = tag_meta.get("subsystems") or [subsystem]
                 candidates.append(
                     {
                         "context": context,
@@ -738,8 +971,11 @@ def run_cycle(self) -> Dict[str, Any]:
                             "timestamp": tv.timestamp,
                             "data_type": tv.data_type,
                         },
+                        "subsystem": subsystem,
                     }
                 )
+                sub_id = subsystem.get("id", "global:all")
+                candidate_subsystem_counts[sub_id] = candidate_subsystem_counts.get(sub_id, 0) + 1
 
         if live_values and live_error_count == len(live_values):
             emitted = self.emit_provider_failure_event(
@@ -806,12 +1042,28 @@ def run_cycle(self) -> Dict[str, Any]:
                 metrics["emitted"] += 1
 
         metrics["candidates"] = len(candidates)
-        max_candidates = int(self.config.get("maxCandidatesPerCycle", 25))
-        max_triage = int(self.config.get("maxLlmTriagesPerCycle", 5))
-        shortlisted = candidates[:max_candidates]
-
-        for idx, candidate in enumerate(shortlisted):
-            use_llm = idx < max_triage
+        shortlisted: List[Dict[str, Any]] = []
+        selected_per_subsystem: Dict[str, int] = {}
+        for candidate in candidates:
+            subsystem = candidate.get("subsystem") or _subsystem_ref("global", "all")
+            sub_id = subsystem.get("id", "global:all")
+            if selected_per_subsystem.get(sub_id, 0) >= max_candidates_per_subsystem:
+                continue
+            shortlisted.append(candidate)
+            selected_per_subsystem[sub_id] = selected_per_subsystem.get(sub_id, 0) + 1
+            if len(shortlisted) >= max_candidates_total:
+                break
+
+        llm_total = 0
+        llm_per_subsystem: Dict[str, int] = {}
+
+        for candidate in shortlisted:
+            subsystem = candidate.get("subsystem") or _subsystem_ref("global", "all")
+            sub_id = subsystem.get("id", "global:all")
+            use_llm = (
+                llm_total < max_triage_total
+                and llm_per_subsystem.get(sub_id, 0) < max_triage_per_subsystem
+            )
             triage = (
                 self.run_llm_triage(
                     candidate["context"],
@@ -820,28 +1072,38 @@ def run_cycle(self) -> Dict[str, Any]:
                 )
                 if use_llm
                 else {
-                    "summary": f"Deviation on {candidate['context'].get('tag_name', candidate['context']['tag_path'])}",
+                    "summary": (
+                        f"Deviation on {candidate['context'].get('tag_name', candidate['context']['tag_path'])} "
+                        f"in subsystem {subsystem.get('name', 'all')}"
+                    ),
                     "category": candidate["deterministic"].get("category", "deviation"),
                     "severity": "medium",
                     "confidence": 0.5,
                     "verification_checks": [],
                     "probable_causes": [],
                     "safety_notes": [],
-                    "rationale": "Triaged in deterministic-only mode due per-cycle LLM cap.",
+                    "rationale": "Triaged in deterministic-only mode due per-cycle/per-subsystem LLM caps.",
                     "related_entities": [],
                 }
             )
+            if use_llm:
+                llm_total += 1
+                llm_per_subsystem[sub_id] = llm_per_subsystem.get(sub_id, 0) + 1
             metrics["triaged"] += 1
             persisted = self.persist_event(
                 candidate["context"],
                 candidate["deterministic"],
                 candidate["live_sample"],
                 triage,
+                subsystem=subsystem,
             )
             if persisted:
                 metrics["emitted"] += 1
                 self._emit_persisted_event(persisted)
 
+        top_candidates_by_subsystem = dict(
+            sorted(candidate_subsystem_counts.items(), key=lambda item: item[1], reverse=True)[:10]
+        )
         metrics["diagnostics"] = {
             "monitoredTags": len(tag_paths),
             "validLiveCount": valid_live_count,
@@ -851,6 +1113,13 @@ def run_cycle(self) -> Dict[str, Any]:
             "historyErrorCount": history_error_count,
             "insufficientHistoryCount": insufficient_history_count,
             "lowHistoryCandidateCount": low_history_candidate_count,
+            "detectedSubsystemCount": len(detected_subsystems),
+            "detectedSubsystems": detected_subsystems[:10],
+            "candidateSubsystemCount": len(candidate_subsystem_counts),
+            "candidateBySubsystem": top_candidates_by_subsystem,
+            "maxCandidatesPerSubsystem": max_candidates_per_subsystem,
+            "maxLlmTriagesPerSubsystem": max_triage_per_subsystem,
+            "llmTriagedCount": llm_total,
         }
         metrics["cycleMs"] = int((time.time() - cycle_start) * 1000)
         return metrics
diff --git a/tests/unit/test_anomaly_monitor_subsystems.py b/tests/unit/test_anomaly_monitor_subsystems.py
new file mode 100644
index 0000000..4fb807c
--- /dev/null
+++ b/tests/unit/test_anomaly_monitor_subsystems.py
@@ -0,0 +1,56 @@
+from anomaly_monitor import derive_subsystems_for_tag, infer_tag_group
+
+
+def test_infer_tag_group_prefers_folder_name():
+    group = infer_tag_group("[default]Area1/Pump101/Speed", folder_name="LineA/Area1")
+    assert group == "LineA"
+
+
+def test_infer_tag_group_from_tag_path():
+    group = infer_tag_group("[default]Boiler/Feedwater/Flow")
+    assert group == "Boiler"
+
+
+def test_infer_tag_group_none_for_flat_tag():
+    assert infer_tag_group("[default]SingleTag") is None
+
+
+def test_derive_subsystems_auto_with_priority():
+    subsystems, primary = derive_subsystems_for_tag(
+        tag_meta={
+            "path": "[default]Line1/PumpA/Pressure",
+            "folder_name": "Line1/PumpA",
+            "views": ["Overview/Main"],
+            "equipment": ["PumpA"],
+        },
+        subsystem_mode="auto",
+        priority=["equipment", "view", "group"],
+    )
+    subsystem_ids = {item["id"] for item in subsystems}
+    assert "equipment:pumpa" in subsystem_ids
+    assert "view:overview/main" in subsystem_ids
+    assert "group:line1" in subsystem_ids
+    assert primary["type"] == "equipment"
+    assert primary["name"] == "PumpA"
+
+
+def test_derive_subsystems_global_mode():
+    subsystems, primary = derive_subsystems_for_tag(
+        tag_meta={
+            "path": "[default]Line1/PumpA/Pressure",
+            "views": ["Overview/Main"],
+            "equipment": ["PumpA"],
+        },
+        subsystem_mode="global",
+    )
+    assert subsystems == [{"type": "global", "name": "all", "id": "global:all"}]
+    assert primary == {"type": "global", "name": "all", "id": "global:all"}
+
+
+def test_derive_subsystems_falls_back_to_global_when_no_ontology_links():
+    subsystems, primary = derive_subsystems_for_tag(
+        tag_meta={"path": "[default]TagOnly"},
+        subsystem_mode="auto",
+    )
+    assert len(subsystems) == 1
+    assert primary["id"] == "global:all"

From 53d5a4787bb3760658a1e0317db90a12f05e3ba1 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Mon, 2 Mar 2026 22:43:00 +0000
Subject: [PATCH 06/16] Add subsystem anomaly diagnostics for tag gating

Co-authored-by: leor <leor@fortresslabs.com>
---
 scripts/anomaly_monitor.py | 172 +++++++++++++++++++++++++++++++++++++
 1 file changed, 172 insertions(+)

diff --git a/scripts/anomaly_monitor.py b/scripts/anomaly_monitor.py
index beaaf86..32cb492 100644
--- a/scripts/anomaly_monitor.py
+++ b/scripts/anomaly_monitor.py
@@ -860,6 +860,10 @@ def run_cycle(self) -> Dict[str, Any]:
 
         tag_paths = [t["path"] for t in tags]
         tag_lookup = {t["path"]: t for t in tags}
+        linked_tag_count = sum(
+            1 for t in tags if (t.get("views") or t.get("equipment"))
+        )
+        unlinked_tag_count = max(0, len(tags) - linked_tag_count)
         detected_subsystems = sorted(
             {
                 (t.get("primary_subsystem") or _subsystem_ref("global", "all")).get("id", "global:all")
@@ -879,13 +883,64 @@ def run_cycle(self) -> Dict[str, Any]:
         insufficient_history_count = 0
         low_history_candidate_count = 0
         candidate_subsystem_counts: Dict[str, int] = {}
+        live_error_linked = 0
+        live_error_unlinked = 0
+        history_error_linked = 0
+        history_error_unlinked = 0
+        quality_filtered_linked = 0
+        quality_filtered_unlinked = 0
+        stale_filtered_linked = 0
+        stale_filtered_unlinked = 0
+        evaluated_linked = 0
+        evaluated_unlinked = 0
+        candidate_linked = 0
+        candidate_unlinked = 0
+        near_shift_count = 0
+        near_shift_linked = 0
+        near_shift_unlinked = 0
+        subsystem_shift_signals: Dict[str, Dict[str, Any]] = {}
+
+        def _update_subsystem_signal(
+            subsystem_ref: Dict[str, str], deterministic: Dict[str, Any], tag_path: str
+        ) -> None:
+            sub_id = subsystem_ref.get("id", "global:all")
+            abs_z = abs(float(deterministic.get("z_score", 0.0)))
+            z = float(deterministic.get("z_score", 0.0))
+            bucket = subsystem_shift_signals.setdefault(
+                sub_id,
+                {
+                    "subsystemId": sub_id,
+                    "subsystemType": subsystem_ref.get("type", "global"),
+                    "subsystemName": subsystem_ref.get("name", "all"),
+                    "evaluated": 0,
+                    "candidate": 0,
+                    "nearShift": 0,
+                    "sumAbsZ": 0.0,
+                    "sumZ": 0.0,
+                    "maxAbsZ": 0.0,
+                    "sampleTag": tag_path,
+                },
+            )
+            bucket["evaluated"] += 1
+            bucket["sumAbsZ"] += abs_z
+            bucket["sumZ"] += z
+            if abs_z >= 1.5:
+                bucket["nearShift"] += 1
+            if abs_z > bucket["maxAbsZ"]:
+                bucket["maxAbsZ"] = abs_z
+                bucket["sampleTag"] = tag_path
 
         for tv in live_values:
             tag_meta = tag_lookup.get(tv.path, {"path": tv.path, "name": tv.path})
             subsystem = tag_meta.get("primary_subsystem") or _subsystem_ref("global", "all")
+            is_linked = bool(tag_meta.get("views") or tag_meta.get("equipment"))
 
             if tv.error:
                 live_error_count += 1
+                if is_linked:
+                    live_error_linked += 1
+                else:
+                    live_error_unlinked += 1
                 if len(live_error_samples) < 5:
                     live_error_samples.append(f"{tv.path}: {tv.error}")
                 continue
@@ -893,14 +948,26 @@ def run_cycle(self) -> Dict[str, Any]:
             if not is_quality_good(tv.quality):
                 # quality gate: only emit quality anomalies if this persists via triage.
                 quality_filtered_count += 1
+                if is_linked:
+                    quality_filtered_linked += 1
+                else:
+                    quality_filtered_unlinked += 1
                 continue
             if is_stale(tv.timestamp, int(thresholds.get("stalenessSec", 120)), now=now):
                 stale_filtered_count += 1
+                if is_linked:
+                    stale_filtered_linked += 1
+                else:
+                    stale_filtered_unlinked += 1
                 continue
 
             history, history_error = self.fetch_history_values(tv.path)
             if history_error:
                 history_error_count += 1
+                if is_linked:
+                    history_error_linked += 1
+                else:
+                    history_error_unlinked += 1
                 if len(history_error_samples) < 5:
                     history_error_samples.append(f"{tv.path}: {history_error}")
                 continue
@@ -920,7 +987,39 @@ def run_cycle(self) -> Dict[str, Any]:
                     if curr_num is not None:
                         self._prev_values[tv.path] = curr_num
 
+                    _update_subsystem_signal(subsystem, deterministic, tv.path)
+                    if is_linked:
+                        evaluated_linked += 1
+                    else:
+                        evaluated_unlinked += 1
+                    if abs(float(deterministic.get("z_score", 0.0))) >= 1.5:
+                        near_shift_count += 1
+                        if is_linked:
+                            near_shift_linked += 1
+                        else:
+                            near_shift_unlinked += 1
+
                     if deterministic.get("candidate"):
+                        sub_bucket = subsystem_shift_signals.setdefault(
+                            subsystem.get("id", "global:all"),
+                            {
+                                "subsystemId": subsystem.get("id", "global:all"),
+                                "subsystemType": subsystem.get("type", "global"),
+                                "subsystemName": subsystem.get("name", "all"),
+                                "evaluated": 0,
+                                "candidate": 0,
+                                "nearShift": 0,
+                                "sumAbsZ": 0.0,
+                                "sumZ": 0.0,
+                                "maxAbsZ": 0.0,
+                                "sampleTag": tv.path,
+                            },
+                        )
+                        sub_bucket["candidate"] += 1
+                        if is_linked:
+                            candidate_linked += 1
+                        else:
+                            candidate_unlinked += 1
                         deterministic["reasons"] = list(deterministic.get("reasons", [])) + ["low_history_override"]
                         deterministic["history_quality"] = "low"
                         context = self.get_context(tv.path)
@@ -956,7 +1055,39 @@ def run_cycle(self) -> Dict[str, Any]:
             if curr_num is not None:
                 self._prev_values[tv.path] = curr_num
 
+            _update_subsystem_signal(subsystem, deterministic, tv.path)
+            if is_linked:
+                evaluated_linked += 1
+            else:
+                evaluated_unlinked += 1
+            if abs(float(deterministic.get("z_score", 0.0))) >= 1.5:
+                near_shift_count += 1
+                if is_linked:
+                    near_shift_linked += 1
+                else:
+                    near_shift_unlinked += 1
+
             if deterministic.get("candidate"):
+                sub_bucket = subsystem_shift_signals.setdefault(
+                    subsystem.get("id", "global:all"),
+                    {
+                        "subsystemId": subsystem.get("id", "global:all"),
+                        "subsystemType": subsystem.get("type", "global"),
+                        "subsystemName": subsystem.get("name", "all"),
+                        "evaluated": 0,
+                        "candidate": 0,
+                        "nearShift": 0,
+                        "sumAbsZ": 0.0,
+                        "sumZ": 0.0,
+                        "maxAbsZ": 0.0,
+                        "sampleTag": tv.path,
+                    },
+                )
+                sub_bucket["candidate"] += 1
+                if is_linked:
+                    candidate_linked += 1
+                else:
+                    candidate_unlinked += 1
                 context = self.get_context(tv.path)
                 context["subsystem"] = subsystem
                 context["subsystems"] = tag_meta.get("subsystems") or [subsystem]
@@ -1056,6 +1187,7 @@ def run_cycle(self) -> Dict[str, Any]:
 
         llm_total = 0
         llm_per_subsystem: Dict[str, int] = {}
+        dedup_suppressed_count = 0
 
         for candidate in shortlisted:
             subsystem = candidate.get("subsystem") or _subsystem_ref("global", "all")
@@ -1100,26 +1232,66 @@ def run_cycle(self) -> Dict[str, Any]:
             if persisted:
                 metrics["emitted"] += 1
                 self._emit_persisted_event(persisted)
+            else:
+                dedup_suppressed_count += 1
 
         top_candidates_by_subsystem = dict(
             sorted(candidate_subsystem_counts.items(), key=lambda item: item[1], reverse=True)[:10]
         )
+        top_shift_signals = sorted(
+            subsystem_shift_signals.values(),
+            key=lambda item: (
+                int(item.get("candidate", 0)),
+                float(item.get("maxAbsZ", 0.0)),
+                int(item.get("nearShift", 0)),
+                int(item.get("evaluated", 0)),
+            ),
+            reverse=True,
+        )[:8]
+        for item in top_shift_signals:
+            evaluated = max(1, int(item.get("evaluated", 0)))
+            item["avgAbsZ"] = round(float(item.get("sumAbsZ", 0.0)) / evaluated, 3)
+            item["avgZ"] = round(float(item.get("sumZ", 0.0)) / evaluated, 3)
+            item["shiftRatio"] = round(float(item.get("nearShift", 0)) / evaluated, 3)
+            item["candidateRatio"] = round(float(item.get("candidate", 0)) / evaluated, 3)
+            item.pop("sumAbsZ", None)
+            item.pop("sumZ", None)
+
         metrics["diagnostics"] = {
             "monitoredTags": len(tag_paths),
+            "linkedTags": linked_tag_count,
+            "unlinkedTags": unlinked_tag_count,
             "validLiveCount": valid_live_count,
             "liveErrorCount": live_error_count,
+            "liveErrorLinked": live_error_linked,
+            "liveErrorUnlinked": live_error_unlinked,
             "qualityFilteredCount": quality_filtered_count,
+            "qualityFilteredLinked": quality_filtered_linked,
+            "qualityFilteredUnlinked": quality_filtered_unlinked,
             "staleFilteredCount": stale_filtered_count,
+            "staleFilteredLinked": stale_filtered_linked,
+            "staleFilteredUnlinked": stale_filtered_unlinked,
             "historyErrorCount": history_error_count,
+            "historyErrorLinked": history_error_linked,
+            "historyErrorUnlinked": history_error_unlinked,
             "insufficientHistoryCount": insufficient_history_count,
             "lowHistoryCandidateCount": low_history_candidate_count,
+            "evaluatedLinked": evaluated_linked,
+            "evaluatedUnlinked": evaluated_unlinked,
+            "candidateLinked": candidate_linked,
+            "candidateUnlinked": candidate_unlinked,
+            "nearShiftCount": near_shift_count,
+            "nearShiftLinked": near_shift_linked,
+            "nearShiftUnlinked": near_shift_unlinked,
             "detectedSubsystemCount": len(detected_subsystems),
             "detectedSubsystems": detected_subsystems[:10],
             "candidateSubsystemCount": len(candidate_subsystem_counts),
             "candidateBySubsystem": top_candidates_by_subsystem,
+            "subsystemShiftSignals": top_shift_signals,
             "maxCandidatesPerSubsystem": max_candidates_per_subsystem,
             "maxLlmTriagesPerSubsystem": max_triage_per_subsystem,
             "llmTriagedCount": llm_total,
+            "dedupSuppressedCount": dedup_suppressed_count,
         }
         metrics["cycleMs"] = int((time.time() - cycle_start) * 1000)
         return metrics

From 5db2eb8080f6699f359d76af6c723a7dfc38b2a8 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Mon, 2 Mar 2026 22:47:00 +0000
Subject: [PATCH 07/16] Fix fcose startup deps and surface agent diagnostics

Co-authored-by: leor <leor@fortresslabs.com>
---
 electron-ui/index.html  | 4 +++-
 electron-ui/renderer.js | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/electron-ui/index.html b/electron-ui/index.html
index 99ba9a1..2c43657 100644
--- a/electron-ui/index.html
+++ b/electron-ui/index.html
@@ -3,7 +3,7 @@
 <head>
   <meta charset="UTF-8">
   <meta name="viewport" content="width=device-width, initial-scale=1.0">
-  <meta http-equiv="Content-Security-Policy" content="default-src 'self'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com; font-src https://fonts.gstatic.com; script-src 'self' 'unsafe-inline' https://cdnjs.cloudflare.com https://unpkg.com;">
+  <meta http-equiv="Content-Security-Policy" content="default-src 'self'; img-src 'self' data: https:; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com; font-src https://fonts.gstatic.com; script-src 'self' 'unsafe-inline' https://cdnjs.cloudflare.com https://unpkg.com;">
   <title>Axilon</title>
   <link rel="stylesheet" href="styles.css">
 </head>
@@ -1521,6 +1521,8 @@ <h3 id="graph-modal-title">Graph: Node</h3>
   <script src="https://cdnjs.cloudflare.com/ajax/libs/cytoscape/3.28.1/cytoscape.min.js"></script>
   <script src="https://unpkg.com/dagre@0.8.5/dist/dagre.min.js"></script>
   <script src="https://unpkg.com/cytoscape-dagre@2.5.0/cytoscape-dagre.js"></script>
+  <script src="https://unpkg.com/layout-base@2.0.1/layout-base.js"></script>
+  <script src="https://unpkg.com/cose-base@2.2.0/cose-base.js"></script>
   <script src="https://unpkg.com/cytoscape-fcose@2.2.0/cytoscape-fcose.js"></script>
   
   <script src="graph-renderer.js"></script>
diff --git a/electron-ui/renderer.js b/electron-ui/renderer.js
index 042666c..fa0009f 100644
--- a/electron-ui/renderer.js
+++ b/electron-ui/renderer.js
@@ -3905,7 +3905,7 @@ function ensureAgentListeners() {
     updateAgentStatusUi(agentsState.status, `Run ${agentsState.runId || 'n/a'}`);
     updateAgentMetrics(payload, payload.timestamp);
     if (payload.diagnostics) {
-      console.debug('[Agents diagnostics]', payload.diagnostics);
+      console.info('[Agents diagnostics]', payload.diagnostics);
     }
   });
 

From 730676d99c545cdbb6e963067b9180d9f7dd8b49 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Mon, 2 Mar 2026 22:53:30 +0000
Subject: [PATCH 08/16] Prioritize view-bound live tag paths for monitoring

Co-authored-by: leor <leor@fortresslabs.com>
---
 electron-ui/main.js                           |   1 +
 electron-ui/renderer.js                       |   1 +
 scripts/anomaly_monitor.py                    | 177 +++++++++++++++---
 tests/unit/test_anomaly_monitor_subsystems.py |  20 +-
 4 files changed, 177 insertions(+), 22 deletions(-)

diff --git a/electron-ui/main.js b/electron-ui/main.js
index 5ebe28b..5f5babb 100644
--- a/electron-ui/main.js
+++ b/electron-ui/main.js
@@ -228,6 +228,7 @@ function normalizeAgentConfig(config = {}) {
         ? scope.subsystemPriority.map(String)
         : ['view', 'equipment', 'group', 'global'],
       subsystemInclude: Array.isArray(scope.subsystemInclude) ? scope.subsystemInclude.map(String) : [],
+      includeUnlinkedTags: Boolean(scope.includeUnlinkedTags),
     },
   };
 }
diff --git a/electron-ui/renderer.js b/electron-ui/renderer.js
index fa0009f..b597326 100644
--- a/electron-ui/renderer.js
+++ b/electron-ui/renderer.js
@@ -3594,6 +3594,7 @@ function getAgentsConfigFromUI() {
     scope: {
       subsystemMode: 'auto',
       subsystemPriority: ['view', 'equipment', 'group', 'global'],
+      includeUnlinkedTags: false,
     },
   };
 }
diff --git a/scripts/anomaly_monitor.py b/scripts/anomaly_monitor.py
index 32cb492..58b0720 100644
--- a/scripts/anomaly_monitor.py
+++ b/scripts/anomaly_monitor.py
@@ -104,6 +104,31 @@ def infer_tag_group(tag_path: Optional[str], folder_name: Optional[str] = None)
     return parts[0]
 
 
+def _last_segment_from_tag_path(tag_path: Optional[str]) -> str:
+    """Return the last non-blank "/"-separated segment of *tag_path*.
+
+    A leading "[provider]" qualifier is ignored. Returns "" when no segment
+    remains, so callers can rely on ``_last_segment_from_tag_path(p) or p``.
+    """
+    raw = str(tag_path or "").strip()
+    if raw.startswith("[") and "]" in raw:
+        raw = raw.split("]", 1)[1]
+    segments = [part.strip() for part in raw.split("/") if part.strip()]
+    return segments[-1] if segments else ""
+
+
+def _looks_like_live_tag_path(value: Optional[str]) -> bool:
+    """Return True when *value* is shaped like a concrete, readable Ignition tag path."""
+    path = str(value or "").strip()
+    # Braces mark an unresolved indirect/expression binding (e.g. "[default]M{1}/Amps");
+    # such a string can never be read live, even with a "[provider]" prefix.
+    if not path or "{" in path or "}" in path:
+        return False
+    if path.startswith("[") and "]" in path:
+        return True
+    return "/" in path and "(" not in path and ")" not in path
+
+
 def derive_subsystems_for_tag(
     tag_meta: Dict[str, Any],
     subsystem_mode: str = "auto",
@@ -171,6 +196,7 @@ def merge_defaults(config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
             "subsystemMode": "auto",
             "subsystemPriority": list(DEFAULT_SUBSYSTEM_PRIORITY),
             "subsystemInclude": [],
+            "includeUnlinkedTags": False,
         },
         "thresholds": {
             "z": 3.0,
@@ -207,6 +233,7 @@ def merge_defaults(config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
         for x in scope_cfg.get("subsystemInclude", [])
         if str(x).strip()
     ]
+    scope_cfg["includeUnlinkedTags"] = bool(scope_cfg.get("includeUnlinkedTags", False))
     return cfg
 
 
@@ -316,36 +343,118 @@ def get_monitored_tags(self) -> List[Dict[str, Any]]:
             for x in (scope.get("subsystemInclude") or [])
             if str(x).strip()
         }
+        include_unlinked = bool(scope.get("includeUnlinkedTags", False))
+        tag_map: Dict[str, Dict[str, Any]] = {}
+
+        def upsert_tag(
+            *,
+            tag_path: str,
+            tag_name: str,
+            folder_name: str = "",
+            views: Optional[List[str]] = None,
+            equipment: Optional[List[str]] = None,
+            source: str = "unknown",
+        ) -> None:
+            path = str(tag_path or "").strip()
+            if not path:
+                return
+            entry = tag_map.setdefault(
+                path,
+                {
+                    "path": path,
+                    "name": str(tag_name or _last_segment_from_tag_path(path) or path),
+                    "folder_name": str(folder_name or ""),
+                    "views": [],
+                    "equipment": [],
+                    "source": source,
+                    "bound_to_view": False,
+                },
+            )
+            if source == "view_binding":
+                entry["bound_to_view"] = True
+                entry["source"] = source
+            if folder_name and not entry.get("folder_name"):
+                entry["folder_name"] = str(folder_name)
+            if tag_name and (
+                not entry.get("name")
+                or entry.get("name") == entry.get("path")
+                or entry.get("name") == _last_segment_from_tag_path(entry.get("path"))
+            ):
+                entry["name"] = str(tag_name)
+            for view_name in views or []:
+                v = str(view_name or "").strip()
+                if v and v not in entry["views"]:
+                    entry["views"].append(v)
+            for eq_name in equipment or []:
+                eq = str(eq_name or "").strip()
+                if eq and eq not in entry["equipment"]:
+                    entry["equipment"].append(eq)
 
         with self.graph.session() as session:
-            result = session.run(
+            bound_result = session.run(
+                """
+                MATCH (v:View)-[:HAS_COMPONENT]->(c:ViewComponent)-[r:BINDS_TO]->(n)
+                WHERE r.tag_path IS NOT NULL
+                  AND trim(r.tag_path) <> ''
+                  AND toLower(coalesce(r.binding_type, 'tag')) = 'tag'
+                OPTIONAL MATCH (eq:Equipment)-[*1..2]-(n)
+                RETURN DISTINCT trim(r.tag_path) AS tag_path,
+                                coalesce(n.name, '') AS tag_name,
+                                collect(DISTINCT v.name) AS views,
+                                collect(DISTINCT eq.name) AS equipment
+                LIMIT $limit
+                """,
+                limit=max_tags * 4,
+            )
+            for r in bound_result:
+                path = str(r["tag_path"] or "").strip()
+                if not _looks_like_live_tag_path(path):
+                    continue
+                upsert_tag(
+                    tag_path=path,
+                    tag_name=str(r["tag_name"] or _last_segment_from_tag_path(path)),
+                    folder_name=infer_tag_group(path) or "",
+                    views=[x for x in (r["views"] or []) if x],
+                    equipment=[x for x in (r["equipment"] or []) if x],
+                    source="view_binding",
+                )
+
+            scada_result = session.run(
                 """
                 MATCH (t:ScadaTag)
-                WHERE coalesce(t.opc_item_path, t.name) IS NOT NULL
-                  AND coalesce(t.opc_item_path, t.name) <> ''
+                WHERE t.opc_item_path IS NOT NULL
+                  AND trim(t.opc_item_path) <> ''
                 OPTIONAL MATCH (c:ViewComponent)-[:BINDS_TO]->(t)
                 OPTIONAL MATCH (v:View)-[:HAS_COMPONENT]->(c)
                 OPTIONAL MATCH (eq:Equipment)-[*1..2]-(t)
-                RETURN DISTINCT coalesce(t.opc_item_path, t.name) AS tag_path,
+                RETURN DISTINCT trim(t.opc_item_path) AS tag_path,
                                 coalesce(t.name, t.opc_item_path) AS tag_name,
                                 coalesce(t.folder_name, '') AS folder_name,
                                 collect(DISTINCT v.name) AS views,
                                 collect(DISTINCT eq.name) AS equipment
                 LIMIT $limit
                 """,
-                limit=max_tags * 3,
+                limit=max_tags * 6,
             )
-            tags = [
-                {
-                    "path": r["tag_path"],
-                    "name": r["tag_name"],
-                    "folder_name": r["folder_name"] or "",
-                    "views": [x for x in (r["views"] or []) if x],
-                    "equipment": [x for x in (r["equipment"] or []) if x],
-                }
-                for r in result
-                if r["tag_path"]
-            ]
+            for r in scada_result:
+                path = str(r["tag_path"] or "").strip()
+                if not _looks_like_live_tag_path(path):
+                    continue
+                upsert_tag(
+                    tag_path=path,
+                    tag_name=str(r["tag_name"] or _last_segment_from_tag_path(path)),
+                    folder_name=str(r["folder_name"] or ""),
+                    views=[x for x in (r["views"] or []) if x],
+                    equipment=[x for x in (r["equipment"] or []) if x],
+                    source="scada_tag",
+                )
+
+        tags = list(tag_map.values())
+
+        if not include_unlinked:
+            linked = [t for t in tags if (t.get("views") or t.get("equipment") or t.get("bound_to_view"))]
+            if linked:
+                tags = linked
 
         if tag_regex:
             import re
@@ -369,6 +478,14 @@ def get_monitored_tags(self) -> List[Dict[str, Any]]:
                 or any(str(eq).strip().lower() in equipment_tags for eq in t.get("equipment", []))
             ]
 
+        tags.sort(
+            key=lambda t: (
+                0 if t.get("bound_to_view") else 1,
+                0 if (t.get("views") or t.get("equipment")) else 1,
+                str(t.get("path", "")),
+            )
+        )
+
         for tag in tags:
             subsystems, primary = derive_subsystems_for_tag(
                 tag_meta=tag,
@@ -477,11 +594,29 @@ def get_context(self, tag_path: str) -> Dict[str, Any]:
                 tag=tag_path,
             )
             record = result.single()
+            fallback_views: List[str] = []
+            fallback_equipment: List[str] = []
+            fallback_result = session.run(
+                """
+                MATCH (v:View)-[:HAS_COMPONENT]->(vc:ViewComponent)-[r:BINDS_TO]->(n)
+                WHERE r.tag_path = $tag
+                OPTIONAL MATCH (eq:Equipment)-[*1..2]-(n)
+                RETURN collect(DISTINCT v.name) AS views,
+                       collect(DISTINCT eq.name) AS equipment
+                LIMIT 1
+                """,
+                tag=tag_path,
+            ).single()
+            if fallback_result:
+                fallback_views = [x for x in (fallback_result["views"] or []) if x]
+                fallback_equipment = [x for x in (fallback_result["equipment"] or []) if x]
+
             if not record:
                 return {
                     "tag_path": tag_path,
-                    "views": [],
-                    "equipment": [],
+                    "tag_name": _last_segment_from_tag_path(tag_path) or tag_path,
+                    "views": fallback_views,
+                    "equipment": fallback_equipment,
                     "group": infer_tag_group(tag_path),
                     "symptoms": [],
                     "causes": [],
@@ -491,9 +626,9 @@ def get_context(self, tag_path: str) -> Dict[str, Any]:
             node = record["t"]
             return {
                 "tag_path": tag_path,
-                "tag_name": node.get("name") if node else tag_path,
-                "views": [x for x in record["views"] if x],
-                "equipment": [x for x in record["equipment"] if x],
+                "tag_name": node.get("name") if node else (_last_segment_from_tag_path(tag_path) or tag_path),
+                "views": sorted(set([x for x in record["views"] if x] + fallback_views)),
+                "equipment": sorted(set([x for x in record["equipment"] if x] + fallback_equipment)),
                 "group": infer_tag_group(tag_path, node.get("folder_name") if node else None),
                 "symptoms": [x for x in record["symptoms"] if x],
                 "causes": [x for x in record["causes"] if x],
diff --git a/tests/unit/test_anomaly_monitor_subsystems.py b/tests/unit/test_anomaly_monitor_subsystems.py
index 4fb807c..83795ce 100644
--- a/tests/unit/test_anomaly_monitor_subsystems.py
+++ b/tests/unit/test_anomaly_monitor_subsystems.py
@@ -1,4 +1,9 @@
-from anomaly_monitor import derive_subsystems_for_tag, infer_tag_group
+from anomaly_monitor import (
+    _last_segment_from_tag_path,
+    _looks_like_live_tag_path,
+    derive_subsystems_for_tag,
+    infer_tag_group,
+)
 
 
 def test_infer_tag_group_prefers_folder_name():
@@ -54,3 +59,16 @@ def test_derive_subsystems_falls_back_to_global_when_no_ontology_links():
     )
     assert len(subsystems) == 1
     assert primary["id"] == "global:all"
+
+
+def test_tag_path_helpers_identify_live_paths():
+    assert _looks_like_live_tag_path("[default]Line/Pump/Speed")
+    assert _looks_like_live_tag_path("Line/Pump/Speed")
+    assert not _looks_like_live_tag_path("SimpleTagNameOnly")
+    assert not _looks_like_live_tag_path("{../props.value}")
+
+
+def test_last_segment_from_tag_path():
+    assert _last_segment_from_tag_path("[default]Line/Pump/Speed") == "Speed"
+    assert _last_segment_from_tag_path("Line/Pump/Speed") == "Speed"
+    assert _last_segment_from_tag_path("Speed") == "Speed"

From 8d529c015bd9928525f969d8c5050041dd34e556 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Mon, 2 Mar 2026 23:01:29 +0000
Subject: [PATCH 09/16] Add staleness/tool-call diagnostics and shutdown-safe
 IPC

Co-authored-by: leor <leor@fortresslabs.com>
---
 electron-ui/main.js              | 98 +++++++++++++++++++++-----------
 electron-ui/renderer.js          |  6 ++
 scripts/anomaly_monitor.py       | 53 ++++++++++++++++-
 scripts/anomaly_rules.py         | 13 ++++-
 tests/unit/test_anomaly_rules.py | 16 +++++-
 5 files changed, 150 insertions(+), 36 deletions(-)

diff --git a/electron-ui/main.js b/electron-ui/main.js
index 5f5babb..9f4f350 100644
--- a/electron-ui/main.js
+++ b/electron-ui/main.js
@@ -5,6 +5,7 @@ const { spawn } = require('child_process');
 
 let mainWindow;
 let activeAgentRun = null;
+let isAppShuttingDown = false;
 
 // ---------------------------------------------------------------------------
 // Python backend configuration  (works in both dev and packaged modes)
@@ -89,6 +90,10 @@ function createWindow() {
   });
 
   mainWindow.loadFile('index.html');
+
+  mainWindow.on('closed', () => {
+    mainWindow = null;
+  });
   
   // Open DevTools in development
   if (process.argv.includes('--dev')) {
@@ -105,11 +110,15 @@ app.on('window-all-closed', () => {
 });
 
 app.on('before-quit', () => {
+  isAppShuttingDown = true;
+  console.info('[Shutdown] before-quit triggered');
   if (activeAgentRun && activeAgentRun.process && !activeAgentRun.process.killed) {
     try {
+      console.info(`[Shutdown] Stopping active agent run ${activeAgentRun.runId}`);
       activeAgentRun.process.kill('SIGTERM');
     } catch (err) {
       // Ignore termination errors during shutdown.
+      console.warn('[Shutdown] Failed to terminate active agent process:', err.message);
     }
   }
 });
@@ -135,27 +144,27 @@ function runPythonScript(scriptName, args = [], options = {}) {
       stdout += text;
       
       // Send streaming output to renderer if enabled
-      if (streaming && mainWindow) {
+      if (streaming) {
         // Parse and emit tool calls separately
         const lines = text.split('\n');
         for (const line of lines) {
           if (line.startsWith('[TOOL]')) {
-            mainWindow.webContents.send('tool-call', {
+            sendToRenderer('tool-call', {
               streamId,
               tool: line.replace('[TOOL]', '').trim()
-            });
+            }, 'runPythonScript stdout tool');
           } else if (line.startsWith('[DEBUG]')) {
-            mainWindow.webContents.send('stream-output', {
+            sendToRenderer('stream-output', {
               streamId,
               text: line,
               type: 'debug'
-            });
+            }, 'runPythonScript stdout debug');
           } else if (line.trim()) {
-            mainWindow.webContents.send('stream-output', {
+            sendToRenderer('stream-output', {
               streamId,
               text: line,
               type: 'output'
-            });
+            }, 'runPythonScript stdout output');
           }
         }
       }
@@ -166,21 +175,21 @@ function runPythonScript(scriptName, args = [], options = {}) {
       stderr += text;
       
       // Stream stderr too (useful for verbose output)
-      if (streaming && mainWindow) {
-        mainWindow.webContents.send('stream-output', {
+      if (streaming) {
+        sendToRenderer('stream-output', {
           streamId,
           text,
           type: 'stderr'
-        });
+        }, 'runPythonScript stderr');
       }
     });
 
     pythonProcess.on('close', (code) => {
-      if (streaming && mainWindow) {
-        mainWindow.webContents.send('stream-complete', {
+      if (streaming) {
+        sendToRenderer('stream-complete', {
           streamId,
           success: code === 0
-        });
+        }, 'runPythonScript close');
       }
       
       if (code === 0) {
@@ -233,12 +242,35 @@ function normalizeAgentConfig(config = {}) {
   };
 }
 
-function routeAgentMessage(channel, payload) {
-  if (mainWindow) {
+function canSendToRenderer() {
+  // True only while both the BrowserWindow and its webContents exist and are not destroyed.
+  const isGone = (target) => typeof target.isDestroyed === 'function' && target.isDestroyed();
+  if (!mainWindow || isGone(mainWindow)) return false;
+  // webContents is read only after the window check has passed.
+  const contents = mainWindow.webContents;
+  return Boolean(contents) && !isGone(contents);
+}
+
+function sendToRenderer(channel, payload, context = '') {
+  if (!canSendToRenderer()) {
+    if (isAppShuttingDown) {
+      console.info(`[Shutdown] Dropped renderer message ${channel}${context ? ` (${context})` : ''}`);
+    }
+    return false;
+  }
+  try {
     mainWindow.webContents.send(channel, payload);
+    return true;
+  } catch (err) {
+    console.warn(`[IPC] Failed sending ${channel}${context ? ` (${context})` : ''}: ${err.message}`);
+    return false;
   }
 }
 
+function routeAgentMessage(channel, payload) {
+  sendToRenderer(channel, payload, 'agent-stream');
+}
+
 function parseAgentLine(line) {
   const trimmed = (line || '').trim();
   if (!trimmed) return null;
@@ -566,22 +598,22 @@ ipcMain.handle('troubleshoot', async (event, question, history) => {
         stderr += text;
         
         // Stream tool calls, debug info, and Claude response from stderr to frontend
-        if (mainWindow) {
+        if (canSendToRenderer()) {
           // Check for special prefixes first (they appear on their own lines)
           if (text.includes('[TOOL]') || text.includes('[DEBUG]') || text.includes('[INFO]')) {
             const lines = text.split('\n');
             for (const line of lines) {
               if (line.startsWith('[TOOL]')) {
-                mainWindow.webContents.send('tool-call', {
+                sendToRenderer('tool-call', {
                   streamId,
                   tool: line.replace('[TOOL]', '').trim()
-                });
+                }, 'troubleshoot stderr tool');
               } else if (line.startsWith('[DEBUG]') || line.startsWith('[INFO]')) {
-                mainWindow.webContents.send('stream-output', {
+                sendToRenderer('stream-output', {
                   streamId,
                   text: line,
                   type: 'debug'
-                });
+                }, 'troubleshoot stderr debug');
               }
             }
           } else if (text.includes('[STREAM]')) {
@@ -589,29 +621,29 @@ ipcMain.handle('troubleshoot', async (event, question, history) => {
             const streamStart = text.indexOf('[STREAM]');
             const afterStream = text.substring(streamStart + 8); // 8 = length of '[STREAM]'
             if (afterStream) {
-              mainWindow.webContents.send('stream-output', {
+              sendToRenderer('stream-output', {
                 streamId,
                 text: afterStream,
                 type: 'claude-stream'
-              });
+              }, 'troubleshoot stderr stream-start');
             }
           } else if (text && !text.startsWith('[')) {
             // Continuation of Claude streaming (no prefix)
-            mainWindow.webContents.send('stream-output', {
+            sendToRenderer('stream-output', {
               streamId,
               text: text,
               type: 'claude-stream'
-            });
+            }, 'troubleshoot stderr stream-cont');
           }
         }
       });
       
       proc.on('close', (code) => {
-        if (mainWindow) {
-          mainWindow.webContents.send('stream-complete', {
+        if (canSendToRenderer()) {
+          sendToRenderer('stream-complete', {
             streamId,
             success: code === 0
-          });
+          }, 'troubleshoot close');
         }
         
         if (code === 0) {
@@ -1164,25 +1196,25 @@ ipcMain.handle('graph:ai-propose', async (event, description) => {
         stderr += text;
         
         // Stream tool calls to frontend
-        if (mainWindow && text.includes('[TOOL]')) {
+        if (canSendToRenderer() && text.includes('[TOOL]')) {
           const lines = text.split('\n');
           for (const line of lines) {
             if (line.startsWith('[TOOL]')) {
-              mainWindow.webContents.send('tool-call', {
+              sendToRenderer('tool-call', {
                 streamId,
                 tool: line.replace('[TOOL]', '').trim()
-              });
+              }, 'ai-propose stderr tool');
             }
           }
         }
       });
       
       proc.on('close', (code) => {
-        if (mainWindow) {
-          mainWindow.webContents.send('stream-complete', {
+        if (canSendToRenderer()) {
+          sendToRenderer('stream-complete', {
             streamId,
             success: code === 0
-          });
+          }, 'ai-propose close');
         }
         
         if (code === 0) {
diff --git a/electron-ui/renderer.js b/electron-ui/renderer.js
index b597326..4e85c2b 100644
--- a/electron-ui/renderer.js
+++ b/electron-ui/renderer.js
@@ -3907,6 +3907,12 @@ function ensureAgentListeners() {
     updateAgentMetrics(payload, payload.timestamp);
     if (payload.diagnostics) {
       console.info('[Agents diagnostics]', payload.diagnostics);
+      if (Array.isArray(payload.diagnostics.toolCalls) && payload.diagnostics.toolCalls.length) {
+        console.info('[Agents tool calls]', payload.diagnostics.toolCalls);
+      }
+      if (Array.isArray(payload.diagnostics.staleSamples) && payload.diagnostics.staleSamples.length) {
+        console.info('[Agents stale samples]', payload.diagnostics.staleSamples);
+      }
     }
   });
 
diff --git a/scripts/anomaly_monitor.py b/scripts/anomaly_monitor.py
index 58b0720..efad1d0 100644
--- a/scripts/anomaly_monitor.py
+++ b/scripts/anomaly_monitor.py
@@ -1006,6 +1006,28 @@ def run_cycle(self) -> Dict[str, Any]:
             }
         )
         live_values = self.api.read_tags(tag_paths)
+        tool_calls: List[Dict[str, Any]] = []
+        tool_calls.append({
+            "tool": "read_tags",
+            "request": {
+                "count": len(tag_paths),
+                "samplePaths": tag_paths[:8],
+            },
+            "result": {
+                "count": len(live_values),
+                "errorCount": sum(1 for tv in live_values if tv.error),
+                "qualityGoodCount": sum(1 for tv in live_values if is_quality_good(tv.quality)),
+                "sample": [
+                    {
+                        "path": tv.path,
+                        "quality": tv.quality,
+                        "timestamp": tv.timestamp,
+                        "error": tv.error,
+                    }
+                    for tv in live_values[:5]
+                ],
+            },
+        })
         candidates: List[Dict[str, Any]] = []
         now = datetime.now(timezone.utc)
         live_error_count = 0
@@ -1033,6 +1055,7 @@ def run_cycle(self) -> Dict[str, Any]:
         near_shift_count = 0
         near_shift_linked = 0
         near_shift_unlinked = 0
+        stale_samples: List[Dict[str, Any]] = []
         subsystem_shift_signals: Dict[str, Dict[str, Any]] = {}
 
         def _update_subsystem_signal(
@@ -1088,15 +1111,39 @@ def _update_subsystem_signal(
                 else:
                     quality_filtered_unlinked += 1
                 continue
-            if is_stale(tv.timestamp, int(thresholds.get("stalenessSec", 120)), now=now):
+            parsed_ts = parse_timestamp(tv.timestamp)
+            age_sec = (now - parsed_ts).total_seconds() if parsed_ts is not None else None
+            stale_threshold_sec = int(thresholds.get("stalenessSec", 120))
+            if is_stale(tv.timestamp, stale_threshold_sec, now=now):
                 stale_filtered_count += 1
                 if is_linked:
                     stale_filtered_linked += 1
                 else:
                     stale_filtered_unlinked += 1
+                if len(stale_samples) < 8:
+                    stale_samples.append({
+                        "path": tv.path,
+                        "timestampRaw": tv.timestamp,
+                        "timestampParsedUtc": parsed_ts.isoformat() if parsed_ts else None,
+                        "ageSec": round(age_sec, 3) if age_sec is not None else None,
+                        "thresholdSec": stale_threshold_sec,
+                        "reason": "timestamp_parse_failed" if parsed_ts is None else "age_exceeds_threshold",
+                    })
                 continue
 
             history, history_error = self.fetch_history_values(tv.path)
+            if len(tool_calls) < 18:
+                tool_calls.append({
+                    "tool": "query_tag_history",
+                    "request": {
+                        "tagPath": tv.path,
+                        "historyWindowMinutes": int(self.config.get("historyWindowMinutes", 360)),
+                    },
+                    "result": {
+                        "historyPoints": len(history),
+                        "error": history_error,
+                    },
+                })
             if history_error:
                 history_error_count += 1
                 if is_linked:
@@ -1418,6 +1465,9 @@ def _update_subsystem_signal(
             "nearShiftCount": near_shift_count,
             "nearShiftLinked": near_shift_linked,
             "nearShiftUnlinked": near_shift_unlinked,
+            "stalenessThresholdSec": int(thresholds.get("stalenessSec", 120)),
+            "staleSamples": stale_samples,
+            "timestampParseNote": "Naive timestamps are treated as local time by parse_timestamp().",
             "detectedSubsystemCount": len(detected_subsystems),
             "detectedSubsystems": detected_subsystems[:10],
             "candidateSubsystemCount": len(candidate_subsystem_counts),
@@ -1427,6 +1477,7 @@ def _update_subsystem_signal(
             "maxLlmTriagesPerSubsystem": max_triage_per_subsystem,
             "llmTriagedCount": llm_total,
             "dedupSuppressedCount": dedup_suppressed_count,
+            "toolCalls": tool_calls,
         }
         metrics["cycleMs"] = int((time.time() - cycle_start) * 1000)
         return metrics
diff --git a/scripts/anomaly_rules.py b/scripts/anomaly_rules.py
index 2aa274d..5e0e6aa 100644
--- a/scripts/anomaly_rules.py
+++ b/scripts/anomaly_rules.py
@@ -44,6 +44,15 @@ def parse_timestamp(ts: Optional[str]) -> Optional[datetime]:
     text = str(ts).strip()
     if not text:
         return None
+    # Handle unix epoch (seconds or milliseconds) represented as numeric text.
+    if text.isdigit():
+        try:
+            raw = int(text)
+            if raw > 10_000_000_000:  # likely milliseconds
+                raw = raw / 1000.0
+            return datetime.fromtimestamp(raw, tz=timezone.utc)
+        except (ValueError, OSError, OverflowError):
+            return None
     if text.endswith("Z"):
         text = text[:-1] + "+00:00"
     try:
@@ -51,7 +60,9 @@ def parse_timestamp(ts: Optional[str]) -> Optional[datetime]:
     except ValueError:
         return None
     if dt.tzinfo is None:
-        dt = dt.replace(tzinfo=timezone.utc)
+        # Ignition often returns naive local timestamps; assume local timezone.
+        local_tz = datetime.now().astimezone().tzinfo or timezone.utc
+        dt = dt.replace(tzinfo=local_tz)
     return dt.astimezone(timezone.utc)
 
 
diff --git a/tests/unit/test_anomaly_rules.py b/tests/unit/test_anomaly_rules.py
index e5f2af1..7a75c9e 100644
--- a/tests/unit/test_anomaly_rules.py
+++ b/tests/unit/test_anomaly_rules.py
@@ -2,7 +2,7 @@
 
 import pytest
 
-from anomaly_rules import compute_deviation_scores, is_quality_good, is_stale
+from anomaly_rules import compute_deviation_scores, is_quality_good, is_stale, parse_timestamp
 
 
 def test_detects_sharp_rise_and_sharp_drop():
@@ -53,6 +53,20 @@ def test_staleness_helper():
     assert is_stale(old_ts, staleness_sec=300)
 
 
+def test_staleness_accepts_epoch_seconds_and_millis():
+    """Digit-only epoch text, in seconds or milliseconds, is not reported as stale."""
+    now = datetime.now(timezone.utc)
+    epoch_seconds = now.timestamp()
+    for epoch_text in (str(int(epoch_seconds)), str(int(epoch_seconds * 1000))):
+        assert not is_stale(epoch_text, staleness_sec=300, now=now)
+
+
+def test_parse_timestamp_naive_assumed_local_time():
+    """A naive ISO string is read as local wall-clock time and converted to UTC."""
+    parsed = parse_timestamp(datetime.now().replace(microsecond=0).isoformat())
+    assert parsed is not None and abs((datetime.now(timezone.utc) - parsed).total_seconds()) < 5
+
+
 def test_non_numeric_current_value_is_rejected():
     result = compute_deviation_scores(
         current_value="not-a-number",

From c9a240a37836600aa9c478965d9e0b4185eab901 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Mon, 2 Mar 2026 23:05:52 +0000
Subject: [PATCH 10/16] Infer missing live timestamps and expand tool call
 diagnostics

Co-authored-by: leor <leor@fortresslabs.com>
---
 scripts/anomaly_monitor.py                    | 30 ++++++
 scripts/ignition_api_client.py                | 91 +++++++++++++++++--
 tests/unit/test_ignition_api_client_parser.py | 45 +++++++++
 3 files changed, 156 insertions(+), 10 deletions(-)
 create mode 100644 tests/unit/test_ignition_api_client_parser.py

diff --git a/scripts/anomaly_monitor.py b/scripts/anomaly_monitor.py
index efad1d0..5768e83 100644
--- a/scripts/anomaly_monitor.py
+++ b/scripts/anomaly_monitor.py
@@ -56,6 +56,15 @@ def emit(prefix: str, payload: Dict[str, Any]) -> None:
 DEFAULT_SUBSYSTEM_PRIORITY = ["view", "equipment", "group", "global"]
 
 
+def _preview_value(value: Any, max_len: int = 120) -> Any:
+    """Pass None/bool/int/float through; stringify the rest, ending over-long text in '...'."""
+    passthrough = value is None or isinstance(value, (bool, int, float))
+    text = value if passthrough else str(value)
+    if passthrough or len(text) <= max_len:
+        return text
+    return text[: max_len - 3] + "..."
+
+
 def _canonical_subsystem_type(kind: Any) -> str:
     value = str(kind or "").strip().lower()
     if value in {"view", "views"}:
@@ -1017,11 +1026,24 @@ def run_cycle(self) -> Dict[str, Any]:
                 "count": len(live_values),
                 "errorCount": sum(1 for tv in live_values if tv.error),
                 "qualityGoodCount": sum(1 for tv in live_values if is_quality_good(tv.quality)),
+                "timestampMissingCount": sum(1 for tv in live_values if not tv.timestamp),
+                "timestampInferredCount": sum(
+                    1
+                    for tv in live_values
+                    if isinstance(tv.config, dict) and bool(tv.config.get("timestamp_inferred"))
+                ),
                 "sample": [
                     {
                         "path": tv.path,
+                        "value": _preview_value(tv.value),
                         "quality": tv.quality,
                         "timestamp": tv.timestamp,
+                        "timestampInferred": bool(tv.config.get("timestamp_inferred"))
+                        if isinstance(tv.config, dict)
+                        else False,
+                        "configKeys": sorted(list(tv.config.keys()))[:8]
+                        if isinstance(tv.config, dict)
+                        else [],
                         "error": tv.error,
                     }
                     for tv in live_values[:5]
@@ -1035,6 +1057,8 @@ def run_cycle(self) -> Dict[str, Any]:
         history_error_count = 0
         history_error_samples: List[str] = []
         valid_live_count = 0
+        missing_timestamp_count = 0
+        inferred_timestamp_count = 0
         quality_filtered_count = 0
         stale_filtered_count = 0
         insufficient_history_count = 0
@@ -1103,6 +1127,10 @@ def _update_subsystem_signal(
                     live_error_samples.append(f"{tv.path}: {tv.error}")
                 continue
             valid_live_count += 1
+            if not tv.timestamp:
+                missing_timestamp_count += 1
+            if isinstance(tv.config, dict) and bool(tv.config.get("timestamp_inferred")):
+                inferred_timestamp_count += 1
             if not is_quality_good(tv.quality):
                 # quality gate: only emit quality anomalies if this persists via triage.
                 quality_filtered_count += 1
@@ -1444,6 +1472,8 @@ def _update_subsystem_signal(
             "linkedTags": linked_tag_count,
             "unlinkedTags": unlinked_tag_count,
             "validLiveCount": valid_live_count,
+            "missingTimestampCount": missing_timestamp_count,
+            "inferredTimestampCount": inferred_timestamp_count,
             "liveErrorCount": live_error_count,
             "liveErrorLinked": live_error_linked,
             "liveErrorUnlinked": live_error_unlinked,
diff --git a/scripts/ignition_api_client.py b/scripts/ignition_api_client.py
index e8fbccf..eae959a 100644
--- a/scripts/ignition_api_client.py
+++ b/scripts/ignition_api_client.py
@@ -17,6 +17,7 @@
 import os
 import json
 import logging
+from datetime import datetime, timezone
 from typing import Dict, List, Optional, Any
 from dataclasses import dataclass, field
 from urllib.parse import urljoin, quote
@@ -239,7 +240,11 @@ def read_tags(self, paths: List[str]) -> List[TagValue]:
                 for p in normalised
             ]
 
-        return self._parse_tags_response(normalised, data)
+        return self._parse_tags_response(
+            normalised,
+            data,
+            fallback_timestamp=datetime.now(timezone.utc).isoformat(),
+        )
 
     # --------------------------------------------------------------------- #
     #  Tag history – WebDev module endpoint
@@ -344,11 +349,36 @@ def _ensure_provider_prefix(path: str) -> str:
             return path
         return f"[default]{path}"
 
-    _TAG_ITEM_KNOWN_KEYS = {"value", "quality", "tagPath", "isGood",
-                             "timestamp", "t", "dataType", "data_type"}
+    _TAG_ITEM_KNOWN_KEYS = {
+        "value",
+        "v",
+        "quality",
+        "q",
+        "tagPath",
+        "path",
+        "fullPath",
+        "isGood",
+        "timestamp",
+        "t",
+        "ts",
+        "time",
+        "timeStamp",
+        "dateTime",
+        "datetime",
+        "lastChange",
+        "lastChanged",
+        "timestampMs",
+        "eventTime",
+        "dataType",
+        "data_type",
+    }
 
     @staticmethod
-    def _parse_tags_response(paths: List[str], data: Any) -> List["TagValue"]:
+    def _parse_tags_response(
+        paths: List[str],
+        data: Any,
+        fallback_timestamp: Optional[str] = None,
+    ) -> List["TagValue"]:
         """Parse the response from the WebDev getTags endpoint.
 
         Expected shape: {"allGood": bool, "success": bool, "count": N,
@@ -367,9 +397,41 @@ def _parse_tags_response(paths: List[str], data: Any) -> List["TagValue"]:
             return [TagValue(path=p, value=data, quality="Unknown") for p in paths]
 
         by_path: Dict[str, dict] = {}
+
+        def extract_item_path(item: Dict[str, Any]) -> Optional[str]:
+            """Return the first non-blank string among tagPath/path/fullPath, stripped; else None."""
+            candidates = (item.get(key) for key in ("tagPath", "path", "fullPath"))
+            stripped = (c.strip() for c in candidates if isinstance(c, str))
+            # next() stops at the first non-empty result, preserving the key priority order.
+            return next((s for s in stripped if s), None)
+
+        def extract_item_timestamp(item: Dict[str, Any]) -> Optional[str]:
+            """Return the first non-blank timestamp field as stripped text, else None."""
+            timestamp_keys = (  # checked in priority order
+                "timestamp",
+                "t",
+                "ts",
+                "time",
+                "timeStamp",
+                "dateTime",
+                "datetime",
+                "lastChange",
+                "lastChanged",
+                "timestampMs",
+                "eventTime",
+            )
+            for key in timestamp_keys:
+                raw = item.get(key)
+                text = "" if raw is None else str(raw).strip()
+                if text:
+                    return text
+            return None
+
         for item in items:
-            if isinstance(item, dict) and "tagPath" in item:
-                by_path[item["tagPath"]] = item
+            if isinstance(item, dict):
+                item_path = extract_item_path(item)
+                if item_path:
+                    by_path[item_path] = item
 
         results: List[TagValue] = []
         for i, path in enumerate(paths):
@@ -380,13 +442,22 @@ def _parse_tags_response(paths: List[str], data: Any) -> List["TagValue"]:
             if item is None:
                 results.append(TagValue(path=path, error="No data returned for this path"))
             elif isinstance(item, dict):
+                ts = extract_item_timestamp(item)
+                inferred_timestamp = False
+                if not ts and fallback_timestamp:
+                    ts = fallback_timestamp
+                    inferred_timestamp = True
                 extra = {k: v for k, v in item.items()
                          if k not in IgnitionApiClient._TAG_ITEM_KNOWN_KEYS} or None
+                if inferred_timestamp:
+                    if extra is None:
+                        extra = {}
+                    extra["timestamp_inferred"] = True
                 results.append(TagValue(
-                    path=item.get("tagPath", path),
-                    value=item.get("value"),
-                    quality=str(item.get("quality", "Good" if item.get("isGood") else "Unknown")),
-                    timestamp=item.get("timestamp") or item.get("t"),
+                    path=extract_item_path(item) or path,
+                    value=item.get("value", item.get("v")),
+                    quality=str(item.get("quality", item.get("q", "Good" if item.get("isGood") else "Unknown"))),
+                    timestamp=ts,
                     data_type=item.get("dataType") or item.get("data_type"),
                     config=extra,
                 ))
diff --git a/tests/unit/test_ignition_api_client_parser.py b/tests/unit/test_ignition_api_client_parser.py
new file mode 100644
index 0000000..2157673
--- /dev/null
+++ b/tests/unit/test_ignition_api_client_parser.py
@@ -0,0 +1,45 @@
+from ignition_api_client import IgnitionApiClient
+
+
+def test_parse_tags_response_infers_timestamp_when_missing():
+    """An item with no timestamp field takes the fallback and is flagged as inferred."""
+    tag_path = "[default]Feed_Storage/Tank1_Level"
+    fallback_ts = "2026-03-02T00:00:00+00:00"
+    # Deliberately no timestamp-like key on the item.
+    item = {"tagPath": tag_path, "value": 42.5, "quality": "Good"}
+
+    rows = IgnitionApiClient._parse_tags_response(
+        [tag_path],
+        {"tags": [item]},
+        fallback_timestamp=fallback_ts,
+    )
+
+    assert len(rows) == 1
+    (row,) = rows
+    assert row.path == tag_path
+    assert row.timestamp == fallback_ts
+    assert row.config is not None
+    assert row.config.get("timestamp_inferred") is True
+
+
+def test_parse_tags_response_supports_alt_keys():
+    """Short keys (path/v/q/ts) under an "items" list are read like the canonical ones."""
+    tag_path = "[default]Feed_Storage/Tank1_Pressure"
+    item = {
+        "path": tag_path,
+        "v": 101.3,
+        "q": "Good",
+        "ts": "1710000000000",
+        "data_type": "Float8",
+    }
+
+    rows = IgnitionApiClient._parse_tags_response([tag_path], {"items": [item]})
+
+    assert len(rows) == 1
+    row = rows[0]
+    assert row.path == tag_path
+    assert row.value == 101.3
+    assert row.quality == "Good"
+    # The epoch-millis text is expected back verbatim, not normalised.
+    assert row.timestamp == "1710000000000"
+    assert row.data_type == "Float8"

From 0a49338cb9f96cca166ada8f99cf19ec09b39fc8 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Mon, 2 Mar 2026 23:08:27 +0000
Subject: [PATCH 11/16] Add raw agent event logging to debug missing
 diagnostics

Co-authored-by: leor <leor@fortresslabs.com>
---
 electron-ui/main.js     | 15 +++++++++++++--
 electron-ui/renderer.js |  7 +++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/electron-ui/main.js b/electron-ui/main.js
index 9f4f350..43eb3fb 100644
--- a/electron-ui/main.js
+++ b/electron-ui/main.js
@@ -255,6 +255,8 @@ function sendToRenderer(channel, payload, context = '') {
   if (!canSendToRenderer()) {
     if (isAppShuttingDown) {
       console.info(`[Shutdown] Dropped renderer message ${channel}${context ? ` (${context})` : ''}`);
+    } else {
+      console.warn(`[IPC] Renderer unavailable for ${channel}${context ? ` (${context})` : ''}`);
     }
     return false;
   }
@@ -268,7 +270,10 @@ function sendToRenderer(channel, payload, context = '') {
 }
 
 function routeAgentMessage(channel, payload) {
-  sendToRenderer(channel, payload, 'agent-stream');
+  const ok = sendToRenderer(channel, payload, 'agent-stream');
+  if (!ok) {
+    console.warn(`[Agent IPC] Failed to route message on ${channel}`);
+  }
 }
 
 function parseAgentLine(line) {
@@ -309,7 +314,12 @@ function handleAgentStdoutChunk(text) {
   activeAgentRun.stdoutBuffer = lines.pop() || '';
   for (const line of lines) {
     const parsed = parseAgentLine(line);
-    if (!parsed) continue;
+    if (!parsed) {
+      if (line.trim().startsWith('[AGENT')) {
+        console.warn('[Agent stream] Unparsed line:', line.slice(0, 300));
+      }
+      continue;
+    }
     if (parsed.channel === 'agent-status' && parsed.payload) {
       activeAgentRun.status = parsed.payload.state || activeAgentRun.status;
       activeAgentRun.metrics = {
@@ -1615,6 +1625,7 @@ ipcMain.handle('agents:start', async (event, rawConfig = {}) => {
     proc.stderr.on('data', (data) => {
       const text = data.toString().trim();
       if (!text) return;
+      console.warn('[Agent stderr]', text.slice(0, 500));
       routeAgentMessage('agent-error', {
         runId,
         code: 'worker_stderr',
diff --git a/electron-ui/renderer.js b/electron-ui/renderer.js
index 4e85c2b..90958d6 100644
--- a/electron-ui/renderer.js
+++ b/electron-ui/renderer.js
@@ -3838,11 +3838,14 @@ async function refreshAgentStatus() {
 
 async function startAgentsMonitoring() {
   const config = getAgentsConfigFromUI();
+  console.warn('[Agents start requested]', config);
   const result = await window.api.agentsStart(config);
   if (!result.success) {
+    console.error('[Agents start failed]', result);
     updateAgentStatusUi('error', result.error || 'Failed to start monitoring');
     return;
   }
+  console.warn('[Agents started]', result);
   agentsState.runId = result.runId;
   agentsState.status = 'running';
   updateAgentStatusUi('running', `Run ${result.runId}`);
@@ -3901,6 +3904,7 @@ function ensureAgentListeners() {
 
   window.api.onAgentStatus((payload) => {
     if (!payload) return;
+    console.warn('[Agents status]', payload);
     if (payload.runId) agentsState.runId = payload.runId;
     agentsState.status = payload.state || agentsState.status;
     updateAgentStatusUi(agentsState.status, `Run ${agentsState.runId || 'n/a'}`);
@@ -3917,16 +3921,19 @@ function ensureAgentListeners() {
   });
 
   window.api.onAgentEvent((payload) => {
+    console.warn('[Agents event]', payload);
     upsertRealtimeAgentEvent(payload);
   });
 
   window.api.onAgentError((payload) => {
     if (!payload) return;
+    console.error('[Agents error]', payload);
     updateAgentStatusUi('error', payload.message || 'Agent runtime error');
   });
 
   window.api.onAgentComplete((payload) => {
     if (!payload) return;
+    console.warn('[Agents complete]', payload);
     agentsState.status = payload.success ? 'stopped' : 'failed';
     updateAgentStatusUi(agentsState.status, payload.reason || 'Run complete');
     refreshAgentStatus();

From 8465607682ebf5aa20a0f1c612be8a7a2dc6a02f Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Mon, 2 Mar 2026 23:11:01 +0000
Subject: [PATCH 12/16] Always log agent diagnostics and stale/tool details

Co-authored-by: leor <leor@fortresslabs.com>
---
 electron-ui/renderer.js | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/electron-ui/renderer.js b/electron-ui/renderer.js
index 90958d6..d4fe40d 100644
--- a/electron-ui/renderer.js
+++ b/electron-ui/renderer.js
@@ -3909,15 +3909,20 @@ function ensureAgentListeners() {
     agentsState.status = payload.state || agentsState.status;
     updateAgentStatusUi(agentsState.status, `Run ${agentsState.runId || 'n/a'}`);
     updateAgentMetrics(payload, payload.timestamp);
-    if (payload.diagnostics) {
-      console.info('[Agents diagnostics]', payload.diagnostics);
-      if (Array.isArray(payload.diagnostics.toolCalls) && payload.diagnostics.toolCalls.length) {
-        console.info('[Agents tool calls]', payload.diagnostics.toolCalls);
-      }
-      if (Array.isArray(payload.diagnostics.staleSamples) && payload.diagnostics.staleSamples.length) {
-        console.info('[Agents stale samples]', payload.diagnostics.staleSamples);
-      }
-    }
+    const diagnostics = payload.diagnostics || {};
+    console.warn('[Agents diagnostics]', diagnostics);
+    console.warn('[Agents diagnostics summary]', {
+      monitoredTags: diagnostics.monitoredTags ?? null,
+      linkedTags: diagnostics.linkedTags ?? null,
+      validLiveCount: diagnostics.validLiveCount ?? null,
+      staleFilteredCount: diagnostics.staleFilteredCount ?? null,
+      missingTimestampCount: diagnostics.missingTimestampCount ?? null,
+      inferredTimestampCount: diagnostics.inferredTimestampCount ?? null,
+      detectedSubsystemCount: diagnostics.detectedSubsystemCount ?? null,
+      candidateSubsystemCount: diagnostics.candidateSubsystemCount ?? null,
+    });
+    console.warn('[Agents tool calls]', Array.isArray(diagnostics.toolCalls) ? diagnostics.toolCalls : []);
+    console.warn('[Agents stale samples]', Array.isArray(diagnostics.staleSamples) ? diagnostics.staleSamples : []);
   });
 
   window.api.onAgentEvent((payload) => {

From da80bf16703361cede0ae47ef7c7f37e0f6bad77 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Mon, 2 Mar 2026 23:14:13 +0000
Subject: [PATCH 13/16] Always emit shaped agent diagnostics with status phase

Co-authored-by: leor <leor@fortresslabs.com>
---
 electron-ui/renderer.js    |   2 +
 scripts/anomaly_monitor.py | 103 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/electron-ui/renderer.js b/electron-ui/renderer.js
index d4fe40d..9b5da03 100644
--- a/electron-ui/renderer.js
+++ b/electron-ui/renderer.js
@@ -3912,6 +3912,8 @@ function ensureAgentListeners() {
     const diagnostics = payload.diagnostics || {};
     console.warn('[Agents diagnostics]', diagnostics);
     console.warn('[Agents diagnostics summary]', {
+      phase: diagnostics.phase ?? null,
+      reason: diagnostics.reason ?? null,
       monitoredTags: diagnostics.monitoredTags ?? null,
       linkedTags: diagnostics.linkedTags ?? null,
       validLiveCount: diagnostics.validLiveCount ?? null,
diff --git a/scripts/anomaly_monitor.py b/scripts/anomaly_monitor.py
index 5768e83..2fa7f73 100644
--- a/scripts/anomaly_monitor.py
+++ b/scripts/anomaly_monitor.py
@@ -65,6 +65,58 @@ def _preview_value(value: Any, max_len: int = 120) -> Any:
     return text[: max_len - 3] + "..."
 
 
+def make_default_diagnostics(
+    *,
+    staleness_threshold_sec: int = 120,
+    phase: str = "initializing",
+    reason: str = "",
+) -> Dict[str, Any]:
+    return {
+        "phase": phase,
+        "reason": reason,
+        "monitoredTags": 0,
+        "linkedTags": 0,
+        "unlinkedTags": 0,
+        "validLiveCount": 0,
+        "missingTimestampCount": 0,
+        "inferredTimestampCount": 0,
+        "liveErrorCount": 0,
+        "liveErrorLinked": 0,
+        "liveErrorUnlinked": 0,
+        "qualityFilteredCount": 0,
+        "qualityFilteredLinked": 0,
+        "qualityFilteredUnlinked": 0,
+        "staleFilteredCount": 0,
+        "staleFilteredLinked": 0,
+        "staleFilteredUnlinked": 0,
+        "historyErrorCount": 0,
+        "historyErrorLinked": 0,
+        "historyErrorUnlinked": 0,
+        "insufficientHistoryCount": 0,
+        "lowHistoryCandidateCount": 0,
+        "evaluatedLinked": 0,
+        "evaluatedUnlinked": 0,
+        "candidateLinked": 0,
+        "candidateUnlinked": 0,
+        "nearShiftCount": 0,
+        "nearShiftLinked": 0,
+        "nearShiftUnlinked": 0,
+        "stalenessThresholdSec": staleness_threshold_sec,
+        "staleSamples": [],
+        "timestampParseNote": "Naive timestamps are treated as local time by parse_timestamp().",
+        "detectedSubsystemCount": 0,
+        "detectedSubsystems": [],
+        "candidateSubsystemCount": 0,
+        "candidateBySubsystem": {},
+        "subsystemShiftSignals": [],
+        "maxCandidatesPerSubsystem": 0,
+        "maxLlmTriagesPerSubsystem": 0,
+        "llmTriagedCount": 0,
+        "dedupSuppressedCount": 0,
+        "toolCalls": [],
+    }
+
+
 def _canonical_subsystem_type(kind: Any) -> str:
     value = str(kind or "").strip().lower()
     if value in {"view", "views"}:
@@ -970,8 +1022,19 @@ def emit_provider_failure_event(
     # -----------------------------
     def run_cycle(self) -> Dict[str, Any]:
         cycle_start = time.time()
-        metrics = {"candidates": 0, "triaged": 0, "emitted": 0, "cycleMs": 0, "diagnostics": {}}
         thresholds = self.config.get("thresholds", {})
+        stale_threshold_sec = int(thresholds.get("stalenessSec", 120))
+        metrics = {
+            "candidates": 0,
+            "triaged": 0,
+            "emitted": 0,
+            "cycleMs": 0,
+            "diagnostics": make_default_diagnostics(
+                staleness_threshold_sec=stale_threshold_sec,
+                phase="cycle_start",
+                reason="cycle_initialized",
+            ),
+        }
         min_history = int(self.config.get("minHistoryPoints", 30))
         max_candidates_total = max(1, int(self.config.get("maxCandidatesPerCycle", 25)))
         max_candidates_per_subsystem = max(1, int(self.config.get("maxCandidatesPerSubsystem", 8)))
@@ -987,6 +1050,8 @@ def run_cycle(self) -> Dict[str, Any]:
             )
             if emitted:
                 metrics["emitted"] += 1
+            metrics["diagnostics"]["phase"] = "cycle_early_exit"
+            metrics["diagnostics"]["reason"] = "ignition_not_configured"
             metrics["cycleMs"] = int((time.time() - cycle_start) * 1000)
             return metrics
 
@@ -999,6 +1064,8 @@ def run_cycle(self) -> Dict[str, Any]:
                 "recoverable": True,
                 "timestamp": utc_now_iso(),
             })
+            metrics["diagnostics"]["phase"] = "cycle_early_exit"
+            metrics["diagnostics"]["reason"] = "no_tags_found"
             metrics["cycleMs"] = int((time.time() - cycle_start) * 1000)
             return metrics
 
@@ -1468,6 +1535,11 @@ def _update_subsystem_signal(
             item.pop("sumZ", None)
 
         metrics["diagnostics"] = {
+            **make_default_diagnostics(
+                staleness_threshold_sec=int(thresholds.get("stalenessSec", 120)),
+                phase="cycle_complete",
+                reason="ok",
+            ),
             "monitoredTags": len(tag_paths),
             "linkedTags": linked_tag_count,
             "unlinkedTags": unlinked_tag_count,
@@ -1519,6 +1591,11 @@ def cleanup_retention(self) -> int:
     def run_forever(self) -> int:
         self.init_schema()
         self.upsert_run("running")
+        startup_diag = make_default_diagnostics(
+            staleness_threshold_sec=int(self.config.get("thresholds", {}).get("stalenessSec", 120)),
+            phase="startup",
+            reason="worker_started",
+        )
         emit("AGENT_STATUS", {
             "runId": self.run_id,
             "state": "running",
@@ -1526,6 +1603,7 @@ def run_forever(self) -> int:
             "candidates": 0,
             "triaged": 0,
             "emitted": 0,
+            "diagnostics": startup_diag,
             "timestamp": utc_now_iso(),
         })
 
@@ -1553,6 +1631,12 @@ def run_forever(self) -> int:
                 if self._cycle_count % cleanup_every == 0:
                     deleted = self.cleanup_retention()
                     if deleted > 0:
+                        cleanup_diag = make_default_diagnostics(
+                            staleness_threshold_sec=int(self.config.get("thresholds", {}).get("stalenessSec", 120)),
+                            phase="retention_cleanup",
+                            reason="cleanup_complete",
+                        )
+                        cleanup_diag["emittedCleanupCount"] = deleted
                         emit("AGENT_STATUS", {
                             "runId": self.run_id,
                             "state": "retention_cleanup",
@@ -1560,6 +1644,7 @@ def run_forever(self) -> int:
                             "candidates": 0,
                             "triaged": 0,
                             "emitted": deleted,
+                            "diagnostics": cleanup_diag,
                             "timestamp": utc_now_iso(),
                         })
             except Exception as exc:
@@ -1572,6 +1657,22 @@ def run_forever(self) -> int:
                     "recoverable": True,
                     "timestamp": utc_now_iso(),
                 })
+                error_diag = make_default_diagnostics(
+                    staleness_threshold_sec=int(self.config.get("thresholds", {}).get("stalenessSec", 120)),
+                    phase="cycle_error",
+                    reason="exception",
+                )
+                error_diag["errorMessage"] = str(exc)
+                emit("AGENT_STATUS", {
+                    "runId": self.run_id,
+                    "state": "running",
+                    "cycleMs": int((time.time() - cycle_started) * 1000),
+                    "candidates": 0,
+                    "triaged": 0,
+                    "emitted": 0,
+                    "diagnostics": error_diag,
+                    "timestamp": utc_now_iso(),
+                })
 
             elapsed_ms = int((time.time() - cycle_started) * 1000)
             remaining = max(0, poll_ms - elapsed_ms) / 1000.0

From f5252446348ee6f6837af8795605f36927c85cf6 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Mon, 2 Mar 2026 23:16:24 +0000
Subject: [PATCH 14/16] Set agent poll interval default to 1s

Co-authored-by: leor <leor@fortresslabs.com>
---
 docs/agents_monitoring_handoff.md | 2 +-
 electron-ui/index.html            | 2 +-
 electron-ui/main.js               | 2 +-
 electron-ui/renderer.js           | 2 +-
 scripts/anomaly_monitor.py        | 4 ++--
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/docs/agents_monitoring_handoff.md b/docs/agents_monitoring_handoff.md
index a5368fb..4880c31 100644
--- a/docs/agents_monitoring_handoff.md
+++ b/docs/agents_monitoring_handoff.md
@@ -93,7 +93,7 @@ python3 scripts/anomaly_monitor.py replay-fixtures --fixture-file scripts/fixtur
 ### Monitor worker manual run
 
 ```bash
-python3 scripts/anomaly_monitor.py run --run-id demo-run --config-json '{"pollIntervalMs":15000}'
+python3 scripts/anomaly_monitor.py run --run-id demo-run --config-json '{"pollIntervalMs":1000}'
 ```
 
 ### Event operations
diff --git a/electron-ui/index.html b/electron-ui/index.html
index 2c43657..a4a7a9b 100644
--- a/electron-ui/index.html
+++ b/electron-ui/index.html
@@ -561,7 +561,7 @@ <h2>Long-Running Agents</h2>
 
         <div class="agents-config-row">
           <label>Poll (ms)</label>
-          <input class="input input-sm" id="agents-config-poll-ms" type="number" min="5000" step="1000" value="15000">
+          <input class="input input-sm" id="agents-config-poll-ms" type="number" min="1000" step="1000" value="1000">
           <label>History (min)</label>
           <input class="input input-sm" id="agents-config-history-min" type="number" min="10" step="10" value="360">
           <label>Min Points</label>
diff --git a/electron-ui/main.js b/electron-ui/main.js
index 43eb3fb..5b6a081 100644
--- a/electron-ui/main.js
+++ b/electron-ui/main.js
@@ -209,7 +209,7 @@ function normalizeAgentConfig(config = {}) {
   const thresholds = (config && typeof config.thresholds === 'object' && config.thresholds) || {};
   const scope = (config && typeof config.scope === 'object' && config.scope) || {};
   return {
-    pollIntervalMs: Math.max(5000, Number(config.pollIntervalMs || 15000)),
+    pollIntervalMs: Math.max(1000, Number(config.pollIntervalMs || 1000)),
     historyWindowMinutes: Math.max(10, Number(config.historyWindowMinutes || 360)),
     minHistoryPoints: Math.max(10, Number(config.minHistoryPoints || 30)),
     maxMonitoredTags: Math.max(10, Number(config.maxMonitoredTags || 200)),
diff --git a/electron-ui/renderer.js b/electron-ui/renderer.js
index 9b5da03..8479580 100644
--- a/electron-ui/renderer.js
+++ b/electron-ui/renderer.js
@@ -3580,7 +3580,7 @@ function getAgentsElements() {
 function getAgentsConfigFromUI() {
   const el = getAgentsElements();
   return {
-    pollIntervalMs: Number(el.cfgPoll?.value || 15000),
+    pollIntervalMs: Number(el.cfgPoll?.value || 1000),
     historyWindowMinutes: Number(el.cfgHist?.value || 360),
     minHistoryPoints: Number(el.cfgPoints?.value || 30),
     maxCandidatesPerSubsystem: 8,
diff --git a/scripts/anomaly_monitor.py b/scripts/anomaly_monitor.py
index 2fa7f73..ff033dc 100644
--- a/scripts/anomaly_monitor.py
+++ b/scripts/anomaly_monitor.py
@@ -238,7 +238,7 @@ def merge_defaults(config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
     raw = dict(config or {})
     thresholds = raw.get("thresholds", {}) if isinstance(raw.get("thresholds"), dict) else {}
     defaults = {
-        "pollIntervalMs": 15000,
+        "pollIntervalMs": 1000,
         "historyWindowMinutes": 360,
         "minHistoryPoints": 30,
         "maxMonitoredTags": 200,
@@ -1607,7 +1607,7 @@ def run_forever(self) -> int:
             "timestamp": utc_now_iso(),
         })
 
-        poll_ms = int(self.config.get("pollIntervalMs", 15000))
+        poll_ms = int(self.config.get("pollIntervalMs", 1000))
         cleanup_every = max(1, int(self.config.get("cleanupEveryCycles", 40)))
         exit_code = 0
         reason = "stopped"

From 6dcfd15f81912b3ce743e6e5eab1507c2f6f093e Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Mon, 2 Mar 2026 23:18:37 +0000
Subject: [PATCH 15/16] Emit in-cycle agent status progress updates

Co-authored-by: leor <leor@fortresslabs.com>
---
 scripts/anomaly_monitor.py | 47 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/scripts/anomaly_monitor.py b/scripts/anomaly_monitor.py
index ff033dc..54065c8 100644
--- a/scripts/anomaly_monitor.py
+++ b/scripts/anomaly_monitor.py
@@ -1024,6 +1024,8 @@ def run_cycle(self) -> Dict[str, Any]:
         cycle_start = time.time()
         thresholds = self.config.get("thresholds", {})
         stale_threshold_sec = int(thresholds.get("stalenessSec", 120))
+        progress_emit_interval_tags = max(5, int(self.config.get("progressEveryTags", 10)))
+        progress_emit_interval_sec = max(1, int(self.config.get("progressEverySec", 2)))
         metrics = {
             "candidates": 0,
             "triaged": 0,
@@ -1148,6 +1150,42 @@ def run_cycle(self) -> Dict[str, Any]:
         near_shift_unlinked = 0
         stale_samples: List[Dict[str, Any]] = []
         subsystem_shift_signals: Dict[str, Dict[str, Any]] = {}
+        processed_live_count = 0
+        total_live_count = len(live_values)
+        last_progress_emit = 0.0
+
+        def emit_cycle_progress(reason: str, current_tag: str = "") -> None:
+            nonlocal last_progress_emit
+            diag = make_default_diagnostics(
+                staleness_threshold_sec=stale_threshold_sec,
+                phase="cycle_in_progress",
+                reason=reason,
+            )
+            diag.update({
+                "processedLiveCount": processed_live_count,
+                "totalLiveCount": total_live_count,
+                "currentTag": current_tag,
+                "candidatesSoFar": len(candidates),
+                "liveErrorCount": live_error_count,
+                "qualityFilteredCount": quality_filtered_count,
+                "staleFilteredCount": stale_filtered_count,
+                "historyErrorCount": history_error_count,
+                "linkedTags": linked_tag_count,
+                "unlinkedTags": unlinked_tag_count,
+            })
+            emit("AGENT_STATUS", {
+                "runId": self.run_id,
+                "state": "running",
+                "cycleMs": int((time.time() - cycle_start) * 1000),
+                "candidates": len(candidates),
+                "triaged": 0,
+                "emitted": metrics.get("emitted", 0),
+                "diagnostics": diag,
+                "timestamp": utc_now_iso(),
+            })
+            last_progress_emit = time.time()
+
+        emit_cycle_progress("cycle_started")
 
         def _update_subsystem_signal(
             subsystem_ref: Dict[str, str], deterministic: Dict[str, Any], tag_path: str
@@ -1180,10 +1218,19 @@ def _update_subsystem_signal(
                 bucket["sampleTag"] = tag_path
 
         for tv in live_values:
+            processed_live_count += 1
             tag_meta = tag_lookup.get(tv.path, {"path": tv.path, "name": tv.path})
             subsystem = tag_meta.get("primary_subsystem") or _subsystem_ref("global", "all")
             is_linked = bool(tag_meta.get("views") or tag_meta.get("equipment"))
 
+            now_progress = time.time()
+            if (
+                processed_live_count == 1
+                or processed_live_count % progress_emit_interval_tags == 0
+                or (now_progress - last_progress_emit) >= progress_emit_interval_sec
+            ):
+                emit_cycle_progress("processing_live_tags", current_tag=tv.path)
+
             if tv.error:
                 live_error_count += 1
                 if is_linked:

From 5157857bb4ff28e2d4b42f16027b5c7de52f3115 Mon Sep 17 00:00:00 2001
From: Leor Barak Fishman <leor.fishman@gmail.com>
Date: Mon, 2 Mar 2026 17:27:50 -0800
Subject: [PATCH 16/16] Agents tab: add subsystem health grid, Deep Analyze action, and Auto LLM toggle

---
 electron-ui/index.html     |  20 +-
 electron-ui/main.js        |  13 ++
 electron-ui/preload.js     |   1 +
 electron-ui/renderer.js    | 353 +++++++++++++++++++++++++++++++---
 electron-ui/styles.css     | 385 ++++++++++++++++++++++++++++++++++++-
 scripts/anomaly_monitor.py | 316 ++++++++++++++++++++++++++----
 tests/quick_import_test.py |  76 ++++++++
 7 files changed, 1100 insertions(+), 64 deletions(-)
 create mode 100644 tests/quick_import_test.py

diff --git a/electron-ui/index.html b/electron-ui/index.html
index a4a7a9b..08adc5d 100644
--- a/electron-ui/index.html
+++ b/electron-ui/index.html
@@ -566,8 +566,11 @@ <h2>Long-Running Agents</h2>
           <input class="input input-sm" id="agents-config-history-min" type="number" min="10" step="10" value="360">
           <label>Min Points</label>
           <input class="input input-sm" id="agents-config-min-points" type="number" min="10" step="5" value="30">
-          <label>Max LLM/Cycle</label>
-          <input class="input input-sm" id="agents-config-max-llm" type="number" min="0" step="1" value="5">
+          <label class="agents-toggle-label">
+            <input type="checkbox" id="agents-config-auto-llm"> Auto LLM
+          </label>
+          <label>Max/Cycle</label>
+          <input class="input input-sm" id="agents-config-max-llm" type="number" min="1" step="1" value="5">
           <label>Z</label>
           <input class="input input-sm" id="agents-config-threshold-z" type="number" min="0.5" step="0.5" value="3">
           <label>MAD</label>
@@ -584,6 +587,18 @@ <h2>Long-Running Agents</h2>
           <div class="metric-card"><span class="metric-label">Last heartbeat</span><span class="metric-value" id="agents-metric-heartbeat">n/a</span></div>
         </div>
 
+        <div class="agents-health-section">
+          <div class="agents-health-header">
+            <h3>Subsystem Health</h3>
+            <div class="agents-health-actions">
+              <button class="btn btn-ghost btn-sm" id="btn-agents-clear-subsystem" style="display:none">Show All</button>
+            </div>
+          </div>
+          <div class="agents-health-grid" id="agents-health-grid">
+            <div class="agents-health-empty">Start monitoring to see subsystem health.</div>
+          </div>
+        </div>
+
         <div class="agents-main">
           <aside class="agents-feed-panel">
             <div class="agents-feed-header">
@@ -614,6 +629,7 @@ <h3>Anomaly Feed</h3>
             <div class="agents-detail-header">
               <h3>Event Details</h3>
               <div class="agents-detail-actions">
+                <button class="btn btn-sm btn-primary" id="btn-agents-deep-analyze" disabled>Deep Analyze</button>
                 <button class="btn btn-sm btn-secondary" id="btn-agents-open-graph" disabled>Open in Graph</button>
                 <button class="btn btn-sm btn-ghost" id="btn-agents-ack" disabled>Acknowledge</button>
               </div>
diff --git a/electron-ui/main.js b/electron-ui/main.js
index 5b6a081..f3034f1 100644
--- a/electron-ui/main.js
+++ b/electron-ui/main.js
@@ -1749,6 +1749,19 @@ ipcMain.handle('agents:clear-event', async (event, eventId, note = '') => {
   }
 });
 
+ipcMain.handle('agents:deep-analyze', async (event, eventId) => { // Runs `anomaly_monitor.py deep-analyze --event-id <eventId>` and returns its parsed JSON output.
+  try {
+    const output = await runPythonScript('anomaly_monitor.py', [
+      'deep-analyze',
+      '--event-id',
+      String(eventId),
+    ]);
+    return JSON.parse(output || '{}'); // empty output parses to {}
+  } catch (error) {
+    return { success: false, error: error.message }; // script and JSON-parse failures are returned to the renderer, not thrown
+  }
+});
+
 ipcMain.handle('agents:cleanup', async (event, retentionDays = 14) => {
   try {
     const output = await runPythonScript('anomaly_monitor.py', [
diff --git a/electron-ui/preload.js b/electron-ui/preload.js
index e94b546..cf1d75c 100644
--- a/electron-ui/preload.js
+++ b/electron-ui/preload.js
@@ -79,6 +79,7 @@ contextBridge.exposeInMainWorld('api', {
   agentsGetEvent: (eventId) => ipcRenderer.invoke('agents:get-event', eventId),
   agentsAckEvent: (eventId, note) => ipcRenderer.invoke('agents:ack-event', eventId, note),
   agentsClearEvent: (eventId, note) => ipcRenderer.invoke('agents:clear-event', eventId, note),
+  agentsDeepAnalyze: (eventId) => ipcRenderer.invoke('agents:deep-analyze', eventId),
   agentsCleanup: (retentionDays) => ipcRenderer.invoke('agents:cleanup', retentionDays),
   
   // Database connections
diff --git a/electron-ui/renderer.js b/electron-ui/renderer.js
index 8479580..bba8767 100644
--- a/electron-ui/renderer.js
+++ b/electron-ui/renderer.js
@@ -3539,12 +3539,17 @@ btnSaveDbCreds?.addEventListener('click', async () => {
 // Agents Tab - Long-running monitoring
 // ============================================
 
+const HEALTH_TREND_MAX_CYCLES = 20;
+
 const agentsState = {
   runId: null,
   status: 'idle',
   events: [],
   selectedEventId: null,
+  selectedSubsystemId: null,
   listenersReady: false,
+  subsystemHealth: {},
+  subsystemHistory: {},
 };
 
 function getAgentsElements() {
@@ -3553,6 +3558,7 @@ function getAgentsElements() {
     btnStop: document.getElementById('btn-agents-stop'),
     btnRefresh: document.getElementById('btn-agents-refresh'),
     btnCleanup: document.getElementById('btn-agents-cleanup'),
+    btnDeepAnalyze: document.getElementById('btn-agents-deep-analyze'),
     btnOpenGraph: document.getElementById('btn-agents-open-graph'),
     btnAck: document.getElementById('btn-agents-ack'),
     statusChip: document.getElementById('agents-status-chip'),
@@ -3570,6 +3576,7 @@ function getAgentsElements() {
     cfgPoll: document.getElementById('agents-config-poll-ms'),
     cfgHist: document.getElementById('agents-config-history-min'),
     cfgPoints: document.getElementById('agents-config-min-points'),
+    cfgAutoLlm: document.getElementById('agents-config-auto-llm'),
     cfgMaxLlm: document.getElementById('agents-config-max-llm'),
     cfgZ: document.getElementById('agents-config-threshold-z'),
     cfgMad: document.getElementById('agents-config-threshold-mad'),
@@ -3584,8 +3591,8 @@ function getAgentsConfigFromUI() {
     historyWindowMinutes: Number(el.cfgHist?.value || 360),
     minHistoryPoints: Number(el.cfgPoints?.value || 30),
     maxCandidatesPerSubsystem: 8,
-    maxLlmTriagesPerCycle: Number(el.cfgMaxLlm?.value || 5),
-    maxLlmTriagesPerSubsystem: 2,
+    maxLlmTriagesPerCycle: el.cfgAutoLlm?.checked ? Number(el.cfgMaxLlm?.value || 5) : 0,
+    maxLlmTriagesPerSubsystem: el.cfgAutoLlm?.checked ? 2 : 0,
     thresholds: {
       z: Number(el.cfgZ?.value || 3),
       mad: Number(el.cfgMad?.value || 3.5),
@@ -3606,6 +3613,250 @@ function formatAgentTime(ts) {
   return d.toLocaleString();
 }
 
+function computeHealthLevel(signal) {
+  // Absent metrics count as 0; tiers are tested from most to least severe.
+  const asNumber = (raw) => parseFloat(raw || 0);
+  const [meanAbsZ, hitRatio, peakAbsZ] = [signal.avgAbsZ, signal.candidateRatio, signal.maxAbsZ].map(asNumber);
+  if (hitRatio >= 0.25 || peakAbsZ >= 5) return 'critical';
+  if (hitRatio >= 0.10 || meanAbsZ >= 2.5) return 'warning';
+  if (signal.shiftRatio > 0.1 || meanAbsZ >= 1.5) return 'elevated';
+  return 'healthy';
+}
+
+function healthLevelToScore(level) { // Health level name -> score used by renderTrendBars as a fraction of the maximum bar height.
+  return { healthy: 0.1, elevated: 0.4, warning: 0.7, critical: 1.0 }[level] || 0.1; // unrecognized or missing levels score like 'healthy'
+}
+
+// Folds a diagnostics object into agentsState and re-renders the health grid:
+// `subsystemTagMap` seeds placeholder cards, `subsystemShiftSignals` refreshes each
+// card and appends a sample to its trend history (capped at HEALTH_TREND_MAX_CYCLES).
+function updateSubsystemHealthFromDiagnostics(diagnostics) {
+  const tagMap = diagnostics?.subsystemTagMap;
+  if (tagMap && typeof tagMap === 'object') {
+    for (const [subId, info] of Object.entries(tagMap)) {
+      if (!agentsState.subsystemHealth[subId]) {
+        agentsState.subsystemHealth[subId] = {
+          subsystemId: subId,
+          subsystemType: info.type || 'global',
+          subsystemName: info.name || subId,
+          evaluated: (info.tags || []).length,
+          candidate: 0,
+          nearShift: 0,
+          maxAbsZ: 0,
+          avgAbsZ: 0,
+          healthLevel: 'healthy',
+          tagSignals: (info.tags || []).map((t) => ({
+            path: t.path,
+            name: t.name || t.path,
+            z: 0,
+            mad: 0,
+            value: null,
+          })),
+        };
+      }
+    }
+  }
+
+  const signals = diagnostics?.subsystemShiftSignals;
+  if (Array.isArray(signals) && signals.length) {
+    for (const sig of signals) {
+      const subId = sig.subsystemId || sig.subsystemName || 'global:all';
+      const healthLevel = computeHealthLevel(sig);
+      // Merge over the existing entry so fields the signal omits (e.g. seeded tagSignals) survive.
+      agentsState.subsystemHealth[subId] = { ...agentsState.subsystemHealth[subId], ...sig, healthLevel };
+      const history = agentsState.subsystemHistory[subId] || (agentsState.subsystemHistory[subId] = []);
+      history.push({
+        healthLevel,
+        avgAbsZ: parseFloat(sig.avgAbsZ || 0),
+        candidateRatio: parseFloat(sig.candidateRatio || 0),
+        candidates: parseInt(sig.candidate || 0, 10),
+        evaluated: parseInt(sig.evaluated || 0, 10),
+        ts: Date.now(),
+      });
+      if (history.length > HEALTH_TREND_MAX_CYCLES) {
+        history.splice(0, history.length - HEALTH_TREND_MAX_CYCLES);
+      }
+    }
+  }
+
+  renderSubsystemHealthGrid();
+}
+
+function renderSubsystemHealthGrid() { // Rebuilds the whole #agents-health-grid from agentsState.subsystemHealth.
+  const container = document.getElementById('agents-health-grid');
+  if (!container) return;
+
+  const entries = Object.entries(agentsState.subsystemHealth);
+  if (!entries.length) { // nothing tracked yet: show the placeholder text
+    container.innerHTML = '<div class="agents-health-empty">Start monitoring to see subsystem health.</div>';
+    return;
+  }
+
+  const severityOrder = { critical: 0, warning: 1, elevated: 2, healthy: 3 }; // most severe cards first; ties broken by candidate count, descending
+  entries.sort((a, b) => {
+    const sa = severityOrder[a[1].healthLevel] ?? 3;
+    const sb = severityOrder[b[1].healthLevel] ?? 3;
+    if (sa !== sb) return sa - sb;
+    return (b[1].candidate || 0) - (a[1].candidate || 0);
+  });
+
+  container.innerHTML = entries
+    .map(([subId, sig]) => {
+      const level = sig.healthLevel || 'healthy';
+      const isExpanded = agentsState.selectedSubsystemId === subId; // only the selected card renders its expanded body
+      const expandedClass = isExpanded ? ' expanded selected' : '';
+      const name = sig.subsystemName || subId;
+      const type = sig.subsystemType || 'global';
+      const evaluated = parseInt(sig.evaluated || 0, 10);
+      const candidates = parseInt(sig.candidate || 0, 10);
+      const maxZ = parseFloat(sig.maxAbsZ || 0).toFixed(1);
+      const anomalyClass = candidates > 0 ? (level === 'critical' ? ' has-critical' : ' has-anomalies') : '';
+      const history = agentsState.subsystemHistory[subId] || [];
+
+      let expandedBody = '';
+      if (isExpanded) {
+        const bigTrend = renderTrendBars(history, 48);
+        const tagRows = renderTagSignalRows(sig.tagSignals || []);
+        const tagCount = (sig.tagSignals || []).length;
+        expandedBody = `
+          <div class="health-expanded-body">
+            <div class="health-expanded-trend">${bigTrend}</div>
+            <div class="health-tag-list-header">
+              <h4>Tags</h4>
+              <span>${tagCount} tags</span>
+            </div>
+            <div class="health-tag-col-headers">
+              <span>Name</span><span>Trend</span><span>z-score</span><span>Avg</span><span>Current</span>
+            </div>
+            <div class="health-tag-list">${tagRows}</div>
+          </div>
+        `;
+      } else {
+        expandedBody = `<div class="health-trend">${renderTrendBars(history, 28)}</div>`;
+      }
+
+      return `
+        <div class="agents-health-card health-${escapeHtml(level)}${expandedClass}" data-subsystem-id="${escapeHtml(subId)}">
+          <div class="health-card-top">
+            <div class="health-card-identity">
+              <span class="health-indicator health-${escapeHtml(level)}"></span>
+              <span class="health-card-name" title="${escapeHtml(name)}">${escapeHtml(name)}</span>
+            </div>
+            <span class="health-card-type">${escapeHtml(type)}</span>
+          </div>
+          <div class="health-card-stats">
+            <div class="health-stat">
+              <span class="health-stat-label">Tags</span>
+              <span class="health-stat-value">${evaluated}</span>
+            </div>
+            <div class="health-stat">
+              <span class="health-stat-label">Anomalies</span>
+              <span class="health-stat-value${anomalyClass}">${candidates}</span>
+            </div>
+            <div class="health-stat">
+              <span class="health-stat-label">Peak z</span>
+              <span class="health-stat-value">${maxZ}</span>
+            </div>
+          </div>
+          ${expandedBody}
+          <span class="health-card-health-label health-${escapeHtml(level)}">${escapeHtml(level)}</span>
+        </div>
+      `;
+    })
+    .join('');
+
+  container.querySelectorAll('.agents-health-card').forEach((card) => { // innerHTML was replaced above, so click handlers are attached to the new nodes
+    card.addEventListener('click', (e) => {
+      if (e.target.closest('.health-tag-list')) return; // clicks inside the tag list do not toggle the card
+      const subId = card.getAttribute('data-subsystem-id');
+      selectSubsystem(subId);
+    });
+  });
+}
+
+// Renders a subsystem's health history as a fixed row of HEALTH_TREND_MAX_CYCLES bars.
+// The newest entry is right-most; slots with no recorded cycle get a 3px placeholder.
+// Bar height is the level's score times `maxHeight` (28 when falsy), never below 3px.
+function renderTrendBars(history, maxHeight) {
+  const tallest = maxHeight || 28;
+  const firstIdx = history.length - HEALTH_TREND_MAX_CYCLES;
+  const slotHtml = (_unused, slot) => {
+    const idx = firstIdx + slot;
+    // A negative index means the history is shorter than the slot count.
+    if (idx < 0) {
+      return '<div class="health-trend-bar trend-empty" style="height: 3px"></div>';
+    }
+    const level = history[idx].healthLevel || 'healthy';
+    const px = Math.max(3, Math.round(healthLevelToScore(level) * tallest));
+    return `<div class="health-trend-bar trend-${escapeHtml(level)}" style="height: ${px}px"></div>`;
+  };
+  return Array.from({ length: HEALTH_TREND_MAX_CYCLES }, slotHtml).join('');
+}
+
+function tagZToHealthLevel(absZ) {
+  // Per-tag |z| floors, highest first; a value that clears none of them is 'healthy'.
+  const tiers = [[5, 'critical'], [2.5, 'warning'], [1.5, 'elevated']];
+  const hit = tiers.find(([floor]) => absZ >= floor);
+  return hit ? hit[1] : 'healthy';
+}
+
+// Inline SVG polyline of `values` scaled to width x height; non-numeric samples are dropped
+// and fewer than two usable samples yield a flat mid-height placeholder line.
+function renderSparklineSvg(values, width, height) {
+  const samples = Array.from(values || [], Number).filter(Number.isFinite);
+  if (samples.length < 2) {
+    return `<svg width="${width}" height="${height}" class="tag-sparkline"><line x1="0" y1="${height / 2}" x2="${width}" y2="${height / 2}" stroke="var(--color-border)" stroke-width="1"/></svg>`;
+  }
+  const min = Math.min(...samples);
+  const range = Math.max(...samples) - min || 1; // constant series: avoid dividing by zero
+  const pad = 1;
+  const usableH = height - pad * 2;
+  const step = width / (samples.length - 1);
+  const points = samples.map((v, i) => `${(i * step).toFixed(1)},${(pad + usableH - ((v - min) / range) * usableH).toFixed(1)}`).join(' ');
+  return `<svg width="${width}" height="${height}" class="tag-sparkline" viewBox="0 0 ${width} ${height}" preserveAspectRatio="none"><polyline points="${points}" fill="none" stroke="var(--color-accent)" stroke-width="1.5" vector-effect="non-scaling-stroke"/></svg>`;
+}
+
+// Builds one row per tag signal for the expanded subsystem card: name, sparkline,
+// z-score badge (colored by |z|), average over the window and current value.
+function renderTagSignalRows(tagSignals) {
+  if (!tagSignals || !tagSignals.length) {
+    return '<div class="health-tag-empty">No tag data available yet.</div>';
+  }
+  return tagSignals
+    .map((tag) => {
+      // Coerce z once: a numeric string or other non-number would make .toFixed() throw.
+      const z = Number.isFinite(Number(tag.z)) ? Number(tag.z) : 0;
+      const level = tagZToHealthLevel(Math.abs(z));
+      const currentVal = tag.value != null ? String(tag.value) : '—';
+      const avgVal = tag.avg != null ? String(tag.avg) : '—';
+      const zDisplay = z.toFixed(2);
+      const sparkline = renderSparklineSvg(tag.sparkline && tag.sparkline.length >= 2 ? tag.sparkline : null, 120, 24);
+      return `
+        <div class="health-tag-row" title="${escapeHtml(tag.path || tag.name || '')}">
+          <span class="health-tag-name">${escapeHtml(tag.name || tag.path || '')}</span>
+          <div class="health-tag-sparkline">${sparkline}</div>
+          <span class="health-tag-zscore tag-z-${escapeHtml(level)}">z ${escapeHtml(zDisplay)}</span>
+          <span class="health-tag-avg" title="Avg over window">${escapeHtml(avgVal)}</span>
+          <span class="health-tag-value" title="Current">${escapeHtml(currentVal)}</span>
+        </div>
+      `;
+    })
+    .join('');
+}
+
+function selectSubsystem(subId) {
+  const showAllBtn = document.getElementById('btn-agents-clear-subsystem');
+  // Clicking the card that is already selected clears the selection instead.
+  const isDeselect = agentsState.selectedSubsystemId === subId;
+  agentsState.selectedSubsystemId = isDeselect ? null : subId;
+
+  // The "Show All" button is only displayed while a subsystem is selected.
+  if (showAllBtn) showAllBtn.style.display = isDeselect ? 'none' : '';
+
+  renderSubsystemHealthGrid();
+  renderAgentEventList();
+}
+
 function updateAgentStatusUi(status, text) {
   const el = getAgentsElements();
   if (!el.statusChip || !el.statusText) return;
@@ -3633,9 +3884,15 @@ function getFilteredAgentEvents() {
   const state = (el.filterState?.value || '').toLowerCase();
   const severity = (el.filterSeverity?.value || '').toLowerCase();
   const search = (el.filterSearch?.value || '').trim().toLowerCase();
+  const subFilter = agentsState.selectedSubsystemId || '';
   return agentsState.events.filter((event) => {
     if (state && String(event.state || '').toLowerCase() !== state) return false;
     if (severity && String(event.severity || '').toLowerCase() !== severity) return false;
+    if (subFilter) {
+      const eventSubId = event.subsystem_id
+        || `${(event.subsystem_type || 'global')}:${(event.subsystem_name || 'all').toLowerCase()}`;
+      if (eventSubId !== subFilter) return false;
+    }
     if (search) {
       const haystack = [
         event.summary,
@@ -3660,7 +3917,13 @@ function renderAgentEventList() {
   if (!el.list) return;
   const events = getFilteredAgentEvents();
   if (!events.length) {
-    el.list.innerHTML = '<div class="agents-empty">No anomaly events match the current filters.</div>';
+    const subName = agentsState.selectedSubsystemId
+      ? (agentsState.subsystemHealth[agentsState.selectedSubsystemId]?.subsystemName || agentsState.selectedSubsystemId)
+      : '';
+    const msg = subName
+      ? `No anomaly events for "${subName}".`
+      : 'No anomaly events match the current filters.';
+    el.list.innerHTML = `<div class="agents-empty">${escapeHtml(msg)}</div>`;
     return;
   }
   el.list.innerHTML = events
@@ -3715,6 +3978,7 @@ function renderAgentEventDetails(event) {
   if (!el.detail) return;
   if (!event) {
     el.detail.innerHTML = '<p class="text-muted">Select an anomaly event from the feed.</p>';
+    if (el.btnDeepAnalyze) el.btnDeepAnalyze.disabled = true;
     if (el.btnOpenGraph) el.btnOpenGraph.disabled = true;
     if (el.btnAck) el.btnAck.disabled = true;
     return;
@@ -3764,6 +4028,10 @@ function renderAgentEventDetails(event) {
     </div>
   `;
 
+  if (el.btnDeepAnalyze) {
+    el.btnDeepAnalyze.disabled = false;
+    el.btnDeepAnalyze.textContent = event.llm_triaged ? 'Re-Analyze' : 'Deep Analyze';
+  }
   if (el.btnOpenGraph) el.btnOpenGraph.disabled = !resolveAgentGraphTarget(event);
   if (el.btnAck) {
     const state = String(event.state || '').toLowerCase();
@@ -3838,14 +4106,19 @@ async function refreshAgentStatus() {
 
 async function startAgentsMonitoring() {
   const config = getAgentsConfigFromUI();
-  console.warn('[Agents start requested]', config);
+  agentsState.subsystemHealth = {};
+  agentsState.subsystemHistory = {};
+  agentsState.selectedSubsystemId = null;
+  renderSubsystemHealthGrid();
+  const clearSubBtn = document.getElementById('btn-agents-clear-subsystem');
+  if (clearSubBtn) clearSubBtn.style.display = 'none';
   const result = await window.api.agentsStart(config);
   if (!result.success) {
     console.error('[Agents start failed]', result);
     updateAgentStatusUi('error', result.error || 'Failed to start monitoring');
     return;
   }
-  console.warn('[Agents started]', result);
+  console.log('[Agents] started, runId=' + (result.runId || 'n/a'));
   agentsState.runId = result.runId;
   agentsState.status = 'running';
   updateAgentStatusUi('running', `Run ${result.runId}`);
@@ -3862,6 +4135,36 @@ async function stopAgentsMonitoring() {
   updateAgentStatusUi('stopped', 'Monitoring stopped');
 }
 
+async function deepAnalyzeSelectedEvent() {
+  // Pin the id: the selection can change while the analysis request is in flight.
+  const eventId = agentsState.selectedEventId;
+  if (!eventId) return;
+  const el = getAgentsElements();
+  const setButton = (text, disabled) => {
+    // Only touch the button while it still belongs to the event being analysed.
+    if (!el.btnDeepAnalyze || agentsState.selectedEventId !== eventId) return;
+    el.btnDeepAnalyze.textContent = text;
+    el.btnDeepAnalyze.disabled = disabled;
+  };
+  setButton('Analyzing…', true);
+  try {
+    const result = await window.api.agentsDeepAnalyze(eventId);
+    if (!result?.success || !result.event) {
+      console.error('[Agents] deep-analyze failed:', result?.error);
+      setButton('Failed — Retry', false);
+      return;
+    }
+    const idx = agentsState.events.findIndex((e) => e.event_id === eventId);
+    const merged = idx >= 0 ? { ...agentsState.events[idx], ...result.event } : result.event;
+    if (idx >= 0) agentsState.events[idx] = merged;
+    renderAgentEventList();
+    if (agentsState.selectedEventId === eventId) renderAgentEventDetails(merged);
+  } catch (err) {
+    console.error('[Agents] deep-analyze error:', err);
+    setButton('Failed — Retry', false);
+  }
+}
+
 async function acknowledgeSelectedAgentEvent() {
   if (!agentsState.selectedEventId) return;
   const selected = agentsState.events.find((e) => e.event_id === agentsState.selectedEventId);
@@ -3904,31 +4207,25 @@ function ensureAgentListeners() {
 
   window.api.onAgentStatus((payload) => {
     if (!payload) return;
-    console.warn('[Agents status]', payload);
     if (payload.runId) agentsState.runId = payload.runId;
     agentsState.status = payload.state || agentsState.status;
     updateAgentStatusUi(agentsState.status, `Run ${agentsState.runId || 'n/a'}`);
     updateAgentMetrics(payload, payload.timestamp);
     const diagnostics = payload.diagnostics || {};
-    console.warn('[Agents diagnostics]', diagnostics);
-    console.warn('[Agents diagnostics summary]', {
-      phase: diagnostics.phase ?? null,
-      reason: diagnostics.reason ?? null,
-      monitoredTags: diagnostics.monitoredTags ?? null,
-      linkedTags: diagnostics.linkedTags ?? null,
-      validLiveCount: diagnostics.validLiveCount ?? null,
-      staleFilteredCount: diagnostics.staleFilteredCount ?? null,
-      missingTimestampCount: diagnostics.missingTimestampCount ?? null,
-      inferredTimestampCount: diagnostics.inferredTimestampCount ?? null,
-      detectedSubsystemCount: diagnostics.detectedSubsystemCount ?? null,
-      candidateSubsystemCount: diagnostics.candidateSubsystemCount ?? null,
-    });
-    console.warn('[Agents tool calls]', Array.isArray(diagnostics.toolCalls) ? diagnostics.toolCalls : []);
-    console.warn('[Agents stale samples]', Array.isArray(diagnostics.staleSamples) ? diagnostics.staleSamples : []);
+    const phase = diagnostics.phase || '?';
+    console.log(`[Agents] phase=${phase} tags=${diagnostics.monitoredTags ?? '?'}`);
+
+    if (phase === 'cycle_complete') {
+      const signals = diagnostics.subsystemShiftSignals;
+      const subCount = Array.isArray(signals) ? signals.length : 0;
+      const evaluated = (diagnostics.evaluatedLinked || 0) + (diagnostics.evaluatedUnlinked || 0);
+      console.log(`[Agents] cycle_complete: ${subCount} subsystems, ${evaluated} evaluated, ${diagnostics.candidateLinked || 0} candidates`);
+    }
+
+    updateSubsystemHealthFromDiagnostics(diagnostics);
   });
 
   window.api.onAgentEvent((payload) => {
-    console.warn('[Agents event]', payload);
     upsertRealtimeAgentEvent(payload);
   });
 
@@ -3940,7 +4237,7 @@ function ensureAgentListeners() {
 
   window.api.onAgentComplete((payload) => {
     if (!payload) return;
-    console.warn('[Agents complete]', payload);
+    console.log('[Agents] run complete, success=' + payload.success);
     agentsState.status = payload.success ? 'stopped' : 'failed';
     updateAgentStatusUi(agentsState.status, payload.reason || 'Run complete');
     refreshAgentStatus();
@@ -3960,6 +4257,7 @@ function initAgentsTab() {
       await window.api.agentsCleanup(14);
       await loadAgentEvents();
     });
+    el.btnDeepAnalyze?.addEventListener('click', deepAnalyzeSelectedEvent);
     el.btnAck?.addEventListener('click', acknowledgeSelectedAgentEvent);
     el.btnOpenGraph?.addEventListener('click', () => {
       const event = agentsState.events.find((e) => e.event_id === agentsState.selectedEventId);
@@ -3971,9 +4269,18 @@ function initAgentsTab() {
     el.filterState?.addEventListener('change', loadAgentEvents);
     el.filterSeverity?.addEventListener('change', loadAgentEvents);
     el.filterSearch?.addEventListener('input', renderAgentEventList);
+
+    const clearSubBtn = document.getElementById('btn-agents-clear-subsystem');
+    clearSubBtn?.addEventListener('click', () => {
+      agentsState.selectedSubsystemId = null;
+      clearSubBtn.style.display = 'none';
+      renderSubsystemHealthGrid();
+      renderAgentEventList();
+    });
   }
   refreshAgentStatus();
   loadAgentEvents();
+  renderSubsystemHealthGrid();
 }
 
 // Initialize graph tab when it's first shown
diff --git a/electron-ui/styles.css b/electron-ui/styles.css
index f1e066e..c967b08 100644
--- a/electron-ui/styles.css
+++ b/electron-ui/styles.css
@@ -3036,12 +3036,20 @@ select.input,
 
 .agents-config-row {
   display: grid;
-  grid-template-columns: repeat(14, minmax(0, 1fr));
+  grid-template-columns: repeat(16, minmax(0, 1fr));
   gap: var(--space-2);
   margin-bottom: var(--space-4);
   align-items: center;
 }
 
+.agents-toggle-label {
+  display: flex;
+  align-items: center;
+  gap: 4px;
+  grid-column: span 2;
+  cursor: pointer;
+}
+
 .agents-config-row label {
   font-size: var(--text-xs);
   color: var(--color-text-secondary);
@@ -3081,6 +3089,381 @@ select.input,
   color: var(--color-text);
 }
 
+/* ---- Subsystem Health Dashboard ---- */
+
+.agents-health-section {
+  margin-bottom: var(--space-4);
+}
+
+.agents-health-header {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+  margin-bottom: var(--space-3);
+}
+
+.agents-health-header h3 {
+  font-size: var(--text-md);
+  font-weight: 600;
+  color: var(--color-text);
+}
+
+.agents-health-actions {
+  display: flex;
+  gap: var(--space-2);
+  align-items: center;
+}
+
+.agents-health-grid {
+  display: grid;
+  grid-template-columns: repeat(auto-fill, minmax(260px, 1fr));
+  gap: var(--space-3);
+}
+
+.agents-health-empty {
+  grid-column: 1 / -1;
+  color: var(--color-text-muted);
+  font-size: var(--text-sm);
+  padding: var(--space-4);
+  text-align: center;
+  border: 1px dashed var(--color-border);
+  border-radius: var(--radius-lg);
+  background: var(--color-bg-panel);
+}
+
+.agents-health-card {
+  border: 1px solid var(--color-border);
+  background: var(--color-bg-panel);
+  border-radius: var(--radius-lg);
+  padding: var(--space-3);
+  cursor: pointer;
+  transition: border-color var(--transition-fast), transform var(--transition-fast), box-shadow var(--transition-fast);
+  position: relative;
+  border-left: 3px solid var(--color-border);
+}
+
+.agents-health-card:hover {
+  border-color: var(--color-border-active);
+  transform: translateY(-1px);
+  box-shadow: 0 2px 8px rgba(0, 0, 0, 0.2);
+}
+
+.agents-health-card.selected {
+  border-color: var(--color-accent);
+  box-shadow: 0 0 0 1px rgba(34, 211, 238, 0.25) inset, 0 2px 12px rgba(34, 211, 238, 0.08);
+}
+
+.agents-health-card.health-healthy {
+  border-left-color: #22c55e;
+}
+
+.agents-health-card.health-elevated {
+  border-left-color: #eab308;
+}
+
+.agents-health-card.health-warning {
+  border-left-color: #f97316;
+}
+
+.agents-health-card.health-critical {
+  border-left-color: #ef4444;
+}
+
+.health-card-top {
+  display: flex;
+  justify-content: space-between;
+  align-items: flex-start;
+  margin-bottom: var(--space-2);
+}
+
+.health-card-identity {
+  display: flex;
+  align-items: center;
+  gap: var(--space-2);
+  min-width: 0;
+  flex: 1;
+}
+
+.health-indicator {
+  width: 10px;
+  height: 10px;
+  border-radius: 50%;
+  flex-shrink: 0;
+  box-shadow: 0 0 6px currentColor;
+}
+
+.health-indicator.health-healthy {
+  background: #22c55e;
+  color: #22c55e;
+}
+
+.health-indicator.health-elevated {
+  background: #eab308;
+  color: #eab308;
+}
+
+.health-indicator.health-warning {
+  background: #f97316;
+  color: #f97316;
+}
+
+.health-indicator.health-critical {
+  background: #ef4444;
+  color: #ef4444;
+  animation: pulse-critical 2s ease-in-out infinite;
+}
+
+@keyframes pulse-critical {
+  0%, 100% { opacity: 1; box-shadow: 0 0 6px currentColor; }
+  50% { opacity: 0.6; box-shadow: 0 0 12px currentColor; }
+}
+
+.health-card-name {
+  font-size: var(--text-sm);
+  font-weight: 600;
+  color: var(--color-text);
+  white-space: nowrap;
+  overflow: hidden;
+  text-overflow: ellipsis;
+}
+
+.health-card-type {
+  font-size: 10px;
+  text-transform: uppercase;
+  letter-spacing: 0.5px;
+  color: var(--color-text-muted);
+  padding: 1px 6px;
+  border-radius: 999px;
+  border: 1px solid var(--color-border);
+  background: var(--color-bg-panel-2);
+  flex-shrink: 0;
+  white-space: nowrap;
+}
+
+.health-card-stats {
+  display: grid;
+  grid-template-columns: 1fr 1fr 1fr;
+  gap: var(--space-1);
+  margin-bottom: var(--space-2);
+}
+
+.health-stat {
+  display: flex;
+  flex-direction: column;
+  gap: 1px;
+}
+
+.health-stat-label {
+  font-size: 10px;
+  color: var(--color-text-muted);
+  text-transform: uppercase;
+  letter-spacing: 0.3px;
+}
+
+.health-stat-value {
+  font-family: var(--font-mono);
+  font-size: var(--text-sm);
+  color: var(--color-text);
+}
+
+.health-stat-value.has-anomalies {
+  color: #f97316;
+}
+
+.health-stat-value.has-critical {
+  color: #ef4444;
+}
+
+.health-trend {
+  display: flex;
+  align-items: flex-end;
+  gap: 2px;
+  height: 28px;
+  padding-top: var(--space-1);
+  border-top: 1px solid var(--color-border-subtle);
+}
+
+.health-trend-bar {
+  flex: 1;
+  min-width: 3px;
+  max-width: 8px;
+  border-radius: 2px 2px 0 0;
+  transition: height 0.3s ease;
+}
+
+.health-trend-bar.trend-healthy {
+  background: rgba(34, 197, 94, 0.5);
+}
+
+.health-trend-bar.trend-elevated {
+  background: rgba(234, 179, 8, 0.5);
+}
+
+.health-trend-bar.trend-warning {
+  background: rgba(249, 115, 22, 0.6);
+}
+
+.health-trend-bar.trend-critical {
+  background: rgba(239, 68, 68, 0.6);
+}
+
+.health-trend-bar.trend-empty {
+  background: var(--color-border-subtle);
+}
+
+.health-card-health-label {
+  font-size: 10px;
+  text-transform: uppercase;
+  letter-spacing: 0.4px;
+  font-weight: 600;
+  margin-top: 2px;
+}
+
+.health-card-health-label.health-healthy { color: #22c55e; }
+.health-card-health-label.health-elevated { color: #eab308; }
+.health-card-health-label.health-warning { color: #f97316; }
+.health-card-health-label.health-critical { color: #ef4444; }
+
+/* ---- Expanded Subsystem Card ---- */
+
+.agents-health-card.expanded {
+  grid-column: 1 / -1;
+  border-color: var(--color-accent);
+  background: var(--color-bg-elevated);
+}
+
+.health-expanded-body {
+  margin-top: var(--space-3);
+  border-top: 1px solid var(--color-border-subtle);
+  padding-top: var(--space-3);
+}
+
+.health-expanded-trend {
+  display: flex;
+  align-items: flex-end;
+  gap: 3px;
+  height: 48px;
+  margin-bottom: var(--space-3);
+}
+
+.health-expanded-trend .health-trend-bar {
+  max-width: 14px;
+}
+
+.health-tag-list-header {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+  margin-bottom: var(--space-2);
+}
+
+.health-tag-list-header h4 {
+  font-size: var(--text-sm);
+  font-weight: 600;
+  color: var(--color-text-secondary);
+}
+
+.health-tag-list-header span {
+  font-size: var(--text-xs);
+  color: var(--color-text-muted);
+}
+
+.health-tag-col-headers {
+  display: grid;
+  grid-template-columns: minmax(110px, 1fr) 120px 55px 55px 55px; /* keep identical to .health-tag-row so headers align with rows */
+  gap: var(--space-2);
+  padding: 0 var(--space-2) 2px;
+  font-size: 10px;
+  font-weight: 600;
+  color: var(--color-text-muted);
+  text-transform: uppercase;
+  letter-spacing: 0.04em;
+}
+
+.health-tag-col-headers span:nth-child(n+3) {
+  text-align: right;
+}
+
+.health-tag-list {
+  display: flex;
+  flex-direction: column;
+  gap: var(--space-1);
+  max-height: 320px;
+  overflow-y: auto;
+}
+
+.health-tag-row {
+  display: grid;
+  grid-template-columns: minmax(110px, 1fr) 120px 55px 55px 55px; /* name | sparkline | z | avg | value; keep identical to .health-tag-col-headers */
+  gap: var(--space-2);
+  align-items: center;
+  padding: 5px var(--space-2);
+  border-radius: var(--radius-sm);
+  background: var(--color-bg-panel);
+  border: 1px solid var(--color-border-subtle);
+  font-size: var(--text-xs);
+}
+
+.health-tag-row:hover {
+  border-color: var(--color-border-active);
+}
+
+.health-tag-name {
+  font-family: var(--font-mono);
+  color: var(--color-text);
+  white-space: nowrap;
+  overflow: hidden;
+  text-overflow: ellipsis;
+}
+
+.health-tag-sparkline {
+  display: flex;
+  align-items: center;
+  justify-content: center;
+}
+
+.tag-sparkline {
+  display: block;
+  width: 120px; /* same 120x24 size renderTagSignalRows passes to renderSparklineSvg, and the 120px grid column */
+  height: 24px;
+}
+
+.health-tag-zscore {
+  font-family: var(--font-mono);
+  color: var(--color-text-secondary);
+  text-align: right;
+}
+
+.health-tag-zscore.tag-z-healthy { color: #22c55e; }
+.health-tag-zscore.tag-z-elevated { color: #eab308; }
+.health-tag-zscore.tag-z-warning { color: #f97316; }
+.health-tag-zscore.tag-z-critical { color: #ef4444; }
+
+.health-tag-avg {
+  font-family: var(--font-mono);
+  color: var(--color-text-muted);
+  text-align: right;
+  white-space: nowrap;
+  overflow: hidden;
+  text-overflow: ellipsis;
+}
+
+.health-tag-value {
+  font-family: var(--font-mono);
+  color: var(--color-text);
+  text-align: right;
+  white-space: nowrap;
+  overflow: hidden;
+  text-overflow: ellipsis;
+}
+
+.health-tag-empty {
+  color: var(--color-text-muted);
+  font-size: var(--text-sm);
+  padding: var(--space-3);
+  text-align: center;
+}
+
 .agents-main {
   display: grid;
   grid-template-columns: minmax(300px, 38%) minmax(0, 1fr);
diff --git a/scripts/anomaly_monitor.py b/scripts/anomaly_monitor.py
index 54065c8..0d0d91a 100644
--- a/scripts/anomaly_monitor.py
+++ b/scripts/anomaly_monitor.py
@@ -244,11 +244,13 @@ def merge_defaults(config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
         "maxMonitoredTags": 200,
         "maxCandidatesPerCycle": 25,
         "maxCandidatesPerSubsystem": 8,
-        "maxLlmTriagesPerCycle": 5,
-        "maxLlmTriagesPerSubsystem": 2,
+        "maxLlmTriagesPerCycle": 0,
+        "maxLlmTriagesPerSubsystem": 0,
         "dedupCooldownMinutes": 10,
         "retentionDays": 14,
         "cleanupEveryCycles": 40,
+        "historyCacheTtlSec": 30,
+        "tagCacheTtlSec": 60,
         "runMode": "live",
         "scope": {
             "project": None,
@@ -337,6 +339,9 @@ def __init__(self, config: Dict[str, Any], run_id: Optional[str] = None):
         self._running = True
         self._cycle_count = 0
         self._prev_values: Dict[str, float] = {}
+        self._history_cache: Dict[str, Dict[str, Any]] = {}
+        self._tag_cache: Optional[Dict[str, Any]] = None
+        self._tag_cache_at: float = 0.0
 
     # -----------------------------
     # Schema / run lifecycle
@@ -389,6 +394,17 @@ def heartbeat(self, metrics: Dict[str, Any]) -> None:
     # Tag and context collection
     # -----------------------------
     def get_monitored_tags(self) -> List[Dict[str, Any]]:
+        """Return the monitored tags, re-fetching only once ``tagCacheTtlSec`` has elapsed."""
+        ttl = float(self.config.get("tagCacheTtlSec", 60))
+        cache_age = time.time() - self._tag_cache_at
+        if self._tag_cache is not None and ttl > 0 and cache_age < ttl:
+            return self._tag_cache
+        # Cache is empty, expired, or disabled (ttl <= 0): fetch and restamp.
+        self._tag_cache = self._fetch_monitored_tags()
+        self._tag_cache_at = time.time()
+        return self._tag_cache
+
+    def _fetch_monitored_tags(self) -> List[Dict[str, Any]]:
         max_tags = int(self.config.get("maxMonitoredTags", 200))
         scope = self.config.get("scope", {})
         tag_regex = scope.get("tagRegex")
@@ -589,6 +605,12 @@ def _extract_history_values(self, history_data: Any, tag_path: str) -> List[floa
             if not rows and "tagHistory" in history_data and isinstance(history_data["tagHistory"], list):
                 rows = history_data["tagHistory"]
 
+        prefixed = self.api._ensure_provider_prefix(tag_path) if hasattr(self, "api") else tag_path
+        stripped = tag_path
+        if stripped.startswith("[") and "]" in stripped:
+            stripped = stripped[stripped.index("]") + 1:]
+        path_variants = {tag_path, prefixed, stripped}
+
         for row in rows:
             if isinstance(row, (int, float, str)):
                 val = safe_float(row)
@@ -600,21 +622,28 @@ def _extract_history_values(self, history_data: Any, tag_path: str) -> List[floa
             candidate = None
             if "value" in row:
                 candidate = row.get("value")
-            elif tag_path in row:
-                candidate = row.get(tag_path)
             else:
-                # Wide format often has timestamp + one tag column.
-                for k, v in row.items():
-                    if k.lower() in {"timestamp", "ts", "t", "time"}:
-                        continue
-                    candidate = v
-                    break
+                matched_key = next((k for k in path_variants if k in row), None)
+                if matched_key:
+                    candidate = row.get(matched_key)
+                elif len(row) <= 2:
+                    for k, v in row.items():
+                        if k.lower() in {"timestamp", "ts", "t", "time"}:
+                            continue
+                        candidate = v
+                        break
             val = safe_float(candidate)
             if val is not None:
                 values.append(val)
         return values
 
     def fetch_history_values(self, tag_path: str) -> tuple[List[float], Optional[str]]:
+        ttl = float(self.config.get("historyCacheTtlSec", 30))
+        now = time.time()
+        cached = self._history_cache.get(tag_path)
+        if cached and ttl > 0 and (now - cached["fetched_at"]) < ttl:
+            return list(cached["values"]), cached.get("error")
+
         minutes = int(self.config.get("historyWindowMinutes", 360))
         end_dt = datetime.now(timezone.utc)
         start_dt = end_dt - timedelta(minutes=minutes)
@@ -627,8 +656,61 @@ def fetch_history_values(self, tag_path: str) -> tuple[List[float], Optional[str
             return_format="Wide",
         )
         if isinstance(data, dict) and data.get("error"):
-            return [], str(data.get("error"))
-        return self._extract_history_values(data, tag_path), None
+            err = str(data.get("error"))
+            self._history_cache[tag_path] = {"values": [], "error": err, "fetched_at": now}
+            return [], err
+        values = self._extract_history_values(data, tag_path)
+        self._history_cache[tag_path] = {"values": values, "error": None, "fetched_at": now}
+        return values, None
+
+    def fetch_history_batch(self, tag_paths: List[str]) -> Dict[str, Tuple[List[float], Optional[str]]]:
+        """Return ``{path: (values, error)}`` for ``tag_paths`` via cache + batched history queries."""
+        ttl = float(self.config.get("historyCacheTtlSec", 30))
+        now = time.time()
+        results: Dict[str, Tuple[List[float], Optional[str]]] = {}
+        # Insertion-ordered dict de-duplicates paths so a repeated tag is queried only once.
+        uncached: Dict[str, None] = {}
+        for path in tag_paths:
+            cached = self._history_cache.get(path)
+            if cached and ttl > 0 and (now - cached["fetched_at"]) < ttl:
+                results[path] = (list(cached["values"]), cached.get("error"))
+            else:
+                uncached[path] = None
+
+        if not uncached:
+            return results
+
+        minutes = int(self.config.get("historyWindowMinutes", 360))
+        end_dt = datetime.now(timezone.utc)
+        start_dt = end_dt - timedelta(minutes=minutes)
+        return_size = max(100, int(self.config.get("minHistoryPoints", 30)) * 4)
+        batch_size = 20
+        pending = list(uncached)
+        for i in range(0, len(pending), batch_size):
+            batch = pending[i : i + batch_size]
+            data = self.api.query_tag_history(
+                batch,
+                start_dt.isoformat(),
+                end_dt.isoformat(),
+                return_size=return_size,
+                aggregation_mode="Average",
+                return_format="Wide",
+            )
+            fetch_ts = time.time()
+            # A batch-level error is attributed to every tag in the batch and cached as well.
+            if isinstance(data, dict) and data.get("error"):
+                err = str(data.get("error"))
+                for path in batch:
+                    results[path] = ([], err)
+                    self._history_cache[path] = {"values": [], "error": err, "fetched_at": fetch_ts}
+                continue
+
+            for path in batch:
+                values = self._extract_history_values(data, path)
+                results[path] = (values, None)
+                # Cache a copy so a caller mutating its result cannot corrupt the cache.
+                self._history_cache[path] = {"values": list(values), "error": None, "fetched_at": fetch_ts}
+        return results
 
     def get_context(self, tag_path: str) -> Dict[str, Any]:
         with self.graph.session() as session:
@@ -648,7 +730,7 @@ def get_context(self, tag_path: str) -> Dict[str, Any]:
                        collect(DISTINCT eq.name) AS equipment,
                        collect(DISTINCT s.symptom) AS symptoms,
                        collect(DISTINCT fc.cause) AS causes,
-                       collect(DISTINCT p.pattern_name) AS patterns,
+                       collect(DISTINCT p.name) AS patterns,
                        collect(DISTINCT se.name) AS safety
                 LIMIT 1
                 """,
@@ -1024,8 +1106,6 @@ def run_cycle(self) -> Dict[str, Any]:
         cycle_start = time.time()
         thresholds = self.config.get("thresholds", {})
         stale_threshold_sec = int(thresholds.get("stalenessSec", 120))
-        progress_emit_interval_tags = max(5, int(self.config.get("progressEveryTags", 10)))
-        progress_emit_interval_sec = max(1, int(self.config.get("progressEverySec", 2)))
         metrics = {
             "candidates": 0,
             "triaged": 0,
@@ -1083,6 +1163,24 @@ def run_cycle(self) -> Dict[str, Any]:
                 for t in tags
             }
         )
+
+        subsystem_tag_map: Dict[str, Dict[str, Any]] = {}
+        for t in tags:
+            sub = t.get("primary_subsystem") or _subsystem_ref("global", "all")
+            sub_id = sub.get("id", "global:all")
+            bucket = subsystem_tag_map.setdefault(sub_id, {
+                "type": sub.get("type", "global"),
+                "name": sub.get("name", "all"),
+                "tags": [],
+            })
+            bucket["tags"].append({
+                "path": t["path"],
+                "name": t.get("name", t["path"]),
+                "views": t.get("views", []),
+                "equipment": t.get("equipment", []),
+                "allSubsystems": [s.get("id") for s in (t.get("subsystems") or [])],
+            })
+
         live_values = self.api.read_tags(tag_paths)
         tool_calls: List[Dict[str, Any]] = []
         tool_calls.append({
@@ -1154,7 +1252,7 @@ def run_cycle(self) -> Dict[str, Any]:
         total_live_count = len(live_values)
         last_progress_emit = 0.0
 
-        def emit_cycle_progress(reason: str, current_tag: str = "") -> None:
+        def emit_cycle_progress(reason: str, current_tag: str = "", include_tag_map: bool = False) -> None:
             nonlocal last_progress_emit
             diag = make_default_diagnostics(
                 staleness_threshold_sec=stale_threshold_sec,
@@ -1170,9 +1268,14 @@ def emit_cycle_progress(reason: str, current_tag: str = "") -> None:
                 "qualityFilteredCount": quality_filtered_count,
                 "staleFilteredCount": stale_filtered_count,
                 "historyErrorCount": history_error_count,
+                "monitoredTags": len(tags),
                 "linkedTags": linked_tag_count,
                 "unlinkedTags": unlinked_tag_count,
+                "detectedSubsystemCount": len(detected_subsystems),
+                "detectedSubsystems": detected_subsystems[:10],
             })
+            if include_tag_map:
+                diag["subsystemTagMap"] = subsystem_tag_map
             emit("AGENT_STATUS", {
                 "runId": self.run_id,
                 "state": "running",
@@ -1185,14 +1288,16 @@ def emit_cycle_progress(reason: str, current_tag: str = "") -> None:
             })
             last_progress_emit = time.time()
 
-        emit_cycle_progress("cycle_started")
+        emit_cycle_progress("cycle_started", include_tag_map=True)
 
         def _update_subsystem_signal(
-            subsystem_ref: Dict[str, str], deterministic: Dict[str, Any], tag_path: str
+            subsystem_ref: Dict[str, str], deterministic: Dict[str, Any],
+            tag_path: str, live_value: Any = None,
         ) -> None:
             sub_id = subsystem_ref.get("id", "global:all")
             abs_z = abs(float(deterministic.get("z_score", 0.0)))
             z = float(deterministic.get("z_score", 0.0))
+            mad = float(deterministic.get("mad_score", 0.0))
             bucket = subsystem_shift_signals.setdefault(
                 sub_id,
                 {
@@ -1206,6 +1311,7 @@ def _update_subsystem_signal(
                     "sumZ": 0.0,
                     "maxAbsZ": 0.0,
                     "sampleTag": tag_path,
+                    "_tagEntries": [],
                 },
             )
             bucket["evaluated"] += 1
@@ -1216,21 +1322,29 @@ def _update_subsystem_signal(
             if abs_z > bucket["maxAbsZ"]:
                 bucket["maxAbsZ"] = abs_z
                 bucket["sampleTag"] = tag_path
+            tag_name = tag_path.rsplit("/", 1)[-1] if "/" in str(tag_path) else str(tag_path)
+            bucket["_tagEntries"].append({
+                "path": str(tag_path),
+                "name": tag_name,
+                "z": round(z, 3),
+                "absZ": round(abs_z, 3),
+                "mad": round(mad, 3),
+                "value": live_value,
+            })
 
-        for tv in live_values:
+        # ---- Phase 1: Filter live values (no I/O) ----
+        TagEntry = Tuple[Any, Dict[str, Any], Dict[str, str], bool]  # (tv, tag_meta, subsystem, is_linked)
+        tags_for_history: List[TagEntry] = []
+
+        for idx, tv in enumerate(live_values):
             processed_live_count += 1
-            tag_meta = tag_lookup.get(tv.path, {"path": tv.path, "name": tv.path})
+            tag_meta = (
+                tags[idx] if idx < len(tags)
+                else tag_lookup.get(tv.path, {"path": tv.path, "name": tv.path})
+            )
             subsystem = tag_meta.get("primary_subsystem") or _subsystem_ref("global", "all")
             is_linked = bool(tag_meta.get("views") or tag_meta.get("equipment"))
 
-            now_progress = time.time()
-            if (
-                processed_live_count == 1
-                or processed_live_count % progress_emit_interval_tags == 0
-                or (now_progress - last_progress_emit) >= progress_emit_interval_sec
-            ):
-                emit_cycle_progress("processing_live_tags", current_tag=tv.path)
-
             if tv.error:
                 live_error_count += 1
                 if is_linked:
@@ -1246,7 +1360,6 @@ def _update_subsystem_signal(
             if isinstance(tv.config, dict) and bool(tv.config.get("timestamp_inferred")):
                 inferred_timestamp_count += 1
             if not is_quality_good(tv.quality):
-                # quality gate: only emit quality anomalies if this persists via triage.
                 quality_filtered_count += 1
                 if is_linked:
                     quality_filtered_linked += 1
@@ -1255,7 +1368,6 @@ def _update_subsystem_signal(
                 continue
             parsed_ts = parse_timestamp(tv.timestamp)
             age_sec = (now - parsed_ts).total_seconds() if parsed_ts is not None else None
-            stale_threshold_sec = int(thresholds.get("stalenessSec", 120))
             if is_stale(tv.timestamp, stale_threshold_sec, now=now):
                 stale_filtered_count += 1
                 if is_linked:
@@ -1273,7 +1385,27 @@ def _update_subsystem_signal(
                     })
                 continue
 
-            history, history_error = self.fetch_history_values(tv.path)
+            tags_for_history.append((tv, tag_meta, subsystem, is_linked))
+
+        emit_cycle_progress(
+            "filtering_complete",
+            current_tag=f"{len(tags_for_history)} tags passed filters",
+        )
+
+        # ---- Phase 2: Batched history fetch ----
+        history_fetch_start = time.time()
+        history_paths = [tv.path for tv, _, _, _ in tags_for_history]
+        history_results = self.fetch_history_batch(history_paths) if history_paths else {}
+        history_fetch_elapsed = time.time() - history_fetch_start
+        emit_cycle_progress(
+            "history_complete",
+            current_tag=f"{len(history_results)} in {round(history_fetch_elapsed, 1)}s",
+        )
+
+        # ---- Phase 3: Score and build candidates using pre-fetched history ----
+        for tv, tag_meta, subsystem, is_linked in tags_for_history:
+            history, history_error = history_results.get(tv.path, ([], "No history result"))
+
             if len(tool_calls) < 18:
                 tool_calls.append({
                     "tool": "query_tag_history",
@@ -1297,8 +1429,6 @@ def _update_subsystem_signal(
                 continue
             if len(history) < min_history:
                 insufficient_history_count += 1
-                # Low-history fallback: still score dramatic shifts when at least a
-                # small baseline exists, otherwise simulator users see no events.
                 if len(history) >= 5:
                     prev_val = self._prev_values.get(tv.path)
                     deterministic = compute_deviation_scores(
@@ -1311,7 +1441,7 @@ def _update_subsystem_signal(
                     if curr_num is not None:
                         self._prev_values[tv.path] = curr_num
 
-                    _update_subsystem_signal(subsystem, deterministic, tv.path)
+                    _update_subsystem_signal(subsystem, deterministic, tv.path, live_value=tv.value)
                     if is_linked:
                         evaluated_linked += 1
                     else:
@@ -1337,6 +1467,7 @@ def _update_subsystem_signal(
                                 "sumZ": 0.0,
                                 "maxAbsZ": 0.0,
                                 "sampleTag": tv.path,
+                                "_tagEntries": [],
                             },
                         )
                         sub_bucket["candidate"] += 1
@@ -1379,7 +1510,7 @@ def _update_subsystem_signal(
             if curr_num is not None:
                 self._prev_values[tv.path] = curr_num
 
-            _update_subsystem_signal(subsystem, deterministic, tv.path)
+            _update_subsystem_signal(subsystem, deterministic, tv.path, live_value=tv.value)
             if is_linked:
                 evaluated_linked += 1
             else:
@@ -1405,6 +1536,7 @@ def _update_subsystem_signal(
                         "sumZ": 0.0,
                         "maxAbsZ": 0.0,
                         "sampleTag": tv.path,
+                        "_tagEntries": [],
                     },
                 )
                 sub_bucket["candidate"] += 1
@@ -1432,6 +1564,8 @@ def _update_subsystem_signal(
                 sub_id = subsystem.get("id", "global:all")
                 candidate_subsystem_counts[sub_id] = candidate_subsystem_counts.get(sub_id, 0) + 1
 
+        emit_cycle_progress("scoring_complete")
+
         if live_values and live_error_count == len(live_values):
             emitted = self.emit_provider_failure_event(
                 "live_tag_provider_failed",
@@ -1513,9 +1647,16 @@ def _update_subsystem_signal(
         llm_per_subsystem: Dict[str, int] = {}
         dedup_suppressed_count = 0
 
-        for candidate in shortlisted:
+        if shortlisted:
+            emit_cycle_progress(
+                "triage_started",
+                current_tag=f"{len(shortlisted)} candidates to process",
+            )
+
+        for ci, candidate in enumerate(shortlisted):
             subsystem = candidate.get("subsystem") or _subsystem_ref("global", "all")
             sub_id = subsystem.get("id", "global:all")
+            tag_name = candidate["context"].get("tag_name", candidate["context"].get("tag_path", "?"))
             use_llm = (
                 llm_total < max_triage_total
                 and llm_per_subsystem.get(sub_id, 0) < max_triage_per_subsystem
@@ -1538,7 +1679,7 @@ def _update_subsystem_signal(
                     "verification_checks": [],
                     "probable_causes": [],
                     "safety_notes": [],
-                    "rationale": "Triaged in deterministic-only mode due per-cycle/per-subsystem LLM caps.",
+                    "rationale": "Deterministic-only triage (LLM triage disabled or cap reached).",
                     "related_entities": [],
                 }
             )
@@ -1559,6 +1700,12 @@ def _update_subsystem_signal(
             else:
                 dedup_suppressed_count += 1
 
+            if (ci + 1) % 5 == 0 or ci == len(shortlisted) - 1:
+                emit_cycle_progress(
+                    "triaging",
+                    current_tag=f"{ci + 1}/{len(shortlisted)} ({tag_name})",
+                )
+
         top_candidates_by_subsystem = dict(
             sorted(candidate_subsystem_counts.items(), key=lambda item: item[1], reverse=True)[:10]
         )
@@ -1571,7 +1718,8 @@ def _update_subsystem_signal(
                 int(item.get("evaluated", 0)),
             ),
             reverse=True,
-        )[:8]
+        )
+        sparkline_size = 20
         for item in top_shift_signals:
             evaluated = max(1, int(item.get("evaluated", 0)))
             item["avgAbsZ"] = round(float(item.get("sumAbsZ", 0.0)) / evaluated, 3)
@@ -1580,6 +1728,22 @@ def _update_subsystem_signal(
             item["candidateRatio"] = round(float(item.get("candidate", 0)) / evaluated, 3)
             item.pop("sumAbsZ", None)
             item.pop("sumZ", None)
+            raw_tags = item.pop("_tagEntries", [])
+            sorted_tags = sorted(raw_tags, key=lambda t: t.get("absZ", 0.0), reverse=True)
+            tag_signals = []
+            for t in sorted_tags:
+                entry = {k: v for k, v in t.items() if k != "absZ"}
+                cached_hist = self._history_cache.get(t.get("path", ""))
+                if cached_hist and cached_hist.get("values"):
+                    vals = cached_hist["values"]
+                    entry["avg"] = round(sum(vals) / len(vals), 2)
+                    if len(vals) <= sparkline_size:
+                        entry["sparkline"] = [round(v, 2) for v in vals]
+                    else:
+                        step = len(vals) / sparkline_size
+                        entry["sparkline"] = [round(vals[int(i * step)], 2) for i in range(sparkline_size)]
+                tag_signals.append(entry)
+            item["tagSignals"] = tag_signals
 
         metrics["diagnostics"] = {
             **make_default_diagnostics(
@@ -1619,6 +1783,7 @@ def _update_subsystem_signal(
             "timestampParseNote": "Naive timestamps are treated as local time by parse_timestamp().",
             "detectedSubsystemCount": len(detected_subsystems),
             "detectedSubsystems": detected_subsystems[:10],
+            "subsystemTagMap": subsystem_tag_map,
             "candidateSubsystemCount": len(candidate_subsystem_counts),
             "candidateBySubsystem": top_candidates_by_subsystem,
             "subsystemShiftSignals": top_shift_signals,
@@ -1786,6 +1951,74 @@ def clear_event(self, event_id: str, note: Optional[str]) -> Dict[str, Any]:
                 return {"success": False, "error": f"Event not found: {event_id}"}
         return {"success": True, "eventId": event_id}
 
+    def deep_analyze(self, event_id: str) -> Dict[str, Any]:
+        """Run LLM triage on an existing event and update it in-place.
+
+        Returns {"success": True, "event": ...} with the re-read event, or
+        {"success": False, "error": ...} when triage cannot be attempted.
+        """
+        event = self.graph.get_anomaly_event(event_id)
+        if not event:
+            return {"success": False, "error": f"Event not found: {event_id}"}
+        tag_path = event.get("source_tag") or event.get("tag_name", "")
+        if not tag_path:
+            return {"success": False, "error": "Event has no source_tag"}
+        # Checked before get_context() so no context is built when triage cannot run.
+        if not self.llm:
+            return {"success": False, "error": "LLM client not configured"}
+
+        context = self.get_context(tag_path)
+        context["subsystem"] = {
+            "id": event.get("subsystem_id", "global:all"),
+            "type": event.get("subsystem_type", "global"),
+            "name": event.get("subsystem_name", "all"),
+        }
+        # `or` fallbacks: a stored null/empty property must not crash float()/json.loads().
+        deterministic = {
+            "candidate": True,
+            "z_score": float(event.get("z_score") or 0),
+            "mad_score": float(event.get("mad_score") or 0),
+            "delta_rate": float(event.get("delta_rate") or 0),
+            "window_volatility": float(event.get("window_volatility") or 0),
+            "reasons": json.loads(event.get("deterministic_reasons_json") or "[]"),
+            "category": event.get("category", "deviation"),
+        }
+        live_sample = {
+            "value": event.get("live_value"),
+            "quality": event.get("live_quality"),
+            "timestamp": event.get("live_timestamp"),
+        }
+        triage = self.run_llm_triage(context, deterministic, live_sample)
+        severity = self._severity_from_scores(deterministic, triage)
+        with self.graph.session() as session:
+            session.run(
+                """
+                MATCH (e:AnomalyEvent {event_id: $event_id})
+                SET e.summary = $summary,
+                    e.explanation = $explanation,
+                    e.severity = $severity,
+                    e.confidence = $confidence,
+                    e.recommended_checks_json = $checks,
+                    e.probable_causes_json = $causes,
+                    e.safety_notes_json = $safety,
+                    e.updated_at = $updated_at,
+                    e.llm_triaged = true
+                RETURN e
+                """,
+                event_id=event_id,
+                summary=triage.get("summary", ""),
+                explanation=triage.get("rationale", ""),
+                severity=severity,
+                confidence=float(max(0.0, min(1.0, triage.get("confidence", 0.5)))),
+                checks=json.dumps(triage.get("verification_checks", []), default=str),
+                causes=json.dumps(triage.get("probable_causes", []), default=str),
+                safety=json.dumps(triage.get("safety_notes", []), default=str),
+                updated_at=utc_now_iso(),
+            )
+
+        updated_event = self.graph.get_anomaly_event(event_id)
+        return {"success": True, "event": updated_event}
+
     def get_status(self, run_id: str) -> Dict[str, Any]:
         with self.graph.session() as session:
             result = session.run(
@@ -1890,6 +2123,9 @@ def main() -> int:
     p_clear.add_argument("--event-id", required=True)
     p_clear.add_argument("--note")
 
+    p_deep = sub.add_parser("deep-analyze", help="Run LLM triage on an existing event")
+    p_deep.add_argument("--event-id", required=True)
+
     p_cleanup = sub.add_parser("cleanup", help="Delete old anomaly events")
     p_cleanup.add_argument("--retention-days", type=int, default=14)
 
@@ -1942,6 +2178,10 @@ def _signal_handler(_signum, _frame):
         print(json.dumps(monitor.clear_event(args.event_id, args.note), default=str))
         return 0
 
+    if args.command == "deep-analyze":
+        print(json.dumps(monitor.deep_analyze(args.event_id), default=str))
+        return 0
+
     if args.command == "cleanup":
         deleted = monitor.graph.cleanup_anomaly_events(args.retention_days)
         print(json.dumps({"success": True, "deleted": deleted}))
diff --git a/tests/quick_import_test.py b/tests/quick_import_test.py
new file mode 100644
index 0000000..1edd415
--- /dev/null
+++ b/tests/quick_import_test.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+"""
+Quick test script to verify import files are ready.
+Shows what will be imported without needing CODESYS running.
+"""
+
+import os
+from pathlib import Path
+
+
+def analyze_import_directory(import_dir):
+    """Print a summary of the `*.st` files found under *import_dir*.
+
+    Files are classified by suffix: .prg.st, .fb.st and .fun.st are POUs,
+    .gvl.st are GVLs. POUs are previewed (first five lines), GVLs are printed
+    in full. Prints an error and returns if the directory does not exist.
+    """
+    import_path = Path(import_dir)
+    if not import_path.exists():
+        print(f"Error: Directory not found: {import_dir}")
+        return
+
+    print(f"Analyzing import directory: {import_dir}\n")
+    print("=" * 60)
+
+    # Suffix -> POU type; None marks a GVL.
+    suffix_kinds = (('.prg.st', 'Program'), ('.fb.st', 'FunctionBlock'),
+                    ('.fun.st', 'Function'), ('.gvl.st', None))
+    pous = []
+    gvls = []
+    for st_file in sorted(import_path.rglob("*.st")):
+        for suffix, pou_type in suffix_kinds:
+            if not st_file.name.endswith(suffix):
+                continue
+            # Slice off the trailing suffix only; str.replace() would also
+            # delete an earlier occurrence inside the name.
+            name = st_file.name[:-len(suffix)]
+            if pou_type is None:
+                gvls.append((name, st_file))
+            else:
+                pous.append((pou_type, name, st_file))
+            break
+
+    print(f"\nPOUs to import: {len(pous)}")
+    for pou_type, name, filepath in pous:
+        print(f"  - {name} ({pou_type})")
+        # Preview the first five lines without reading the whole file.
+        with open(filepath, 'r', encoding='utf-8') as f:
+            for _, line in zip(range(5), f):
+                print(f"    {line.rstrip()}")
+        print()
+
+    print(f"\nGVLs to import: {len(gvls)}")
+    for name, filepath in gvls:
+        print(f"  - {name}")
+        with open(filepath, 'r', encoding='utf-8') as f:
+            print(f"    {f.read().strip()}")
+        print()
+
+    print("=" * 60)
+    print(f"\nTotal: {len(pous)} POUs, {len(gvls)} GVLs")
+    print("\nTo import, run inside CODESYS:")
+    print(f'  codesys_import.py "<project_path>" "{import_dir}"')
+
+
+if __name__ == "__main__":
+    import sys
+    # Without a directory argument, show usage and exit with status 1.
+    cli_args = sys.argv[1:]
+    if not cli_args:
+        print("Usage: python quick_import_test.py <import_dir>")
+        print("\nExample:")
+        print("  python quick_import_test.py test_cross_applied_export")
+        sys.exit(1)
+    analyze_import_directory(cli_args[0])
+