From afbadff71b524c0e10ea651388724853f7d8611c Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya <byk@sentry.io>
Date: Tue, 24 Mar 2026 10:14:18 +0000
Subject: [PATCH 1/2] fix: catch unhandled exceptions in transform hooks to
 prevent 500 errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both transform hooks (system.transform and messages.transform) lacked
try-catch wrapping. Any SQLite error (corruption, busy timeout, schema
mismatch) propagated through OpenCode's Plugin.trigger mechanism and
surfaced as a 500 'Internal server error', halting the user's session.

Changes:
- system.transform: wrap knowledge injection block in try-catch. On
  error, log via log.error(), reset LTM tokens to 0, and push a fixed
  fallback note directing the LLM to use the recall tool. Track
  degraded sessions to avoid busting the provider's read-token cache
  on recovery — if the session's last transform token estimate exceeds
  the LTM token count, keep the fallback note rather than switching
  mid-session.
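- messages.transform: wrap the entire transform path in try-catch. On
  error, log via log.error() and make no further changes to
  output.messages. This is close to a layer 0 passthrough, except that
  the trailing-assistant trim is also skipped when transform() itself
  throws.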
- gradient.ts: export getLastTransformEstimate() for the cache
  trade-off calculation.
- Tests: 4 new tests covering DB error survival for both hooks, plus
  cache-aware LTM recovery (skip on long sessions, proceed on short).
---
 src/gradient.ts    |   5 +
 src/index.ts       | 211 +++++++++++++++-----------
 test/index.test.ts | 361 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 493 insertions(+), 84 deletions(-)
diff --git a/src/gradient.ts b/src/gradient.ts
index 3a61473..12e5616 100644
--- a/src/gradient.ts
+++ b/src/gradient.ts
@@ -205,6 +205,11 @@ export function getLastTransformedCount(sessionID: string): number {
   return sessionStates.get(sessionID)?.lastTransformedCount ?? 0;
 }
 
+/** Returns the token estimate from the most recent transform() output, or 0 if none is recorded for the session. */
+export function getLastTransformEstimate(sessionID: string): number {
+  return sessionStates.get(sessionID)?.lastTransformEstimate ?? 0;
+}
+
 /** Returns the layer used by the most recent transform() call. For testing. */
 export function getLastLayer(sessionID?: string): SafetyLayer {
   if (sessionID) return sessionStates.get(sessionID)?.lastLayer ?? 0;
diff --git a/src/index.ts b/src/index.ts
index 4c6d610..203fa55 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -15,6 +15,7 @@ import {
   getLtmBudget,
   setForceMinLayer,
   getLastTransformedCount,
+  getLastTransformEstimate,
 } from "./gradient";
 import { formatKnowledge, formatDistillations } from "./prompt";
 import { createRecallTool } from "./reflect";
@@ -137,6 +138,10 @@ export const LorePlugin: Plugin = async (ctx) => {
     ltmSessionCache.clear();
   }
 
+  // Sessions where LTM injection failed and the fallback note was pushed.
+  // Used to decide whether recovering LTM is worth the prompt cache bust.
+  const ltmDegradedSessions = new Set<string>();
+
   // Track active sessions for distillation
   const activeSessions = new Set<string>();
 
@@ -478,34 +483,67 @@ export const LorePlugin: Plugin = async (ctx) => {
       // cache invalidation on every single turn.
       if (cfg.knowledge.enabled) {
         const sessionID = input.sessionID;
-        let cached = sessionID ? ltmSessionCache.get(sessionID) : undefined;
-
-        if (!cached) {
-          const ltmBudget = getLtmBudget(cfg.budget.ltm);
-          const entries = ltm.forSession(projectPath, sessionID, ltmBudget);
-          if (entries.length) {
-            const formatted = formatKnowledge(
-              entries.map((e) => ({
-                category: e.category,
-                title: e.title,
-                content: e.content,
-              })),
-              ltmBudget,
-            );
-
-            if (formatted) {
-              const tokenCount = Math.ceil(formatted.length / 3);
-              cached = { formatted, tokenCount };
-              if (sessionID) ltmSessionCache.set(sessionID, cached);
+        try {
+          let cached = sessionID ? ltmSessionCache.get(sessionID) : undefined;
+
+          if (!cached) {
+            const ltmBudget = getLtmBudget(cfg.budget.ltm);
+            const entries = ltm.forSession(projectPath, sessionID, ltmBudget);
+            if (entries.length) {
+              const formatted = formatKnowledge(
+                entries.map((e) => ({
+                  category: e.category,
+                  title: e.title,
+                  content: e.content,
+                })),
+                ltmBudget,
+              );
+
+              if (formatted) {
+                const tokenCount = Math.ceil(formatted.length / 3);
+
+                // If this session was previously degraded (fallback note instead of LTM),
+                // switching to real LTM changes the system prompt prefix → busts the
+                // provider's read-token cache for the entire conversation after this point.
+                // Only recover if the cache invalidation cost is small relative to LTM benefit.
+                if (sessionID && ltmDegradedSessions.has(sessionID)) {
+                  const conversationTokens = getLastTransformEstimate(sessionID);
+                  if (conversationTokens > tokenCount) {
+                    // Conversation is larger than LTM — cache bust costs more than
+                    // LTM is worth. Keep the fallback note for this session.
+                    setLtmTokens(0);
+                    output.system.push(
+                      "[Lore plugin] Long-term memory is temporarily unavailable. " +
+                        "Use the recall tool to search for project knowledge, " +
+                        "past decisions, and prior session context when needed.",
+                    );
+                    return;
+                  }
+                  // Conversation is small — LTM benefit outweighs cache cost. Recover.
+                  ltmDegradedSessions.delete(sessionID);
+                }
+
+                cached = { formatted, tokenCount };
+                if (sessionID) ltmSessionCache.set(sessionID, cached);
+              }
             }
           }
-        }
 
-        if (cached) {
-          setLtmTokens(cached.tokenCount);
-          output.system.push(cached.formatted);
-        } else {
+          if (cached) {
+            setLtmTokens(cached.tokenCount);
+            output.system.push(cached.formatted);
+          } else {
+            setLtmTokens(0);
+          }
+        } catch (e) {
+          log.error("system transform: knowledge injection failed:", e);
           setLtmTokens(0);
+          if (sessionID) ltmDegradedSessions.add(sessionID);
+          output.system.push(
+            "[Lore plugin] Long-term memory is temporarily unavailable. " +
+              "Use the recall tool to search for project knowledge, " +
+              "past decisions, and prior session context when needed.",
+          );
         }
       } else {
         setLtmTokens(0);
@@ -532,70 +570,75 @@ export const LorePlugin: Plugin = async (ctx) => {
 
       const sessionID = output.messages[0]?.info.sessionID;
 
-      // Skip gradient transform for lore worker sessions (lore-distill, lore-curator).
-      // Worker sessions are small (typically 5-15 messages) and don't need context
-      // management. More importantly, allowing them through would overwrite the
-      // per-session state for the MAIN session if they happen to share a session ID —
-      // and before per-session state was introduced, module-level variables were
-      // corrupted this way, causing calibration oscillation and layer 0 passthrough
-      // on the main session's next step. Belt-and-suspenders: even with per-session
-      // state, worker sessions waste CPU on transform() for no benefit.
-      if (sessionID && await shouldSkip(sessionID)) return;
-
-      const result = transform({
-        messages: output.messages,
-        projectPath,
-        sessionID,
-      });
+      try {
+        // Skip gradient transform for lore worker sessions (lore-distill, lore-curator).
+        // Worker sessions are small (typically 5-15 messages) and don't need context
+        // management. More importantly, allowing them through would overwrite the
+        // per-session state for the MAIN session if they happen to share a session ID —
+        // and before per-session state was introduced, module-level variables were
+        // corrupted this way, causing calibration oscillation and layer 0 passthrough
+        // on the main session's next step. Belt-and-suspenders: even with per-session
+        // state, worker sessions waste CPU on transform() for no benefit.
+        if (sessionID && await shouldSkip(sessionID)) return;
+
+        const result = transform({
+          messages: output.messages,
+          projectPath,
+          sessionID,
+        });
 
-      // The API requires the conversation to end with a user message.
-      // Drop trailing pure-text assistant messages (no tool parts), which would
-      // cause an Anthropic "does not support assistant message prefill" error.
-      // This must run at ALL layers, including layer 0 (passthrough) — the error
-      // can occur even when messages fit within the context budget.
-      //
-      // Crucially, assistant messages that contain tool parts (completed OR pending)
-      // must NOT be dropped:
-      // - Completed tool parts: OpenCode's SDK converts these into tool_result blocks
-      //   sent as user-role messages at the API level. The conversation already ends
-      //   with a user message — dropping would strip the entire current agentic turn
-      //   and cause an infinite tool-call loop (the model restarts from scratch).
-      // - Pending tool parts: the tool call hasn't returned yet; dropping would make
-      //   the model re-issue the same tool call on the next turn.
-      //
-      // Note: at layer 0, result.messages === output.messages (same reference), so
-      // mutating result.messages here also trims output.messages in place — which is
-      // safe for prompt caching since we only ever remove trailing messages, never
-      // reorder or insert.
-      while (
-        result.messages.length > 0 &&
-        result.messages.at(-1)!.info.role !== "user"
-      ) {
-        const last = result.messages.at(-1)!;
-        const hasToolParts = last.parts.some((p) => p.type === "tool");
-        if (hasToolParts) {
-          // Tool parts → tool_result (user-role) at the API level → no prefill error.
-          // Stop dropping; the conversation ends correctly as-is.
-          break;
+        // The API requires the conversation to end with a user message.
+        // Drop trailing pure-text assistant messages (no tool parts), which would
+        // cause an Anthropic "does not support assistant message prefill" error.
+        // This must run at ALL layers, including layer 0 (passthrough) — the error
+        // can occur even when messages fit within the context budget.
+        //
+        // Crucially, assistant messages that contain tool parts (completed OR pending)
+        // must NOT be dropped:
+        // - Completed tool parts: OpenCode's SDK converts these into tool_result blocks
+        //   sent as user-role messages at the API level. The conversation already ends
+        //   with a user message — dropping would strip the entire current agentic turn
+        //   and cause an infinite tool-call loop (the model restarts from scratch).
+        // - Pending tool parts: the tool call hasn't returned yet; dropping would make
+        //   the model re-issue the same tool call on the next turn.
+        //
+        // Note: at layer 0, result.messages === output.messages (same reference), so
+        // mutating result.messages here also trims output.messages in place — which is
+        // safe for prompt caching since we only ever remove trailing messages, never
+        // reorder or insert.
+        while (
+          result.messages.length > 0 &&
+          result.messages.at(-1)!.info.role !== "user"
+        ) {
+          const last = result.messages.at(-1)!;
+          const hasToolParts = last.parts.some((p) => p.type === "tool");
+          if (hasToolParts) {
+            // Tool parts → tool_result (user-role) at the API level → no prefill error.
+            // Stop dropping; the conversation ends correctly as-is.
+            break;
+          }
+          const dropped = result.messages.pop()!;
+          log.warn(
+            "dropping trailing pure-text",
+            dropped.info.role,
+            "message to prevent prefill error. id:",
+            dropped.info.id,
+          );
         }
-        const dropped = result.messages.pop()!;
-        log.warn(
-          "dropping trailing pure-text",
-          dropped.info.role,
-          "message to prevent prefill error. id:",
-          dropped.info.id,
-        );
-      }
 
-      // Only restructure messages when the gradient transform is active (layers 1-4).
-      // Layer 0 means all messages fit within the context budget — leave them alone
-      // so the append-only sequence stays intact for prompt caching.
-      if (result.layer > 0) {
-        output.messages.splice(0, output.messages.length, ...result.messages);
-      }
+        // Only restructure messages when the gradient transform is active (layers 1-4).
+        // Layer 0 means all messages fit within the context budget — leave them alone
+        // so the append-only sequence stays intact for prompt caching.
+        if (result.layer > 0) {
+          output.messages.splice(0, output.messages.length, ...result.messages);
+        }
 
-      if (result.layer >= 2 && sessionID) {
-        backgroundDistill(sessionID);
+        if (result.layer >= 2 && sessionID) {
+          backgroundDistill(sessionID);
+        }
+      } catch (e) {
+        log.error("messages transform: gradient transform failed:", e);
+        // output.messages untouched — session continues without context management
       }
     },
 
diff --git a/test/index.test.ts b/test/index.test.ts
index 3bfc596..a944985 100644
--- a/test/index.test.ts
+++ b/test/index.test.ts
@@ -1,7 +1,10 @@
 import { describe, test, expect, beforeEach } from "bun:test";
 import { isContextOverflow, buildRecoveryMessage, LorePlugin, isValidProjectPath } from "../src/index";
 import * as ltm from "../src/ltm";
+import { db } from "../src/db";
+import { getLtmTokens, setModelLimits, calibrate, setLtmTokens } from "../src/gradient";
 import type { Plugin } from "@opencode-ai/plugin";
+import type { Message, Part } from "@opencode-ai/sdk";
 
 // ── Pure function tests ──────────────────────────────────────────────
 
@@ -664,3 +667,361 @@ describe("LorePlugin — invalid project path", () => {
     expect(hooks.event).toBeDefined();
   });
 });
+
+// ── Transform hook error handling ─────────────────────────────────────
+//
+// Validates that both transform hooks catch DB errors and degrade gracefully
+// instead of propagating exceptions that surface as 500 errors.
+
+/**
+ * Helper: run the "experimental.chat.messages.transform" hook on a shallow copy of `messages` and return that copy after the hook has run.
+ */
+async function callMessagesTransform(
+  hooks: Awaited<ReturnType<typeof LorePlugin>>,
+  messages: Array<{ info: Message; parts: Part[] }>,
+): Promise<Array<{ info: Message; parts: Part[] }>> {
+  const msgTransform = (hooks as Record<string, unknown>)[
+    "experimental.chat.messages.transform"
+  ] as (
+    input: unknown,
+    output: { messages: Array<{ info: Message; parts: Part[] }> },
+  ) => Promise<void>;
+  const output = { messages: [...messages] }; // new array, so in-place edits by the hook do not touch the caller's array
+  await msgTransform({}, output);
+  return output.messages;
+}
+/** Test fixture: builds a user or assistant message (chosen by `role`) carrying a single text part that contains `text`. */
+function makeTestMsg(
+  id: string,
+  role: "user" | "assistant",
+  text: string,
+  sessionID = "ses_transform_err",
+): { info: Message; parts: Part[] } {
+  const info: Message =
+    role === "user"
+      ? {
+          id,
+          sessionID,
+          role: "user",
+          time: { created: Date.now() },
+          agent: "build",
+          model: {
+            providerID: "anthropic",
+            modelID: "claude-sonnet-4-20250514",
+          },
+        }
+      : {
+          id,
+          sessionID,
+          role: "assistant",
+          time: { created: Date.now() },
+          parentID: `parent-${id}`,
+          modelID: "claude-sonnet-4-20250514",
+          providerID: "anthropic",
+          mode: "build",
+          path: { cwd: "/test", root: "/test" },
+          cost: 0,
+          tokens: {
+            input: 100,
+            output: 50,
+            reasoning: 0,
+            cache: { read: 0, write: 0 },
+          },
+        };
+  return {
+    info,
+    parts: [
+      {
+        id: `part-${id}`,
+        sessionID,
+        messageID: id,
+        type: "text",
+        text,
+        time: { start: Date.now(), end: Date.now() },
+      },
+    ],
+  };
+}
+
+/**
+ * Helper: recreate the knowledge + FTS tables and the FTS sync triggers if missing (CREATE ... IF NOT EXISTS; nothing is dropped here).
+ * Schema is meant to match db.ts initial schema + all migrations — keep in sync when db.ts changes.
+ * Used by error-handling tests that drop these tables and then need them restored.
+ */
+function restoreKnowledgeTables() {
+  db().exec(`
+    CREATE TABLE IF NOT EXISTS knowledge (
+      id TEXT PRIMARY KEY,
+      project_id TEXT,
+      category TEXT NOT NULL,
+      title TEXT NOT NULL,
+      content TEXT NOT NULL,
+      source_session TEXT,
+      cross_project INTEGER DEFAULT 0,
+      confidence REAL DEFAULT 1.0,
+      created_at INTEGER NOT NULL DEFAULT (unixepoch()),
+      updated_at INTEGER NOT NULL DEFAULT (unixepoch()),
+      metadata TEXT,
+      embedding BLOB
+    )
+  `);
+  db().exec(`
+    CREATE VIRTUAL TABLE IF NOT EXISTS knowledge_fts USING fts5(
+      title, content, category,
+      content=knowledge, content_rowid=rowid,
+      tokenize='porter unicode61'
+    )
+  `);
+  // Recreate FTS sync triggers
+  db().exec(`
+    CREATE TRIGGER IF NOT EXISTS knowledge_fts_insert AFTER INSERT ON knowledge BEGIN
+      INSERT INTO knowledge_fts(rowid, title, content, category)
+      VALUES (new.rowid, new.title, new.content, new.category);
+    END
+  `);
+  db().exec(`
+    CREATE TRIGGER IF NOT EXISTS knowledge_fts_delete AFTER DELETE ON knowledge BEGIN
+      INSERT INTO knowledge_fts(knowledge_fts, rowid, title, content, category)
+      VALUES('delete', old.rowid, old.title, old.content, old.category);
+    END
+  `);
+  db().exec(`
+    CREATE TRIGGER IF NOT EXISTS knowledge_fts_update AFTER UPDATE ON knowledge BEGIN
+      INSERT INTO knowledge_fts(knowledge_fts, rowid, title, content, category)
+      VALUES('delete', old.rowid, old.title, old.content, old.category);
+      INSERT INTO knowledge_fts(rowid, title, content, category)
+      VALUES (new.rowid, new.title, new.content, new.category);
+    END
+  `);
+}
+/** Helper: recreate the distillations table, its indexes, the distillation FTS table, and the FTS sync triggers if missing. */
+function restoreDistillationTables() {
+  db().exec(`
+    CREATE TABLE IF NOT EXISTS distillations (
+      id TEXT PRIMARY KEY,
+      project_id TEXT NOT NULL REFERENCES projects(id),
+      session_id TEXT NOT NULL,
+      narrative TEXT NOT NULL,
+      facts TEXT NOT NULL,
+      source_ids TEXT NOT NULL,
+      generation INTEGER DEFAULT 0,
+      token_count INTEGER DEFAULT 0,
+      created_at INTEGER NOT NULL DEFAULT (unixepoch()),
+      observations TEXT NOT NULL DEFAULT '',
+      archived INTEGER NOT NULL DEFAULT 0
+    )
+  `);
+  // Post-migration indexes: compound indexes from v6, single-column idx_distillation_project dropped
+  db().exec(`CREATE INDEX IF NOT EXISTS idx_distillation_session ON distillations(session_id)`);
+  db().exec(`CREATE INDEX IF NOT EXISTS idx_distillation_archived ON distillations(archived)`);
+  db().exec(`CREATE INDEX IF NOT EXISTS idx_distillation_project_session ON distillations(project_id, session_id)`);
+  db().exec(`CREATE INDEX IF NOT EXISTS idx_distillation_project_session_gen_archived ON distillations(project_id, session_id, generation, archived)`);
+  db().exec(`
+    CREATE VIRTUAL TABLE IF NOT EXISTS distillation_fts USING fts5(
+      observations, content='distillations', content_rowid='rowid'
+    )
+  `);
+  db().exec(`
+    CREATE TRIGGER IF NOT EXISTS distillation_fts_insert AFTER INSERT ON distillations BEGIN
+      INSERT INTO distillation_fts(rowid, observations) VALUES (new.rowid, new.observations);
+    END
+  `);
+  db().exec(`
+    CREATE TRIGGER IF NOT EXISTS distillation_fts_delete AFTER DELETE ON distillations BEGIN
+      INSERT INTO distillation_fts(distillation_fts, rowid, observations)
+      VALUES('delete', old.rowid, old.observations);
+    END
+  `);
+  db().exec(`
+    CREATE TRIGGER IF NOT EXISTS distillation_fts_update AFTER UPDATE ON distillations BEGIN
+      INSERT INTO distillation_fts(distillation_fts, rowid, observations)
+      VALUES('delete', old.rowid, old.observations);
+      INSERT INTO distillation_fts(rowid, observations) VALUES (new.rowid, new.observations);
+    END
+  `);
+}
+
+describe("transform hook error handling", () => {
+  test("system.transform catches DB error and pushes fallback note", async () => {
+    const { hooks, tmpDir, cleanup } = await initPlugin();
+    try {
+      // Seed a knowledge entry so forSession() would normally return data
+      ltm.create({
+        projectPath: tmpDir,
+        category: "pattern",
+        title: "Error handling test entry",
+        content: "This entry exists to trigger forSession code path",
+        scope: "project",
+      });
+
+      // Corrupt the knowledge table — forSession() queries it directly.
+      // Drop both FTS and base table to ensure a hard error.
+      db().exec("DROP TABLE IF EXISTS knowledge_fts");
+      db().exec("DROP TABLE IF EXISTS knowledge");
+
+      const sessionID = "ses_sys_db_error";
+      const result = await callSystemTransform(hooks!, sessionID);
+
+      // Should contain the fallback note (not crash)
+      const fallback = result.find((s) =>
+        s.includes("Long-term memory is temporarily unavailable"),
+      );
+      expect(fallback).toBeTruthy();
+      expect(fallback).toContain("recall tool");
+
+      // LTM tokens should be reset to 0
+      expect(getLtmTokens()).toBe(0);
+    } finally {
+      restoreKnowledgeTables();
+      cleanup();
+    }
+  });
+
+  test("messages.transform catches DB error and leaves messages unchanged", async () => {
+    const { hooks, cleanup } = await initPlugin();
+    try {
+      const sessionID = "ses_msg_db_error";
+      const messages = [
+        makeTestMsg("u1", "user", "Hello world", sessionID),
+        makeTestMsg("a1", "assistant", "Hi there!", sessionID),
+        makeTestMsg("u2", "user", "What next?", sessionID),
+      ];
+
+      // Corrupt the distillations table — transform() calls loadDistillations()
+      // which queries this table.
+      db().exec("DROP TABLE IF EXISTS distillation_fts");
+      db().exec("DROP TABLE IF EXISTS distillations");
+
+      // Should not throw — messages pass through unchanged
+      const result = await callMessagesTransform(hooks!, messages);
+
+      // Messages should be unchanged (layer 0 passthrough equivalent)
+      expect(result.length).toBe(messages.length);
+      expect(result[0].info.id).toBe("u1");
+      expect(result[2].info.id).toBe("u2");
+    } finally {
+      restoreDistillationTables();
+      cleanup();
+    }
+  });
+
+  test("LTM recovery skipped on long session to preserve prompt cache", async () => {
+    const { hooks, tmpDir, cleanup } = await initPlugin();
+    try {
+      // Seed knowledge entry
+      ltm.create({
+        projectPath: tmpDir,
+        category: "architecture",
+        title: "Cache trade-off test",
+        content: "Entry for testing LTM recovery trade-off",
+        scope: "project",
+      });
+
+      const sessionID = "ses_cache_tradeoff_long";
+
+      // First call: corrupt DB to trigger degraded mode
+      db().exec("DROP TABLE IF EXISTS knowledge_fts");
+      db().exec("DROP TABLE IF EXISTS knowledge");
+      const degraded = await callSystemTransform(hooks!, sessionID);
+      expect(
+        degraded.find((s) => s.includes("temporarily unavailable")),
+      ).toBeTruthy();
+
+      // Restore the knowledge table
+      restoreKnowledgeTables();
+
+      // Re-seed knowledge
+      ltm.create({
+        projectPath: tmpDir,
+        category: "architecture",
+        title: "Cache trade-off test restored",
+        content: "Entry for testing LTM recovery trade-off after restore",
+        scope: "project",
+      });
+
+      // Simulate a long conversation by running a real messages transform
+      // so lastTransformEstimate should end up high (no calibration call is made here).
+      setModelLimits({ context: 200_000, output: 32_000 });
+
+      // Create a substantial message array to drive up lastTransformEstimate
+      const messages: Array<{ info: Message; parts: Part[] }> = [];
+      for (let i = 0; i < 100; i++) {
+        messages.push(
+          makeTestMsg(`u${i}`, "user", "x".repeat(500), sessionID),
+          makeTestMsg(`a${i}`, "assistant", "y".repeat(500), sessionID),
+        );
+      }
+
+      // Run messages transform to populate lastTransformEstimate for this session
+      await callMessagesTransform(hooks!, messages);
+
+      // Now call system transform again — DB is restored, forSession() would
+      // succeed, but the session is degraded + conversation is large.
+      // Should keep the fallback note to preserve prompt cache.
+      const recovered = await callSystemTransform(hooks!, sessionID);
+      const stillDegraded = recovered.find((s) =>
+        s.includes("temporarily unavailable"),
+      );
+      expect(stillDegraded).toBeTruthy();
+    } finally {
+      restoreKnowledgeTables();
+      cleanup();
+    }
+  });
+
+  test("LTM recovery proceeds on short session", async () => {
+    const { hooks, tmpDir, cleanup } = await initPlugin();
+    try {
+      // Seed knowledge entry
+      ltm.create({
+        projectPath: tmpDir,
+        category: "architecture",
+        title: "Short session recovery test",
+        content: "Entry for testing LTM recovery on a short session",
+        scope: "project",
+      });
+
+      const sessionID = "ses_cache_tradeoff_short";
+
+      // First call: corrupt DB to trigger degraded mode
+      db().exec("DROP TABLE IF EXISTS knowledge_fts");
+      db().exec("DROP TABLE IF EXISTS knowledge");
+      const degraded = await callSystemTransform(hooks!, sessionID);
+      expect(
+        degraded.find((s) => s.includes("temporarily unavailable")),
+      ).toBeTruthy();
+
+      // Restore the knowledge table
+      restoreKnowledgeTables();
+
+      // Re-seed knowledge
+      ltm.create({
+        projectPath: tmpDir,
+        category: "architecture",
+        title: "Short session recovery entry",
+        content: "Entry visible after DB recovery on short session",
+        scope: "project",
+      });
+
+      // Don't run any messages transform — lastTransformEstimate stays at 0
+      // (short/new session). LTM benefit outweighs zero cache cost.
+      const recovered = await callSystemTransform(hooks!, sessionID);
+
+      // Should recover real LTM (not the fallback note)
+      const ltmBlock = recovered.find((s) =>
+        s.includes("Long-term Knowledge"),
+      );
+      expect(ltmBlock).toBeTruthy();
+      expect(ltmBlock).toContain("Short session recovery entry");
+
+      // The fallback note should NOT be present
+      const fallback = recovered.find((s) =>
+        s.includes("temporarily unavailable"),
+      );
+      expect(fallback).toBeUndefined();
+    } finally {
+      restoreKnowledgeTables();
+      cleanup();
+    }
+  });
+});

From 89923f6c2e8de7c1d6f329a5bd30645d885cfc1a Mon Sep 17 00:00:00 2001
From: Burak Yigit Kaya <byk@sentry.io>
Date: Tue, 24 Mar 2026 10:35:21 +0000
Subject: [PATCH 2/2] perf: replace SELECT * with explicit columns to avoid
 loading embedding BLOBs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After adding the embedding BLOB column (schema v8), all SELECT * queries
in ltm.ts were loading 4KB of Float32Array data per knowledge entry that
was immediately discarded (KnowledgeEntry type doesn't include embedding).

This wasted ~200KB per forSession() call (2 queries × ~25 entries × 4KB)
and affected all other knowledge queries (search, searchLike, all, get,
forProject, searchScored).

Define KNOWLEDGE_COLS and KNOWLEDGE_COLS_K constants that list exactly
the columns needed for KnowledgeEntry, excluding the embedding BLOB.
The embedding column is only needed by vectorSearch() in embedding.ts,
which already selects it explicitly.
---
 src/ltm.ts | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/src/ltm.ts b/src/ltm.ts
index d6d2b57..6db2bf3 100644
--- a/src/ltm.ts
+++ b/src/ltm.ts
@@ -23,6 +23,15 @@ export type KnowledgeEntry = {
   metadata: string | null;
 };
 
+/** Columns to select for KnowledgeEntry — excludes the embedding BLOB
+ *  (4KB per entry) which is only needed by vectorSearch() in embedding.ts. */
+const KNOWLEDGE_COLS =
+  "id, project_id, category, title, content, source_session, cross_project, confidence, created_at, updated_at, metadata";
+
+/** Same columns with the `k.` table alias prefix for JOIN queries; keep this list in sync with KNOWLEDGE_COLS. */
+const KNOWLEDGE_COLS_K =
+  "k.id, k.project_id, k.category, k.title, k.content, k.source_session, k.cross_project, k.confidence, k.created_at, k.updated_at, k.metadata";
+
 export function create(input: {
   projectPath?: string;
   category: string;
@@ -150,7 +159,7 @@ export function forProject(
   if (includeCross) {
     return db()
       .query(
-        `SELECT * FROM knowledge
+        `SELECT ${KNOWLEDGE_COLS} FROM knowledge
          WHERE (project_id = ? OR (project_id IS NULL) OR (cross_project = 1))
          AND confidence > 0.2
          ORDER BY confidence DESC, updated_at DESC`,
@@ -159,7 +168,7 @@ export function forProject(
   }
   return db()
     .query(
-      `SELECT * FROM knowledge
+      `SELECT ${KNOWLEDGE_COLS} FROM knowledge
        WHERE project_id = ?
        AND confidence > 0.2
        ORDER BY confidence DESC, updated_at DESC`,
@@ -264,7 +273,7 @@ export function forSession(
   // --- 1. Load project-specific entries ---
   const projectEntries = db()
     .query(
-      `SELECT * FROM knowledge
+      `SELECT ${KNOWLEDGE_COLS} FROM knowledge
        WHERE project_id = ? AND cross_project = 0 AND confidence > 0.2
        ORDER BY confidence DESC, updated_at DESC`,
     )
@@ -273,7 +282,7 @@ export function forSession(
   // --- 2. Load cross-project candidates ---
   const crossEntries = db()
     .query(
-      `SELECT * FROM knowledge
+      `SELECT ${KNOWLEDGE_COLS} FROM knowledge
        WHERE (project_id IS NULL OR cross_project = 1) AND confidence > 0.2
        ORDER BY confidence DESC, updated_at DESC`,
     )
@@ -370,7 +379,7 @@ export function forSession(
 export function all(): KnowledgeEntry[] {
   return db()
     .query(
-      "SELECT * FROM knowledge WHERE confidence > 0.2 ORDER BY confidence DESC, updated_at DESC",
+      `SELECT ${KNOWLEDGE_COLS} FROM knowledge WHERE confidence > 0.2 ORDER BY confidence DESC, updated_at DESC`,
     )
     .all() as KnowledgeEntry[];
 }
@@ -394,13 +403,13 @@ function searchLike(input: {
     const pid = ensureProject(input.projectPath);
     return db()
       .query(
-        `SELECT * FROM knowledge WHERE (project_id = ? OR project_id IS NULL OR cross_project = 1) AND confidence > 0.2 AND ${conditions} ORDER BY updated_at DESC LIMIT ?`,
+        `SELECT ${KNOWLEDGE_COLS} FROM knowledge WHERE (project_id = ? OR project_id IS NULL OR cross_project = 1) AND confidence > 0.2 AND ${conditions} ORDER BY updated_at DESC LIMIT ?`,
       )
       .all(pid, ...likeParams, input.limit) as KnowledgeEntry[];
   }
   return db()
     .query(
-      `SELECT * FROM knowledge WHERE confidence > 0.2 AND ${conditions} ORDER BY updated_at DESC LIMIT ?`,
+      `SELECT ${KNOWLEDGE_COLS} FROM knowledge WHERE confidence > 0.2 AND ${conditions} ORDER BY updated_at DESC LIMIT ?`,
     )
     .all(...likeParams, input.limit) as KnowledgeEntry[];
 }
@@ -417,13 +426,13 @@ export function search(input: {
   const pid = input.projectPath ? ensureProject(input.projectPath) : null;
 
   const ftsSQL = pid
-    ? `SELECT k.* FROM knowledge k
+    ? `SELECT ${KNOWLEDGE_COLS_K} FROM knowledge k
        JOIN knowledge_fts f ON k.rowid = f.rowid
        WHERE knowledge_fts MATCH ?
        AND (k.project_id = ? OR k.project_id IS NULL OR k.cross_project = 1)
        AND k.confidence > 0.2
        ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT ?`
-    : `SELECT k.* FROM knowledge k
+    : `SELECT ${KNOWLEDGE_COLS_K} FROM knowledge k
        JOIN knowledge_fts f ON k.rowid = f.rowid
        WHERE knowledge_fts MATCH ?
        AND k.confidence > 0.2
@@ -474,13 +483,13 @@ export function searchScored(input: {
   const { title, content, category } = ftsWeights();
 
   const ftsSQL = pid
-    ? `SELECT k.*, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge k
+    ? `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge k
        JOIN knowledge_fts f ON k.rowid = f.rowid
        WHERE knowledge_fts MATCH ?
        AND (k.project_id = ? OR k.project_id IS NULL OR k.cross_project = 1)
        AND k.confidence > 0.2
        ORDER BY rank LIMIT ?`
-    : `SELECT k.*, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge k
+    : `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge k
        JOIN knowledge_fts f ON k.rowid = f.rowid
        WHERE knowledge_fts MATCH ?
        AND k.confidence > 0.2
@@ -507,7 +516,7 @@ export function searchScored(input: {
 
 export function get(id: string): KnowledgeEntry | null {
   return db()
-    .query("SELECT * FROM knowledge WHERE id = ?")
+    .query(`SELECT ${KNOWLEDGE_COLS} FROM knowledge WHERE id = ?`)
     .get(id) as KnowledgeEntry | null;
 }