Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/gradient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,11 @@ export function getLastTransformedCount(sessionID: string): number {
return sessionStates.get(sessionID)?.lastTransformedCount ?? 0;
}

/** Returns the token estimate from the most recent transform() output. */
export function getLastTransformEstimate(sessionID: string): number {
return sessionStates.get(sessionID)?.lastTransformEstimate ?? 0;
}

/** Returns the layer used by the most recent transform() call. For testing. */
export function getLastLayer(sessionID?: string): SafetyLayer {
if (sessionID) return sessionStates.get(sessionID)?.lastLayer ?? 0;
Expand Down
211 changes: 127 additions & 84 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import {
getLtmBudget,
setForceMinLayer,
getLastTransformedCount,
getLastTransformEstimate,
} from "./gradient";
import { formatKnowledge, formatDistillations } from "./prompt";
import { createRecallTool } from "./reflect";
Expand Down Expand Up @@ -137,6 +138,10 @@ export const LorePlugin: Plugin = async (ctx) => {
ltmSessionCache.clear();
}

// Sessions where LTM injection failed and the fallback note was pushed.
// Used to decide whether recovering LTM is worth the prompt cache bust.
const ltmDegradedSessions = new Set<string>();

// Track active sessions for distillation
const activeSessions = new Set<string>();

Expand Down Expand Up @@ -478,34 +483,67 @@ export const LorePlugin: Plugin = async (ctx) => {
// cache invalidation on every single turn.
if (cfg.knowledge.enabled) {
const sessionID = input.sessionID;
let cached = sessionID ? ltmSessionCache.get(sessionID) : undefined;

if (!cached) {
const ltmBudget = getLtmBudget(cfg.budget.ltm);
const entries = ltm.forSession(projectPath, sessionID, ltmBudget);
if (entries.length) {
const formatted = formatKnowledge(
entries.map((e) => ({
category: e.category,
title: e.title,
content: e.content,
})),
ltmBudget,
);

if (formatted) {
const tokenCount = Math.ceil(formatted.length / 3);
cached = { formatted, tokenCount };
if (sessionID) ltmSessionCache.set(sessionID, cached);
try {
let cached = sessionID ? ltmSessionCache.get(sessionID) : undefined;

if (!cached) {
const ltmBudget = getLtmBudget(cfg.budget.ltm);
const entries = ltm.forSession(projectPath, sessionID, ltmBudget);
if (entries.length) {
const formatted = formatKnowledge(
entries.map((e) => ({
category: e.category,
title: e.title,
content: e.content,
})),
ltmBudget,
);

if (formatted) {
const tokenCount = Math.ceil(formatted.length / 3);

// If this session was previously degraded (fallback note instead of LTM),
// switching to real LTM changes the system prompt prefix → busts the
// provider's read-token cache for the entire conversation after this point.
// Only recover if the cache invalidation cost is small relative to LTM benefit.
if (sessionID && ltmDegradedSessions.has(sessionID)) {
const conversationTokens = getLastTransformEstimate(sessionID);
if (conversationTokens > tokenCount) {
// Conversation is larger than LTM — cache bust costs more than
// LTM is worth. Keep the fallback note for this session.
setLtmTokens(0);
output.system.push(
"[Lore plugin] Long-term memory is temporarily unavailable. " +
"Use the recall tool to search for project knowledge, " +
"past decisions, and prior session context when needed.",
);
return;
}
// Conversation is small — LTM benefit outweighs cache cost. Recover.
ltmDegradedSessions.delete(sessionID);
}

cached = { formatted, tokenCount };
if (sessionID) ltmSessionCache.set(sessionID, cached);
}
}
}
}

if (cached) {
setLtmTokens(cached.tokenCount);
output.system.push(cached.formatted);
} else {
if (cached) {
setLtmTokens(cached.tokenCount);
output.system.push(cached.formatted);
} else {
setLtmTokens(0);
}
} catch (e) {
log.error("system transform: knowledge injection failed:", e);
setLtmTokens(0);
if (sessionID) ltmDegradedSessions.add(sessionID);
output.system.push(
"[Lore plugin] Long-term memory is temporarily unavailable. " +
"Use the recall tool to search for project knowledge, " +
"past decisions, and prior session context when needed.",
);
}
} else {
setLtmTokens(0);
Expand All @@ -532,70 +570,75 @@ export const LorePlugin: Plugin = async (ctx) => {

const sessionID = output.messages[0]?.info.sessionID;

// Skip gradient transform for lore worker sessions (lore-distill, lore-curator).
// Worker sessions are small (typically 5-15 messages) and don't need context
// management. More importantly, allowing them through would overwrite the
// per-session state for the MAIN session if they happen to share a session ID —
// and before per-session state was introduced, module-level variables were
// corrupted this way, causing calibration oscillation and layer 0 passthrough
// on the main session's next step. Belt-and-suspenders: even with per-session
// state, worker sessions waste CPU on transform() for no benefit.
if (sessionID && await shouldSkip(sessionID)) return;

const result = transform({
messages: output.messages,
projectPath,
sessionID,
});
try {
// Skip gradient transform for lore worker sessions (lore-distill, lore-curator).
// Worker sessions are small (typically 5-15 messages) and don't need context
// management. More importantly, allowing them through would overwrite the
// per-session state for the MAIN session if they happen to share a session ID —
// and before per-session state was introduced, module-level variables were
// corrupted this way, causing calibration oscillation and layer 0 passthrough
// on the main session's next step. Belt-and-suspenders: even with per-session
// state, worker sessions waste CPU on transform() for no benefit.
if (sessionID && await shouldSkip(sessionID)) return;

const result = transform({
messages: output.messages,
projectPath,
sessionID,
});

// The API requires the conversation to end with a user message.
// Drop trailing pure-text assistant messages (no tool parts), which would
// cause an Anthropic "does not support assistant message prefill" error.
// This must run at ALL layers, including layer 0 (passthrough) — the error
// can occur even when messages fit within the context budget.
//
// Crucially, assistant messages that contain tool parts (completed OR pending)
// must NOT be dropped:
// - Completed tool parts: OpenCode's SDK converts these into tool_result blocks
// sent as user-role messages at the API level. The conversation already ends
// with a user message — dropping would strip the entire current agentic turn
// and cause an infinite tool-call loop (the model restarts from scratch).
// - Pending tool parts: the tool call hasn't returned yet; dropping would make
// the model re-issue the same tool call on the next turn.
//
// Note: at layer 0, result.messages === output.messages (same reference), so
// mutating result.messages here also trims output.messages in place — which is
// safe for prompt caching since we only ever remove trailing messages, never
// reorder or insert.
while (
result.messages.length > 0 &&
result.messages.at(-1)!.info.role !== "user"
) {
const last = result.messages.at(-1)!;
const hasToolParts = last.parts.some((p) => p.type === "tool");
if (hasToolParts) {
// Tool parts → tool_result (user-role) at the API level → no prefill error.
// Stop dropping; the conversation ends correctly as-is.
break;
// The API requires the conversation to end with a user message.
// Drop trailing pure-text assistant messages (no tool parts), which would
// cause an Anthropic "does not support assistant message prefill" error.
// This must run at ALL layers, including layer 0 (passthrough) — the error
// can occur even when messages fit within the context budget.
//
// Crucially, assistant messages that contain tool parts (completed OR pending)
// must NOT be dropped:
// - Completed tool parts: OpenCode's SDK converts these into tool_result blocks
// sent as user-role messages at the API level. The conversation already ends
// with a user message — dropping would strip the entire current agentic turn
// and cause an infinite tool-call loop (the model restarts from scratch).
// - Pending tool parts: the tool call hasn't returned yet; dropping would make
// the model re-issue the same tool call on the next turn.
//
// Note: at layer 0, result.messages === output.messages (same reference), so
// mutating result.messages here also trims output.messages in place — which is
// safe for prompt caching since we only ever remove trailing messages, never
// reorder or insert.
while (
result.messages.length > 0 &&
result.messages.at(-1)!.info.role !== "user"
) {
const last = result.messages.at(-1)!;
const hasToolParts = last.parts.some((p) => p.type === "tool");
if (hasToolParts) {
// Tool parts → tool_result (user-role) at the API level → no prefill error.
// Stop dropping; the conversation ends correctly as-is.
break;
}
const dropped = result.messages.pop()!;
log.warn(
"dropping trailing pure-text",
dropped.info.role,
"message to prevent prefill error. id:",
dropped.info.id,
);
}
const dropped = result.messages.pop()!;
log.warn(
"dropping trailing pure-text",
dropped.info.role,
"message to prevent prefill error. id:",
dropped.info.id,
);
}

// Only restructure messages when the gradient transform is active (layers 1-4).
// Layer 0 means all messages fit within the context budget — leave them alone
// so the append-only sequence stays intact for prompt caching.
if (result.layer > 0) {
output.messages.splice(0, output.messages.length, ...result.messages);
}
// Only restructure messages when the gradient transform is active (layers 1-4).
// Layer 0 means all messages fit within the context budget — leave them alone
// so the append-only sequence stays intact for prompt caching.
if (result.layer > 0) {
output.messages.splice(0, output.messages.length, ...result.messages);
}

if (result.layer >= 2 && sessionID) {
backgroundDistill(sessionID);
if (result.layer >= 2 && sessionID) {
backgroundDistill(sessionID);
}
} catch (e) {
log.error("messages transform: gradient transform failed:", e);
// output.messages untouched — session continues without context management
}
},

Expand Down
33 changes: 21 additions & 12 deletions src/ltm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,15 @@ export type KnowledgeEntry = {
metadata: string | null;
};

/** Columns to select for KnowledgeEntry — excludes the embedding BLOB
* (4KB per entry) which is only needed by vectorSearch() in embedding.ts. */
const KNOWLEDGE_COLS =
"id, project_id, category, title, content, source_session, cross_project, confidence, created_at, updated_at, metadata";

/** Same columns with table alias prefix for use in JOIN queries. */
const KNOWLEDGE_COLS_K =
"k.id, k.project_id, k.category, k.title, k.content, k.source_session, k.cross_project, k.confidence, k.created_at, k.updated_at, k.metadata";

export function create(input: {
projectPath?: string;
category: string;
Expand Down Expand Up @@ -150,7 +159,7 @@ export function forProject(
if (includeCross) {
return db()
.query(
`SELECT * FROM knowledge
`SELECT ${KNOWLEDGE_COLS} FROM knowledge
WHERE (project_id = ? OR (project_id IS NULL) OR (cross_project = 1))
AND confidence > 0.2
ORDER BY confidence DESC, updated_at DESC`,
Expand All @@ -159,7 +168,7 @@ export function forProject(
}
return db()
.query(
`SELECT * FROM knowledge
`SELECT ${KNOWLEDGE_COLS} FROM knowledge
WHERE project_id = ?
AND confidence > 0.2
ORDER BY confidence DESC, updated_at DESC`,
Expand Down Expand Up @@ -264,7 +273,7 @@ export function forSession(
// --- 1. Load project-specific entries ---
const projectEntries = db()
.query(
`SELECT * FROM knowledge
`SELECT ${KNOWLEDGE_COLS} FROM knowledge
WHERE project_id = ? AND cross_project = 0 AND confidence > 0.2
ORDER BY confidence DESC, updated_at DESC`,
)
Expand All @@ -273,7 +282,7 @@ export function forSession(
// --- 2. Load cross-project candidates ---
const crossEntries = db()
.query(
`SELECT * FROM knowledge
`SELECT ${KNOWLEDGE_COLS} FROM knowledge
WHERE (project_id IS NULL OR cross_project = 1) AND confidence > 0.2
ORDER BY confidence DESC, updated_at DESC`,
)
Expand Down Expand Up @@ -370,7 +379,7 @@ export function forSession(
export function all(): KnowledgeEntry[] {
return db()
.query(
"SELECT * FROM knowledge WHERE confidence > 0.2 ORDER BY confidence DESC, updated_at DESC",
`SELECT ${KNOWLEDGE_COLS} FROM knowledge WHERE confidence > 0.2 ORDER BY confidence DESC, updated_at DESC`,
)
.all() as KnowledgeEntry[];
}
Expand All @@ -394,13 +403,13 @@ function searchLike(input: {
const pid = ensureProject(input.projectPath);
return db()
.query(
`SELECT * FROM knowledge WHERE (project_id = ? OR project_id IS NULL OR cross_project = 1) AND confidence > 0.2 AND ${conditions} ORDER BY updated_at DESC LIMIT ?`,
`SELECT ${KNOWLEDGE_COLS} FROM knowledge WHERE (project_id = ? OR project_id IS NULL OR cross_project = 1) AND confidence > 0.2 AND ${conditions} ORDER BY updated_at DESC LIMIT ?`,
)
.all(pid, ...likeParams, input.limit) as KnowledgeEntry[];
}
return db()
.query(
`SELECT * FROM knowledge WHERE confidence > 0.2 AND ${conditions} ORDER BY updated_at DESC LIMIT ?`,
`SELECT ${KNOWLEDGE_COLS} FROM knowledge WHERE confidence > 0.2 AND ${conditions} ORDER BY updated_at DESC LIMIT ?`,
)
.all(...likeParams, input.limit) as KnowledgeEntry[];
}
Expand All @@ -417,13 +426,13 @@ export function search(input: {
const pid = input.projectPath ? ensureProject(input.projectPath) : null;

const ftsSQL = pid
? `SELECT k.* FROM knowledge k
? `SELECT ${KNOWLEDGE_COLS_K} FROM knowledge k
JOIN knowledge_fts f ON k.rowid = f.rowid
WHERE knowledge_fts MATCH ?
AND (k.project_id = ? OR k.project_id IS NULL OR k.cross_project = 1)
AND k.confidence > 0.2
ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT ?`
: `SELECT k.* FROM knowledge k
: `SELECT ${KNOWLEDGE_COLS_K} FROM knowledge k
JOIN knowledge_fts f ON k.rowid = f.rowid
WHERE knowledge_fts MATCH ?
AND k.confidence > 0.2
Expand Down Expand Up @@ -474,13 +483,13 @@ export function searchScored(input: {
const { title, content, category } = ftsWeights();

const ftsSQL = pid
? `SELECT k.*, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge k
? `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge k
JOIN knowledge_fts f ON k.rowid = f.rowid
WHERE knowledge_fts MATCH ?
AND (k.project_id = ? OR k.project_id IS NULL OR k.cross_project = 1)
AND k.confidence > 0.2
ORDER BY rank LIMIT ?`
: `SELECT k.*, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge k
: `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge k
JOIN knowledge_fts f ON k.rowid = f.rowid
WHERE knowledge_fts MATCH ?
AND k.confidence > 0.2
Expand All @@ -507,7 +516,7 @@ export function searchScored(input: {

export function get(id: string): KnowledgeEntry | null {
return db()
.query("SELECT * FROM knowledge WHERE id = ?")
.query(`SELECT ${KNOWLEDGE_COLS} FROM knowledge WHERE id = ?`)
.get(id) as KnowledgeEntry | null;
}

Expand Down
Loading
Loading