Abernaughty · Abernaughty · Apr 5, 2026 · Apr 5, 2026 · Apr 5, 2026 · Apr 5, 2026
diff --git a/dev-suite/src/memory/chroma_store.py b/dev-suite/src/memory/chroma_store.py
@@ -246,10 +246,15 @@ def query(
         tiers: list[MemoryTier] | None = None,
         n_results: int = 10,
         task_id: str | None = None,
+        min_score: float | None = None,
     ) -> list[MemoryQueryResult]:
         """Query memory with optional tier, module, and task_id filters.
 
         Returns list of MemoryQueryResult. Automatically excludes expired entries.
+
+        Args:
+            min_score: Minimum similarity score (0.0-1.0) to include. Entries
+                below this threshold are filtered out. None means no filtering.
         """
         where_clauses: list[dict] = []
         if tiers:
@@ -286,6 +291,10 @@ def query(
             if expires > 0 and expires < now:
                 continue
 
+            score = 1 - dist
+            if min_score is not None and score < min_score:
+                continue
+
             entries.append(
                 MemoryQueryResult(
                     content=doc,
@@ -299,7 +308,7 @@ def query(
                     verified=meta.get("verified", True),
                     sandbox_origin=meta.get("sandbox_origin", "none"),
                     mutable=meta.get("mutable", True),
-                    score=1 - dist,
+                    score=score,
                 )
             )
 

diff --git a/dev-suite/src/memory/protocol.py b/dev-suite/src/memory/protocol.py
@@ -160,8 +160,13 @@ def query(
         tiers: list[MemoryTier] | None = None,
         n_results: int = 10,
         task_id: str | None = None,
+        min_score: float | None = None,
     ) -> list[MemoryQueryResult]:
-        """Query memory with optional filters. Excludes expired entries."""
+        """Query memory with optional filters. Excludes expired entries.
+
+        Args:
+            min_score: Minimum similarity score (0.0-1.0). None = no filtering.
+        """
         ...
 
     def get_pending_approvals(self) -> list[dict]:

diff --git a/dev-suite/src/orchestrator.py b/dev-suite/src/orchestrator.py
@@ -212,7 +212,7 @@ def _get_memory_store() -> MemoryStore:
 def _fetch_memory_context(task_description: str) -> list[str]:
     try:
         store = _get_memory_store()
-        results = store.query(task_description, n_results=10)
+        results = store.query(task_description, n_results=10, min_score=0.3)
         return [r.content for r in results]
     except Exception:
         return []
@@ -444,7 +444,17 @@ def developer_node(state: GraphState, config: RunnableConfig | None = None) -> d
     content = _extract_text_content(response.content)
     trace.append(f"developer: code generated ({len(content)} chars)")
     logger.info("[DEV] done. tokens_used now=%d", tokens_used)
-    memory_writes.append({"content": f"Implemented blueprint {blueprint.task_id}: {blueprint.instructions[:200]}", "tier": "l1", "module": _infer_module(blueprint.target_files), "source_agent": "developer", "confidence": 1.0, "sandbox_origin": "locked-down", "related_files": ",".join(blueprint.target_files), "task_id": blueprint.task_id})
+    # Deduplicate: on retries, replace existing developer entry for this task_id
+    # rather than appending duplicates (trace review showed 3x identical entries).
+    new_entry = {"content": f"Implemented blueprint {blueprint.task_id}: {blueprint.instructions[:200]}", "tier": "l1", "module": _infer_module(blueprint.target_files), "source_agent": "developer", "confidence": 1.0, "sandbox_origin": "locked-down", "related_files": ",".join(blueprint.target_files), "task_id": blueprint.task_id}
+    replaced = False
+    for i, existing in enumerate(memory_writes):
+        if existing.get("task_id") == blueprint.task_id and existing.get("source_agent") == "developer":
+            memory_writes[i] = new_entry
+            replaced = True
+            break
+    if not replaced:
+        memory_writes.append(new_entry)
     return {"generated_code": content, "status": WorkflowStatus.REVIEWING, "tokens_used": tokens_used, "trace": trace, "memory_writes": memory_writes, "tool_calls_log": tool_calls_log}
 
 
@@ -459,6 +469,42 @@ def apply_code_node(state: GraphState) -> dict:
     if not blueprint:
         trace.append("apply_code: no blueprint -- skipping")
         return {"parsed_files": [], "trace": trace}
+
+    # Issue #105: Use per-task workspace from GraphState, fall back to env var
+    ws_from_state = state.get("workspace_root", "")
+    workspace_root = Path(ws_from_state).resolve() if ws_from_state else _get_workspace_root()
+
+    # When the developer used filesystem_write tools, the clean code is already
+    # on disk. Reading from disk avoids re-parsing the LLM's summary text which
+    # often contains markdown fences and prose that corrupt the code.
+    # Trace review (task-ba22eafa) showed 2 wasted retries from this mismatch.
+    tool_calls_log = state.get("tool_calls_log", [])
+    wrote_via_tools = any(
+        tc.get("tool") == "filesystem_write" and tc.get("success")
+        for tc in tool_calls_log
+    )
+
+    if wrote_via_tools and blueprint.target_files:
+        disk_files = []
+        for target_path in blueprint.target_files:
+            full_path = workspace_root / target_path
+            if full_path.is_file():
+                try:
+                    content = full_path.read_text(encoding="utf-8")
+                    disk_files.append({"path": target_path, "content": content})
+                except Exception as e:
+                    logger.warning("[APPLY_CODE] Failed to read tool-written file %s: %s", target_path, e)
+                    trace.append(f"apply_code: failed to read {target_path} from disk -- {e}")
+
+        if disk_files:
+            total_chars = sum(len(f["content"]) for f in disk_files)
+            trace.append(f"apply_code: read {len(disk_files)} file(s) from disk (tool-written, {total_chars:,} chars total)")
+            logger.info("[APPLY_CODE] Read %d tool-written files (%d chars) from %s", len(disk_files), total_chars, workspace_root)
+            return {"parsed_files": disk_files, "trace": trace}
+
+        trace.append("apply_code: tool-written files not found on disk, falling back to parser")
+
+    # Fallback: parse generated_code (single-shot mode or disk read failed)
     try:
         parsed = parse_generated_code(generated_code)
     except CodeParserError as e:
@@ -468,9 +514,6 @@ def apply_code_node(state: GraphState) -> dict:
     if not parsed:
         trace.append("apply_code: parser returned no files")
         return {"parsed_files": [], "trace": trace}
-    # Issue #105: Use per-task workspace from GraphState, fall back to env var
-    ws_from_state = state.get("workspace_root", "")
-    workspace_root = Path(ws_from_state).resolve() if ws_from_state else _get_workspace_root()
     safe_files = validate_paths_for_workspace(parsed, workspace_root)
     if len(safe_files) < len(parsed):
         skipped = len(parsed) - len(safe_files)
@@ -671,18 +714,6 @@ def flush_memory_node(state: GraphState) -> dict:
 
 
 async def _publish_code_async(state: dict) -> dict:
-    """Push parsed files to a branch and open a PR.
-
-    This is an infrastructure node -- no LLM calls. It uses the
-    GitHubPRProvider directly via httpx for branch creation,
-    file push, and PR opening.
-
-    Guard chain (any condition skips gracefully):
-    1. No GITHUB_TOKEN configured
-    2. publish_pr is False (user opted out)
-    3. No parsed_files in state
-    4. No blueprint in state
-    """
     from .api.github_prs import github_pr_provider
 
     trace = list(state.get("trace", []))
@@ -724,13 +755,10 @@ async def _publish_code_async(state: dict) -> dict:
             return {"trace": trace}
 
         trace.append(f"publish_code: pushing {len(parsed_files)} file(s)")
-        # Compute repo-relative paths: if workspace_root is a subdirectory
-        # of the repo, prefix file paths so the PR targets correct locations.
         workspace_root = state.get("workspace_root", "")
         repo_subdir = ""
         if workspace_root:
             ws = Path(workspace_root).resolve()
-            # Walk up to find .git directory — that's the repo root
             for parent in [ws, *ws.parents]:
                 if (parent / ".git").exists():
                     try:
@@ -805,12 +833,7 @@ async def _publish_code_async(state: dict) -> dict:
 
 
 def publish_code_node(state: GraphState) -> dict:
-    """Push files to GitHub branch and open PR after QA passes.
-
-    Issue #89: This is an infrastructure node (no LLM calls).
-    Runs between QA-pass and flush_memory in the graph.
-    Uses _run_async() to bridge sync node execution to async GitHub API.
-    """
+    """Push files to GitHub branch and open PR after QA passes."""
     return _run_async(_publish_code_async(state))
 
 

diff --git a/dev-suite/tests/test_apply_code.py b/dev-suite/tests/test_apply_code.py
@@ -179,3 +179,115 @@ def test_status_not_changed(self, tmp_path):
 
         # status should not be in the result (unchanged)
         assert "status" not in result
+
+
+class TestApplyCodeDiskRead:
+    """Tests for apply_code_node's disk-read path when developer used tools."""
+
+    def _make_state(self, **overrides):
+        from src.agents.architect import Blueprint
+
+        base = {
+            "task_description": "test task",
+            "blueprint": Blueprint(
+                task_id="test-disk",
+                target_files=["app.py"],
+                instructions="build the app",
+                constraints=[],
+                acceptance_criteria=["runs"],
+            ),
+            "generated_code": "## Summary\n\nDone.\n\n# --- FILE: app.py ---\n```python\nprint('hello')\n```\n",
+            "failure_report": None,
+            "status": WorkflowStatus.BUILDING,
+            "retry_count": 0,
+            "tokens_used": 0,
+            "error_message": "",
+            "memory_context": [],
+            "memory_writes": [],
+            "trace": [],
+            "sandbox_result": None,
+            "parsed_files": [],
+            "tool_calls_log": [],
+        }
+        base.update(overrides)
+        return base
+
+    def test_prefers_disk_over_parser_when_tools_used(self, tmp_path):
+        """When filesystem_write was used, read files from disk instead of re-parsing."""
+        from src.orchestrator import apply_code_node
+
+        # Write clean code to disk (simulating what filesystem_write did)
+        (tmp_path / "app.py").write_text("print('hello from disk')\n")
+
+        # generated_code has fences that would corrupt parsing
+        state = self._make_state(
+            workspace_root=str(tmp_path),
+            tool_calls_log=[
+                {"agent": "developer", "turn": 1, "tool": "filesystem_write",
+                 "args_preview": "{}", "result_preview": "ok", "success": True},
+            ],
+        )
+
+        result = apply_code_node(state)
+
+        assert len(result["parsed_files"]) == 1
+        assert result["parsed_files"][0]["path"] == "app.py"
+        assert result["parsed_files"][0]["content"] == "print('hello from disk')\n"
+        assert any("tool-written" in t for t in result["trace"])
+
+    def test_falls_back_to_parser_without_tools(self, tmp_path):
+        """Without filesystem_write calls, uses the parser as before."""
+        from src.orchestrator import apply_code_node
+
+        state = self._make_state(
+            workspace_root=str(tmp_path),
+            generated_code="# --- FILE: app.py ---\nprint('from parser')\n",
+            tool_calls_log=[],
+        )
+
+        with patch("src.orchestrator._get_workspace_root", return_value=tmp_path):
+            result = apply_code_node(state)
+
+        assert len(result["parsed_files"]) == 1
+        assert "from parser" in result["parsed_files"][0]["content"]
+        assert not any("tool-written" in t for t in result["trace"])
+
+    def test_falls_back_when_disk_files_missing(self, tmp_path):
+        """If tool-written files aren't on disk, falls back to parser."""
+        from src.orchestrator import apply_code_node
+
+        # Don't write anything to disk
+        state = self._make_state(
+            workspace_root=str(tmp_path),
+            generated_code="# --- FILE: app.py ---\nprint('fallback')\n",
+            tool_calls_log=[
+                {"agent": "developer", "turn": 1, "tool": "filesystem_write",
+                 "args_preview": "{}", "result_preview": "ok", "success": True},
+            ],
+        )
+
+        result = apply_code_node(state)
+
+        assert len(result["parsed_files"]) == 1
+        assert "fallback" in result["parsed_files"][0]["content"]
+        assert any("falling back" in t for t in result["trace"])
+
+    def test_ignores_failed_filesystem_write(self, tmp_path):
+        """Failed filesystem_write calls don't trigger disk-read path."""
+        from src.orchestrator import apply_code_node
+
+        state = self._make_state(
+            workspace_root=str(tmp_path),
+            generated_code="# --- FILE: app.py ---\nprint('parsed')\n",
+            tool_calls_log=[
+                {"agent": "developer", "turn": 1, "tool": "filesystem_write",
+                 "args_preview": "{}", "result_preview": "Error", "success": False},
+            ],
+        )
+
+        with patch("src.orchestrator._get_workspace_root", return_value=tmp_path):
+            result = apply_code_node(state)
+
+        # Should use parser path since the write failed
+        assert len(result["parsed_files"]) == 1
+        assert "parsed" in result["parsed_files"][0]["content"]
diff --git a/dev-suite/tests/test_memory.py b/dev-suite/tests/test_memory.py
@@ -110,6 +110,36 @@ def test_query_filter_by_task_id(self, store):
         assert len(results_a) == 1
         assert results_a[0].task_id == "task-a"
 
+    def test_query_min_score_filters_irrelevant(self, store):
+        """min_score param filters out low-relevance entries."""
+        store.add_l1("Python triforce ASCII art pattern printer")
+        store.add_l1("JavaScript greet function with type hints")
+        # Query for triforce — greet entry should be dissimilar
+        all_results = store.query("triforce ASCII art", min_score=None)
+        assert len(all_results) == 2  # both returned without filter
+        # With a score threshold, only the relevant entry should survive
+        filtered = store.query("triforce ASCII art", min_score=0.3)
+        # The triforce entry should score higher than the greet entry
+        assert len(filtered) >= 1
+        assert any("triforce" in r.content.lower() for r in filtered)
+
+    def test_query_min_score_none_returns_all(self, store):
+        """min_score=None (default) returns all results like before."""
+        store.add_l1("Alpha entry about dogs")
+        store.add_l1("Beta entry about cats")
+        store.add_l1("Gamma entry about quantum physics")
+        results_none = store.query("dogs", min_score=None)
+        results_default = store.query("dogs")
+        assert len(results_none) == len(results_default)
+
+    def test_query_min_score_high_threshold(self, store):
+        """Very high min_score filters out most results."""
+        store.add_l1("Specific technical entry about Rust ownership")
+        store.add_l1("Completely unrelated cooking recipe for pasta")
+        results = store.query("Rust ownership borrow checker", min_score=0.99)
+        # At 0.99 threshold, almost nothing should match
+        assert len(results) <= 1
+
 
 class TestBackwardCompatibility:
     def test_add_and_query_l0_core(self, store):