docker · dgageot · Mar 11, 2026 · Mar 10, 2026 · Mar 11, 2026 · docker-agent
@@ -4,6 +4,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"strings"
 	"sync"
 	"sync/atomic"
 
@@ -53,17 +54,28 @@ type UpdateTodosArgs struct {
 
 // Output types for JSON-structured responses.
 
+type CreateTodoOutput struct {
+	Created  Todo   `json:"created" jsonschema:"The created todo item"`
+	AllTodos []Todo `json:"all_todos" jsonschema:"Current state of all todo items"`
+	Reminder string `json:"reminder,omitempty" jsonschema:"Reminder about incomplete todos that still need to be completed"`
+}
+
 type CreateTodosOutput struct {
-	Created []Todo `json:"created" jsonschema:"List of created todo items"`
+	Created  []Todo `json:"created" jsonschema:"List of created todo items"`
+	AllTodos []Todo `json:"all_todos" jsonschema:"Current state of all todo items"`
+	Reminder string `json:"reminder,omitempty" jsonschema:"Reminder about incomplete todos that still need to be completed"`
 }
 
 type UpdateTodosOutput struct {
 	Updated  []TodoUpdate `json:"updated,omitempty" jsonschema:"List of successfully updated todos"`
 	NotFound []string     `json:"not_found,omitempty" jsonschema:"IDs of todos that were not found"`
+	AllTodos []Todo       `json:"all_todos" jsonschema:"Current state of all todo items"`
+	Reminder string       `json:"reminder,omitempty" jsonschema:"Reminder about incomplete todos that still need to be completed"`
 }
 
 type ListTodosOutput struct {
-	Todos []Todo `json:"todos" jsonschema:"List of all current todo items"`
+	Todos    []Todo `json:"todos" jsonschema:"List of all current todo items"`
+	Reminder string `json:"reminder,omitempty" jsonschema:"Reminder about incomplete todos that still need to be completed"`
 }
 
 // TodoStorage defines the storage layer for todo items.
@@ -157,17 +169,20 @@ func (t *TodoTool) Instructions() string {
 IMPORTANT: You MUST use these tools to track the progress of your tasks:
 
 1. Before starting any complex task:
-	- Create a todo for each major step using create_todo
+	- Create a todo for each major step using create_todos (prefer batch creation)
 	- Break down complex steps into smaller todos
 
 2. While working:
+	- Update todo status to "in-progress" BEFORE starting each task
+	- Mark todos as "completed" IMMEDIATELY after finishing each task
 	- Use list_todos frequently to keep track of remaining work
-	- Mark todos as "completed" when finished
 
-3. Task Management Rules:
-	- Never start a new task without creating a todo for it
-	- Always check list_todos before responding to ensure no steps are missed
-	- Update todo status to reflect current progress
+3. Task Completion Rules:
+	- EVERY todo you create MUST eventually be marked "completed"
+	- Before sending your final response, call list_todos to verify ALL todos are completed
+	- If any todos remain pending or in-progress, complete them or mark them completed before responding
+	- Never leave todos in a pending or in-progress state when you are done working
+	- When updating multiple todos, batch them in a single update_todos call
 
 This toolset is REQUIRED for maintaining task state and ensuring all steps are completed.`
 }
@@ -196,15 +211,24 @@ func (h *todoHandler) jsonResult(v any) (*tools.ToolCallResult, error) {
 }
 
 func (h *todoHandler) createTodo(_ context.Context, params CreateTodoArgs) (*tools.ToolCallResult, error) {
-	return h.jsonResult(h.addTodo(params.Description))
+	created := h.addTodo(params.Description)
+	return h.jsonResult(CreateTodoOutput{
+		Created:  created,
+		AllTodos: h.storage.All(),
+		Reminder: h.incompleteReminder(),
+	})
 }
 
 func (h *todoHandler) createTodos(_ context.Context, params CreateTodosArgs) (*tools.ToolCallResult, error) {
 	created := make([]Todo, 0, len(params.Descriptions))
 	for _, desc := range params.Descriptions {
 		created = append(created, h.addTodo(desc))
 	}
-	return h.jsonResult(CreateTodosOutput{Created: created})
+	return h.jsonResult(CreateTodosOutput{
+		Created:  created,
+		AllTodos: h.storage.All(),
+		Reminder: h.incompleteReminder(),
+	})
 }
 
 func (h *todoHandler) updateTodos(_ context.Context, params UpdateTodosArgs) (*tools.ToolCallResult, error) {
@@ -233,32 +257,48 @@ func (h *todoHandler) updateTodos(_ context.Context, params UpdateTodosArgs) (*t
 		return res, nil
 	}
 
-	if h.allCompleted() {
-		h.storage.Clear()
-	}
+	result.AllTodos = h.storage.All()
+	result.Reminder = h.incompleteReminder()
 
 	return h.jsonResult(result)
 }
 
-func (h *todoHandler) allCompleted() bool {
+// incompleteReminder returns a reminder string listing any non-completed todos,
+// or an empty string if all are completed (or storage is empty).
+func (h *todoHandler) incompleteReminder() string {
 	all := h.storage.All()
-	if len(all) == 0 {
-		return false
-	}
+	var pending, inProgress []string
 	for _, todo := range all {
-		if todo.Status != "completed" {
-			return false
+		switch todo.Status {
+		case "pending":
+			pending = append(pending, fmt.Sprintf("[%s] %s", todo.ID, todo.Description))
+		case "in-progress":
+			inProgress = append(inProgress, fmt.Sprintf("[%s] %s", todo.ID, todo.Description))
 		}
 	}
-	return true
+	if len(pending) == 0 && len(inProgress) == 0 {
+		return ""
+	}
+
+	var b strings.Builder
+	b.WriteString("The following todos are still incomplete and MUST be completed:")
+	for _, s := range inProgress {
+		b.WriteString(" (in-progress) " + s)
+	}
+	for _, s := range pending {
+		b.WriteString(" (pending) " + s)
+	}
+	return b.String()
 }
 
 func (h *todoHandler) listTodos(_ context.Context, _ tools.ToolCall) (*tools.ToolCallResult, error) {
 	todos := h.storage.All()
 	if todos == nil {
 		todos = []Todo{}
 	}
-	return h.jsonResult(ListTodosOutput{Todos: todos})
+	out := ListTodosOutput{Todos: todos}
+	out.Reminder = h.incompleteReminder()
+	return h.jsonResult(out)
 }
 
 func (t *TodoTool) Tools(context.Context) ([]tools.Tool, error) {
@@ -268,7 +308,7 @@ func (t *TodoTool) Tools(context.Context) ([]tools.Tool, error) {
 			Category:     "todo",
 			Description:  "Create a new todo item with a description",
 			Parameters:   tools.MustSchemaFor[CreateTodoArgs](),
-			OutputSchema: tools.MustSchemaFor[Todo](),
+			OutputSchema: tools.MustSchemaFor[CreateTodoOutput](),
 			Handler:      tools.NewHandler(t.handler.createTodo),
 			Annotations: tools.ToolAnnotations{
 				Title:        "Create TODO",

@@ -31,11 +31,16 @@ func TestTodoTool_CreateTodo(t *testing.T) {
 	})
 	require.NoError(t, err)
 
-	var output Todo
+	var output CreateTodoOutput
 	require.NoError(t, json.Unmarshal([]byte(result.Output), &output))
-	assert.Equal(t, "todo_1", output.ID)
-	assert.Equal(t, "Test todo item", output.Description)
-	assert.Equal(t, "pending", output.Status)
+	assert.Equal(t, "todo_1", output.Created.ID)
+	assert.Equal(t, "Test todo item", output.Created.Description)
+	assert.Equal(t, "pending", output.Created.Status)
+
+	// Full state is included in the response
+	require.Len(t, output.AllTodos, 1)
+	assert.Equal(t, "todo_1", output.AllTodos[0].ID)
+	assert.Contains(t, output.Reminder, "todo_1")
 
 	require.Equal(t, 1, storage.Len())
 	requireMeta(t, result, 1)
@@ -59,10 +64,16 @@ func TestTodoTool_CreateTodos(t *testing.T) {
 	assert.Equal(t, "todo_2", output.Created[1].ID)
 	assert.Equal(t, "todo_3", output.Created[2].ID)
 
+	// Full state included in response
+	require.Len(t, output.AllTodos, 3)
+	assert.Contains(t, output.Reminder, "todo_1")
+	assert.Contains(t, output.Reminder, "todo_2")
+	assert.Contains(t, output.Reminder, "todo_3")
+
 	assert.Equal(t, 3, storage.Len())
 	requireMeta(t, result, 3)
 
-	// A second call continues the ID sequence
+	// A second call continues the ID sequence and includes all 4 items
 	result, err = tool.handler.createTodos(t.Context(), CreateTodosArgs{
 		Descriptions: []string{"Last"},
 	})
@@ -71,6 +82,7 @@ func TestTodoTool_CreateTodos(t *testing.T) {
 	require.NoError(t, json.Unmarshal([]byte(result.Output), &output))
 	require.Len(t, output.Created, 1)
 	assert.Equal(t, "todo_4", output.Created[0].ID)
+	require.Len(t, output.AllTodos, 4)
 	assert.Equal(t, 4, storage.Len())
 	requireMeta(t, result, 4)
 }
@@ -95,9 +107,28 @@ func TestTodoTool_ListTodos(t *testing.T) {
 		assert.Equal(t, "pending", output.Todos[i].Status)
 	}
 
+	// All pending, so reminder should list all of them
+	assert.Contains(t, output.Reminder, "todo_1")
+	assert.Contains(t, output.Reminder, "todo_2")
+	assert.Contains(t, output.Reminder, "todo_3")
+
 	requireMeta(t, result, 3)
 }
 
+func TestTodoTool_ListTodos_Empty(t *testing.T) {
+	tool := NewTodoTool()
+
+	result, err := tool.handler.listTodos(t.Context(), tools.ToolCall{})
+	require.NoError(t, err)
+
+	var output ListTodosOutput
+	require.NoError(t, json.Unmarshal([]byte(result.Output), &output))
+	assert.Empty(t, output.Todos)
+	assert.Empty(t, output.Reminder)
+
+	requireMeta(t, result, 0)
+}
+
 func TestTodoTool_UpdateTodos(t *testing.T) {
 	storage := NewMemoryTodoStorage()
 	tool := NewTodoTool(WithStorage(storage))
@@ -125,6 +156,17 @@ func TestTodoTool_UpdateTodos(t *testing.T) {
 	assert.Equal(t, "in-progress", output.Updated[1].Status)
 	assert.Empty(t, output.NotFound)
 
+	// Full state included in response
+	require.Len(t, output.AllTodos, 3)
+	assert.Equal(t, "completed", output.AllTodos[0].Status)
+	assert.Equal(t, "pending", output.AllTodos[1].Status)
+	assert.Equal(t, "in-progress", output.AllTodos[2].Status)
+
+	// Reminder should list incomplete todos
+	assert.Contains(t, output.Reminder, "todo_2")
+	assert.Contains(t, output.Reminder, "todo_3")
+	assert.NotContains(t, output.Reminder, "todo_1") // completed, should not appear
+
 	todos := storage.All()
 	require.Len(t, todos, 3)
 	assert.Equal(t, "completed", todos[0].Status)
@@ -159,6 +201,9 @@ func TestTodoTool_UpdateTodos_PartialFailure(t *testing.T) {
 	require.Len(t, output.NotFound, 1)
 	assert.Equal(t, "nonexistent", output.NotFound[0])
 
+	// Reminder should mention the still-pending todo
+	assert.Contains(t, output.Reminder, "todo_2")
+
 	todos := storage.All()
 	require.Len(t, todos, 2)
 	assert.Equal(t, "completed", todos[0].Status)
@@ -185,7 +230,7 @@ func TestTodoTool_UpdateTodos_AllNotFound(t *testing.T) {
 	assert.Equal(t, "nonexistent2", output.NotFound[1])
 }
 
-func TestTodoTool_UpdateTodos_ClearsWhenAllCompleted(t *testing.T) {
+func TestTodoTool_UpdateTodos_AllCompleted_NoAutoRemoval(t *testing.T) {
 	storage := NewMemoryTodoStorage()
 	tool := NewTodoTool(WithStorage(storage))
 
@@ -205,9 +250,16 @@ func TestTodoTool_UpdateTodos_ClearsWhenAllCompleted(t *testing.T) {
 	var output UpdateTodosOutput
 	require.NoError(t, json.Unmarshal([]byte(result.Output), &output))
 	require.Len(t, output.Updated, 2)
+	assert.Empty(t, output.Reminder) // no reminder when all completed
 
-	assert.Empty(t, storage.All())
-	requireMeta(t, result, 0)
+	// Full state shows both items as completed
+	require.Len(t, output.AllTodos, 2)
+	assert.Equal(t, "completed", output.AllTodos[0].Status)
+	assert.Equal(t, "completed", output.AllTodos[1].Status)
+
+	// Todos remain in storage (no auto-clear on completion)
+	assert.Equal(t, 2, storage.Len())
+	requireMeta(t, result, 2)
 }
 
 func TestTodoTool_WithStorage(t *testing.T) {
@@ -254,6 +306,55 @@ func TestTodoTool_ParametersAreObjects(t *testing.T) {
 	}
 }
 
+func TestTodoTool_CreateTodo_FullStateOutput(t *testing.T) {
+	tool := NewTodoTool()
+
+	// Create first todo
+	result1, err := tool.handler.createTodo(t.Context(), CreateTodoArgs{Description: "First"})
+	require.NoError(t, err)
+	var out1 CreateTodoOutput
+	require.NoError(t, json.Unmarshal([]byte(result1.Output), &out1))
+	require.Len(t, out1.AllTodos, 1)
+	assert.Contains(t, out1.Reminder, "todo_1")
+
+	// Create second todo — response shows both
+	result2, err := tool.handler.createTodo(t.Context(), CreateTodoArgs{Description: "Second"})
+	require.NoError(t, err)
+	var out2 CreateTodoOutput
+	require.NoError(t, json.Unmarshal([]byte(result2.Output), &out2))
+	require.Len(t, out2.AllTodos, 2)
+	assert.Contains(t, out2.Reminder, "todo_1")
+	assert.Contains(t, out2.Reminder, "todo_2")
+}
+
+func TestTodoTool_UpdateTodos_FullStateOutput(t *testing.T) {
+	tool := NewTodoTool()
+
+	_, err := tool.handler.createTodos(t.Context(), CreateTodosArgs{
+		Descriptions: []string{"A", "B", "C"},
+	})
+	require.NoError(t, err)
+
+	result, err := tool.handler.updateTodos(t.Context(), UpdateTodosArgs{
+		Updates: []TodoUpdate{{ID: "todo_1", Status: "completed"}},
+	})
+	require.NoError(t, err)
+
+	var output UpdateTodosOutput
+	require.NoError(t, json.Unmarshal([]byte(result.Output), &output))
+
+	// AllTodos shows full state including the completed item
+	require.Len(t, output.AllTodos, 3)
+	assert.Equal(t, "completed", output.AllTodos[0].Status)
+	assert.Equal(t, "pending", output.AllTodos[1].Status)
+	assert.Equal(t, "pending", output.AllTodos[2].Status)
+
+	// Reminder only lists incomplete items
+	assert.NotContains(t, output.Reminder, "todo_1")
+	assert.Contains(t, output.Reminder, "todo_2")
+	assert.Contains(t, output.Reminder, "todo_3")
+}
+
 // requireMeta asserts that result.Meta is a []Todo of the expected length.
 func requireMeta(t *testing.T, result *tools.ToolCallResult, expectedLen int) {
 	t.Helper()