diff --git a/pkg/tools/builtin/todo.go b/pkg/tools/builtin/todo.go index 0cf1b55fb..14d9a6bf9 100644 --- a/pkg/tools/builtin/todo.go +++ b/pkg/tools/builtin/todo.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "strings" "sync" "sync/atomic" @@ -53,17 +54,28 @@ type UpdateTodosArgs struct { // Output types for JSON-structured responses. +type CreateTodoOutput struct { + Created Todo `json:"created" jsonschema:"The created todo item"` + AllTodos []Todo `json:"all_todos" jsonschema:"Current state of all todo items"` + Reminder string `json:"reminder,omitempty" jsonschema:"Reminder about incomplete todos that still need to be completed"` +} + type CreateTodosOutput struct { - Created []Todo `json:"created" jsonschema:"List of created todo items"` + Created []Todo `json:"created" jsonschema:"List of created todo items"` + AllTodos []Todo `json:"all_todos" jsonschema:"Current state of all todo items"` + Reminder string `json:"reminder,omitempty" jsonschema:"Reminder about incomplete todos that still need to be completed"` } type UpdateTodosOutput struct { Updated []TodoUpdate `json:"updated,omitempty" jsonschema:"List of successfully updated todos"` NotFound []string `json:"not_found,omitempty" jsonschema:"IDs of todos that were not found"` + AllTodos []Todo `json:"all_todos" jsonschema:"Current state of all todo items"` + Reminder string `json:"reminder,omitempty" jsonschema:"Reminder about incomplete todos that still need to be completed"` } type ListTodosOutput struct { - Todos []Todo `json:"todos" jsonschema:"List of all current todo items"` + Todos []Todo `json:"todos" jsonschema:"List of all current todo items"` + Reminder string `json:"reminder,omitempty" jsonschema:"Reminder about incomplete todos that still need to be completed"` } // TodoStorage defines the storage layer for todo items. @@ -157,17 +169,20 @@ func (t *TodoTool) Instructions() string { IMPORTANT: You MUST use these tools to track the progress of your tasks: 1. Before starting any complex task: - - Create a todo for each major step using create_todo + - Create a todo for each major step using create_todos (prefer batch creation) - Break down complex steps into smaller todos 2. While working: + - Update todo status to "in-progress" BEFORE starting each task + - Mark todos as "completed" IMMEDIATELY after finishing each task - Use list_todos frequently to keep track of remaining work - - Mark todos as "completed" when finished -3. Task Management Rules: - - Never start a new task without creating a todo for it - - Always check list_todos before responding to ensure no steps are missed - - Update todo status to reflect current progress +3. Task Completion Rules: + - EVERY todo you create MUST eventually be marked "completed" + - Before sending your final response, call list_todos to verify ALL todos are completed + - If any todos remain pending or in-progress, complete them or mark them completed before responding + - Never leave todos in a pending or in-progress state when you are done working + - When updating multiple todos, batch them in a single update_todos call This toolset is REQUIRED for maintaining task state and ensuring all steps are completed.` } @@ -196,7 +211,12 @@ func (h *todoHandler) jsonResult(v any) (*tools.ToolCallResult, error) { } func (h *todoHandler) createTodo(_ context.Context, params CreateTodoArgs) (*tools.ToolCallResult, error) { - return h.jsonResult(h.addTodo(params.Description)) + created := h.addTodo(params.Description) + return h.jsonResult(CreateTodoOutput{ + Created: created, + AllTodos: h.storage.All(), + Reminder: h.incompleteReminder(), + }) } func (h *todoHandler) createTodos(_ context.Context, params CreateTodosArgs) (*tools.ToolCallResult, error) { @@ -204,7 +224,11 @@ func (h *todoHandler) createTodos(_ context.Context, params CreateTodosArgs) (*t for _, desc := range params.Descriptions { created = append(created, h.addTodo(desc)) } - return h.jsonResult(CreateTodosOutput{Created: created}) + return h.jsonResult(CreateTodosOutput{ + Created: created, + AllTodos: h.storage.All(), + Reminder: h.incompleteReminder(), + }) } func (h *todoHandler) updateTodos(_ context.Context, params UpdateTodosArgs) (*tools.ToolCallResult, error) { @@ -233,24 +257,38 @@ func (h *todoHandler) updateTodos(_ context.Context, params UpdateTodosArgs) (*t return res, nil } - if h.allCompleted() { - h.storage.Clear() - } + result.AllTodos = h.storage.All() + result.Reminder = h.incompleteReminder() return h.jsonResult(result) } -func (h *todoHandler) allCompleted() bool { +// incompleteReminder returns a reminder string listing any non-completed todos, +// or an empty string if all are completed (or storage is empty). +func (h *todoHandler) incompleteReminder() string { all := h.storage.All() - if len(all) == 0 { - return false - } + var pending, inProgress []string for _, todo := range all { - if todo.Status != "completed" { - return false + switch todo.Status { + case "pending": + pending = append(pending, fmt.Sprintf("[%s] %s", todo.ID, todo.Description)) + case "in-progress": + inProgress = append(inProgress, fmt.Sprintf("[%s] %s", todo.ID, todo.Description)) } } - return true + if len(pending) == 0 && len(inProgress) == 0 { + return "" + } + + var b strings.Builder + b.WriteString("The following todos are still incomplete and MUST be completed:") + for _, s := range inProgress { + b.WriteString(" (in-progress) " + s) + } + for _, s := range pending { + b.WriteString(" (pending) " + s) + } + return b.String() } func (h *todoHandler) listTodos(_ context.Context, _ tools.ToolCall) (*tools.ToolCallResult, error) { @@ -258,7 +296,9 @@ func (h *todoHandler) listTodos(_ context.Context, _ tools.ToolCall) (*tools.Too if todos == nil { todos = []Todo{} } - return h.jsonResult(ListTodosOutput{Todos: todos}) + out := ListTodosOutput{Todos: todos} + out.Reminder = h.incompleteReminder() + return h.jsonResult(out) } func (t *TodoTool) Tools(context.Context) ([]tools.Tool, error) { @@ -268,7 +308,7 @@ func (t *TodoTool) Tools(context.Context) ([]tools.Tool, error) { Category: "todo", Description: "Create a new todo item with a description", Parameters: tools.MustSchemaFor[CreateTodoArgs](), - OutputSchema: tools.MustSchemaFor[Todo](), + OutputSchema: tools.MustSchemaFor[CreateTodoOutput](), Handler: tools.NewHandler(t.handler.createTodo), Annotations: tools.ToolAnnotations{ Title: "Create TODO", diff --git a/pkg/tools/builtin/todo_test.go b/pkg/tools/builtin/todo_test.go index 6b72f7cf5..b17da8103 100644 --- a/pkg/tools/builtin/todo_test.go +++ b/pkg/tools/builtin/todo_test.go @@ -31,11 +31,16 @@ func TestTodoTool_CreateTodo(t *testing.T) { }) require.NoError(t, err) - var output Todo + var output CreateTodoOutput require.NoError(t, json.Unmarshal([]byte(result.Output), &output)) - assert.Equal(t, "todo_1", output.ID) - assert.Equal(t, "Test todo item", output.Description) - assert.Equal(t, "pending", output.Status) + assert.Equal(t, "todo_1", output.Created.ID) + assert.Equal(t, "Test todo item", output.Created.Description) + assert.Equal(t, "pending", output.Created.Status) + + // Full state is included in the response + require.Len(t, output.AllTodos, 1) + assert.Equal(t, "todo_1", output.AllTodos[0].ID) + assert.Contains(t, output.Reminder, "todo_1") require.Equal(t, 1, storage.Len()) requireMeta(t, result, 1) @@ -59,10 +64,16 @@ func TestTodoTool_CreateTodos(t *testing.T) { assert.Equal(t, "todo_2", output.Created[1].ID) assert.Equal(t, "todo_3", output.Created[2].ID) + // Full state included in response + require.Len(t, output.AllTodos, 3) + assert.Contains(t, output.Reminder, "todo_1") + assert.Contains(t, output.Reminder, "todo_2") + assert.Contains(t, output.Reminder, "todo_3") + assert.Equal(t, 3, storage.Len()) requireMeta(t, result, 3) - // A second call continues the ID sequence + // A second call continues the ID sequence and includes all 4 items result, err = tool.handler.createTodos(t.Context(), CreateTodosArgs{ Descriptions: []string{"Last"}, }) @@ -71,6 +82,7 @@ func TestTodoTool_CreateTodos(t *testing.T) { require.NoError(t, json.Unmarshal([]byte(result.Output), &output)) require.Len(t, output.Created, 1) assert.Equal(t, "todo_4", output.Created[0].ID) + require.Len(t, output.AllTodos, 4) assert.Equal(t, 4, storage.Len()) requireMeta(t, result, 4) } @@ -95,9 +107,28 @@ func TestTodoTool_ListTodos(t *testing.T) { assert.Equal(t, "pending", output.Todos[i].Status) } + // All pending, so reminder should list all of them + assert.Contains(t, output.Reminder, "todo_1") + assert.Contains(t, output.Reminder, "todo_2") + assert.Contains(t, output.Reminder, "todo_3") + requireMeta(t, result, 3) } +func TestTodoTool_ListTodos_Empty(t *testing.T) { + tool := NewTodoTool() + + result, err := tool.handler.listTodos(t.Context(), tools.ToolCall{}) + require.NoError(t, err) + + var output ListTodosOutput + require.NoError(t, json.Unmarshal([]byte(result.Output), &output)) + assert.Empty(t, output.Todos) + assert.Empty(t, output.Reminder) + + requireMeta(t, result, 0) +} + func TestTodoTool_UpdateTodos(t *testing.T) { storage := NewMemoryTodoStorage() tool := NewTodoTool(WithStorage(storage)) @@ -125,6 +156,17 @@ func TestTodoTool_UpdateTodos(t *testing.T) { assert.Equal(t, "in-progress", output.Updated[1].Status) assert.Empty(t, output.NotFound) + // Full state included in response + require.Len(t, output.AllTodos, 3) + assert.Equal(t, "completed", output.AllTodos[0].Status) + assert.Equal(t, "pending", output.AllTodos[1].Status) + assert.Equal(t, "in-progress", output.AllTodos[2].Status) + + // Reminder should list incomplete todos + assert.Contains(t, output.Reminder, "todo_2") + assert.Contains(t, output.Reminder, "todo_3") + assert.NotContains(t, output.Reminder, "todo_1") // completed, should not appear + todos := storage.All() require.Len(t, todos, 3) assert.Equal(t, "completed", todos[0].Status) @@ -159,6 +201,9 @@ func TestTodoTool_UpdateTodos_PartialFailure(t *testing.T) { require.Len(t, output.NotFound, 1) assert.Equal(t, "nonexistent", output.NotFound[0]) + // Reminder should mention the still-pending todo + assert.Contains(t, output.Reminder, "todo_2") + todos := storage.All() require.Len(t, todos, 2) assert.Equal(t, "completed", todos[0].Status) @@ -185,7 +230,7 @@ func TestTodoTool_UpdateTodos_AllNotFound(t *testing.T) { assert.Equal(t, "nonexistent2", output.NotFound[1]) } -func TestTodoTool_UpdateTodos_ClearsWhenAllCompleted(t *testing.T) { +func TestTodoTool_UpdateTodos_AllCompleted_NoAutoRemoval(t *testing.T) { storage := NewMemoryTodoStorage() tool := NewTodoTool(WithStorage(storage)) @@ -205,9 +250,16 @@ func TestTodoTool_UpdateTodos_ClearsWhenAllCompleted(t *testing.T) { var output UpdateTodosOutput require.NoError(t, json.Unmarshal([]byte(result.Output), &output)) require.Len(t, output.Updated, 2) + assert.Empty(t, output.Reminder) // no reminder when all completed - assert.Empty(t, storage.All()) - requireMeta(t, result, 0) + // Full state shows both items as completed + require.Len(t, output.AllTodos, 2) + assert.Equal(t, "completed", output.AllTodos[0].Status) + assert.Equal(t, "completed", output.AllTodos[1].Status) + + // Todos remain in storage (no auto-clear on completion) + assert.Equal(t, 2, storage.Len()) + requireMeta(t, result, 2) } func TestTodoTool_WithStorage(t *testing.T) { @@ -254,6 +306,55 @@ func TestTodoTool_ParametersAreObjects(t *testing.T) { } } +func TestTodoTool_CreateTodo_FullStateOutput(t *testing.T) { + tool := NewTodoTool() + + // Create first todo + result1, err := tool.handler.createTodo(t.Context(), CreateTodoArgs{Description: "First"}) + require.NoError(t, err) + var out1 CreateTodoOutput + require.NoError(t, json.Unmarshal([]byte(result1.Output), &out1)) + require.Len(t, out1.AllTodos, 1) + assert.Contains(t, out1.Reminder, "todo_1") + + // Create second todo — response shows both + result2, err := tool.handler.createTodo(t.Context(), CreateTodoArgs{Description: "Second"}) + require.NoError(t, err) + var out2 CreateTodoOutput + require.NoError(t, json.Unmarshal([]byte(result2.Output), &out2)) + require.Len(t, out2.AllTodos, 2) + assert.Contains(t, out2.Reminder, "todo_1") + assert.Contains(t, out2.Reminder, "todo_2") +} + +func TestTodoTool_UpdateTodos_FullStateOutput(t *testing.T) { + tool := NewTodoTool() + + _, err := tool.handler.createTodos(t.Context(), CreateTodosArgs{ + Descriptions: []string{"A", "B", "C"}, + }) + require.NoError(t, err) + + result, err := tool.handler.updateTodos(t.Context(), UpdateTodosArgs{ + Updates: []TodoUpdate{{ID: "todo_1", Status: "completed"}}, + }) + require.NoError(t, err) + + var output UpdateTodosOutput + require.NoError(t, json.Unmarshal([]byte(result.Output), &output)) + + // AllTodos shows full state including the completed item + require.Len(t, output.AllTodos, 3) + assert.Equal(t, "completed", output.AllTodos[0].Status) + assert.Equal(t, "pending", output.AllTodos[1].Status) + assert.Equal(t, "pending", output.AllTodos[2].Status) + + // Reminder only lists incomplete items + assert.NotContains(t, output.Reminder, "todo_1") + assert.Contains(t, output.Reminder, "todo_2") + assert.Contains(t, output.Reminder, "todo_3") +} + // requireMeta asserts that result.Meta is a []Todo of the expected length. func requireMeta(t *testing.T, result *tools.ToolCallResult, expectedLen int) { t.Helper()