Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 62 additions & 22 deletions pkg/tools/builtin/todo.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"fmt"
"strings"
"sync"
"sync/atomic"

Expand Down Expand Up @@ -53,17 +54,28 @@ type UpdateTodosArgs struct {

// Output types for JSON-structured responses.

type CreateTodoOutput struct {
Created Todo `json:"created" jsonschema:"The created todo item"`
AllTodos []Todo `json:"all_todos" jsonschema:"Current state of all todo items"`
Reminder string `json:"reminder,omitempty" jsonschema:"Reminder about incomplete todos that still need to be completed"`
}

type CreateTodosOutput struct {
Created []Todo `json:"created" jsonschema:"List of created todo items"`
Created []Todo `json:"created" jsonschema:"List of created todo items"`
AllTodos []Todo `json:"all_todos" jsonschema:"Current state of all todo items"`
Reminder string `json:"reminder,omitempty" jsonschema:"Reminder about incomplete todos that still need to be completed"`
}

type UpdateTodosOutput struct {
Updated []TodoUpdate `json:"updated,omitempty" jsonschema:"List of successfully updated todos"`
NotFound []string `json:"not_found,omitempty" jsonschema:"IDs of todos that were not found"`
AllTodos []Todo `json:"all_todos" jsonschema:"Current state of all todo items"`
Reminder string `json:"reminder,omitempty" jsonschema:"Reminder about incomplete todos that still need to be completed"`
}

type ListTodosOutput struct {
Todos []Todo `json:"todos" jsonschema:"List of all current todo items"`
Todos []Todo `json:"todos" jsonschema:"List of all current todo items"`
Reminder string `json:"reminder,omitempty" jsonschema:"Reminder about incomplete todos that still need to be completed"`
}

// TodoStorage defines the storage layer for todo items.
Expand Down Expand Up @@ -157,17 +169,20 @@ func (t *TodoTool) Instructions() string {
IMPORTANT: You MUST use these tools to track the progress of your tasks:

1. Before starting any complex task:
- Create a todo for each major step using create_todo
- Create a todo for each major step using create_todos (prefer batch creation)
- Break down complex steps into smaller todos

2. While working:
- Update todo status to "in-progress" BEFORE starting each task
- Mark todos as "completed" IMMEDIATELY after finishing each task
- Use list_todos frequently to keep track of remaining work
- Mark todos as "completed" when finished

3. Task Management Rules:
- Never start a new task without creating a todo for it
- Always check list_todos before responding to ensure no steps are missed
- Update todo status to reflect current progress
3. Task Completion Rules:
- EVERY todo you create MUST eventually be marked "completed"
- Before sending your final response, call list_todos to verify ALL todos are completed
- If any todos remain pending or in-progress, complete them or mark them completed before responding
- Never leave todos in a pending or in-progress state when you are done working
- When updating multiple todos, batch them in a single update_todos call

This toolset is REQUIRED for maintaining task state and ensuring all steps are completed.`
}
Expand Down Expand Up @@ -196,15 +211,24 @@ func (h *todoHandler) jsonResult(v any) (*tools.ToolCallResult, error) {
}

func (h *todoHandler) createTodo(_ context.Context, params CreateTodoArgs) (*tools.ToolCallResult, error) {
return h.jsonResult(h.addTodo(params.Description))
created := h.addTodo(params.Description)
return h.jsonResult(CreateTodoOutput{
Created: created,
AllTodos: h.storage.All(),
Reminder: h.incompleteReminder(),
})
}

func (h *todoHandler) createTodos(_ context.Context, params CreateTodosArgs) (*tools.ToolCallResult, error) {
created := make([]Todo, 0, len(params.Descriptions))
for _, desc := range params.Descriptions {
created = append(created, h.addTodo(desc))
}
return h.jsonResult(CreateTodosOutput{Created: created})
return h.jsonResult(CreateTodosOutput{
Created: created,
AllTodos: h.storage.All(),
Reminder: h.incompleteReminder(),
})
}

func (h *todoHandler) updateTodos(_ context.Context, params UpdateTodosArgs) (*tools.ToolCallResult, error) {
Expand Down Expand Up @@ -233,32 +257,48 @@ func (h *todoHandler) updateTodos(_ context.Context, params UpdateTodosArgs) (*t
return res, nil
}

if h.allCompleted() {
h.storage.Clear()
}
result.AllTodos = h.storage.All()
result.Reminder = h.incompleteReminder()

return h.jsonResult(result)
}

func (h *todoHandler) allCompleted() bool {
// incompleteReminder returns a reminder string listing any non-completed todos,
// or an empty string if all are completed (or storage is empty).
func (h *todoHandler) incompleteReminder() string {
all := h.storage.All()
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[LOW] Minor inefficiency: storage.All() called twice

When incompleteReminder() is called, allCompleted() has already called h.storage.All() to check completion status. This results in iterating the storage twice. Consider refactoring to pass the slice or cache the result.

This doesn't affect correctness, just performance (minimal impact in practice given todo lists are typically small).

if len(all) == 0 {
return false
}
var pending, inProgress []string
for _, todo := range all {
if todo.Status != "completed" {
return false
switch todo.Status {
case "pending":
pending = append(pending, fmt.Sprintf("[%s] %s", todo.ID, todo.Description))
case "in-progress":
inProgress = append(inProgress, fmt.Sprintf("[%s] %s", todo.ID, todo.Description))
}
}
return true
if len(pending) == 0 && len(inProgress) == 0 {
return ""
}

var b strings.Builder
b.WriteString("The following todos are still incomplete and MUST be completed:")
for _, s := range inProgress {
b.WriteString(" (in-progress) " + s)
}
for _, s := range pending {
b.WriteString(" (pending) " + s)
}
return b.String()
}

func (h *todoHandler) listTodos(_ context.Context, _ tools.ToolCall) (*tools.ToolCallResult, error) {
todos := h.storage.All()
if todos == nil {
todos = []Todo{}
}
return h.jsonResult(ListTodosOutput{Todos: todos})
out := ListTodosOutput{Todos: todos}
out.Reminder = h.incompleteReminder()
return h.jsonResult(out)
}

func (t *TodoTool) Tools(context.Context) ([]tools.Tool, error) {
Expand All @@ -268,7 +308,7 @@ func (t *TodoTool) Tools(context.Context) ([]tools.Tool, error) {
Category: "todo",
Description: "Create a new todo item with a description",
Parameters: tools.MustSchemaFor[CreateTodoArgs](),
OutputSchema: tools.MustSchemaFor[Todo](),
OutputSchema: tools.MustSchemaFor[CreateTodoOutput](),
Handler: tools.NewHandler(t.handler.createTodo),
Annotations: tools.ToolAnnotations{
Title: "Create TODO",
Expand Down
117 changes: 109 additions & 8 deletions pkg/tools/builtin/todo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,16 @@ func TestTodoTool_CreateTodo(t *testing.T) {
})
require.NoError(t, err)

var output Todo
var output CreateTodoOutput
require.NoError(t, json.Unmarshal([]byte(result.Output), &output))
assert.Equal(t, "todo_1", output.ID)
assert.Equal(t, "Test todo item", output.Description)
assert.Equal(t, "pending", output.Status)
assert.Equal(t, "todo_1", output.Created.ID)
assert.Equal(t, "Test todo item", output.Created.Description)
assert.Equal(t, "pending", output.Created.Status)

// Full state is included in the response
require.Len(t, output.AllTodos, 1)
assert.Equal(t, "todo_1", output.AllTodos[0].ID)
assert.Contains(t, output.Reminder, "todo_1")

require.Equal(t, 1, storage.Len())
requireMeta(t, result, 1)
Expand All @@ -59,10 +64,16 @@ func TestTodoTool_CreateTodos(t *testing.T) {
assert.Equal(t, "todo_2", output.Created[1].ID)
assert.Equal(t, "todo_3", output.Created[2].ID)

// Full state included in response
require.Len(t, output.AllTodos, 3)
assert.Contains(t, output.Reminder, "todo_1")
assert.Contains(t, output.Reminder, "todo_2")
assert.Contains(t, output.Reminder, "todo_3")

assert.Equal(t, 3, storage.Len())
requireMeta(t, result, 3)

// A second call continues the ID sequence
// A second call continues the ID sequence and includes all 4 items
result, err = tool.handler.createTodos(t.Context(), CreateTodosArgs{
Descriptions: []string{"Last"},
})
Expand All @@ -71,6 +82,7 @@ func TestTodoTool_CreateTodos(t *testing.T) {
require.NoError(t, json.Unmarshal([]byte(result.Output), &output))
require.Len(t, output.Created, 1)
assert.Equal(t, "todo_4", output.Created[0].ID)
require.Len(t, output.AllTodos, 4)
assert.Equal(t, 4, storage.Len())
requireMeta(t, result, 4)
}
Expand All @@ -95,9 +107,28 @@ func TestTodoTool_ListTodos(t *testing.T) {
assert.Equal(t, "pending", output.Todos[i].Status)
}

// All pending, so reminder should list all of them
assert.Contains(t, output.Reminder, "todo_1")
assert.Contains(t, output.Reminder, "todo_2")
assert.Contains(t, output.Reminder, "todo_3")

requireMeta(t, result, 3)
}

func TestTodoTool_ListTodos_Empty(t *testing.T) {
tool := NewTodoTool()

result, err := tool.handler.listTodos(t.Context(), tools.ToolCall{})
require.NoError(t, err)

var output ListTodosOutput
require.NoError(t, json.Unmarshal([]byte(result.Output), &output))
assert.Empty(t, output.Todos)
assert.Empty(t, output.Reminder)

requireMeta(t, result, 0)
}

func TestTodoTool_UpdateTodos(t *testing.T) {
storage := NewMemoryTodoStorage()
tool := NewTodoTool(WithStorage(storage))
Expand Down Expand Up @@ -125,6 +156,17 @@ func TestTodoTool_UpdateTodos(t *testing.T) {
assert.Equal(t, "in-progress", output.Updated[1].Status)
assert.Empty(t, output.NotFound)

// Full state included in response
require.Len(t, output.AllTodos, 3)
assert.Equal(t, "completed", output.AllTodos[0].Status)
assert.Equal(t, "pending", output.AllTodos[1].Status)
assert.Equal(t, "in-progress", output.AllTodos[2].Status)

// Reminder should list incomplete todos
assert.Contains(t, output.Reminder, "todo_2")
assert.Contains(t, output.Reminder, "todo_3")
assert.NotContains(t, output.Reminder, "todo_1") // completed, should not appear

todos := storage.All()
require.Len(t, todos, 3)
assert.Equal(t, "completed", todos[0].Status)
Expand Down Expand Up @@ -159,6 +201,9 @@ func TestTodoTool_UpdateTodos_PartialFailure(t *testing.T) {
require.Len(t, output.NotFound, 1)
assert.Equal(t, "nonexistent", output.NotFound[0])

// Reminder should mention the still-pending todo
assert.Contains(t, output.Reminder, "todo_2")

todos := storage.All()
require.Len(t, todos, 2)
assert.Equal(t, "completed", todos[0].Status)
Expand All @@ -185,7 +230,7 @@ func TestTodoTool_UpdateTodos_AllNotFound(t *testing.T) {
assert.Equal(t, "nonexistent2", output.NotFound[1])
}

func TestTodoTool_UpdateTodos_ClearsWhenAllCompleted(t *testing.T) {
func TestTodoTool_UpdateTodos_AllCompleted_NoAutoRemoval(t *testing.T) {
storage := NewMemoryTodoStorage()
tool := NewTodoTool(WithStorage(storage))

Expand All @@ -205,9 +250,16 @@ func TestTodoTool_UpdateTodos_ClearsWhenAllCompleted(t *testing.T) {
var output UpdateTodosOutput
require.NoError(t, json.Unmarshal([]byte(result.Output), &output))
require.Len(t, output.Updated, 2)
assert.Empty(t, output.Reminder) // no reminder when all completed

assert.Empty(t, storage.All())
requireMeta(t, result, 0)
// Full state shows both items as completed
require.Len(t, output.AllTodos, 2)
assert.Equal(t, "completed", output.AllTodos[0].Status)
assert.Equal(t, "completed", output.AllTodos[1].Status)

// Todos remain in storage (no auto-clear on completion)
assert.Equal(t, 2, storage.Len())
requireMeta(t, result, 2)
}

func TestTodoTool_WithStorage(t *testing.T) {
Expand Down Expand Up @@ -254,6 +306,55 @@ func TestTodoTool_ParametersAreObjects(t *testing.T) {
}
}

func TestTodoTool_CreateTodo_FullStateOutput(t *testing.T) {
tool := NewTodoTool()

// Create first todo
result1, err := tool.handler.createTodo(t.Context(), CreateTodoArgs{Description: "First"})
require.NoError(t, err)
var out1 CreateTodoOutput
require.NoError(t, json.Unmarshal([]byte(result1.Output), &out1))
require.Len(t, out1.AllTodos, 1)
assert.Contains(t, out1.Reminder, "todo_1")

// Create second todo — response shows both
result2, err := tool.handler.createTodo(t.Context(), CreateTodoArgs{Description: "Second"})
require.NoError(t, err)
var out2 CreateTodoOutput
require.NoError(t, json.Unmarshal([]byte(result2.Output), &out2))
require.Len(t, out2.AllTodos, 2)
assert.Contains(t, out2.Reminder, "todo_1")
assert.Contains(t, out2.Reminder, "todo_2")
}

func TestTodoTool_UpdateTodos_FullStateOutput(t *testing.T) {
tool := NewTodoTool()

_, err := tool.handler.createTodos(t.Context(), CreateTodosArgs{
Descriptions: []string{"A", "B", "C"},
})
require.NoError(t, err)

result, err := tool.handler.updateTodos(t.Context(), UpdateTodosArgs{
Updates: []TodoUpdate{{ID: "todo_1", Status: "completed"}},
})
require.NoError(t, err)

var output UpdateTodosOutput
require.NoError(t, json.Unmarshal([]byte(result.Output), &output))

// AllTodos shows full state including the completed item
require.Len(t, output.AllTodos, 3)
assert.Equal(t, "completed", output.AllTodos[0].Status)
assert.Equal(t, "pending", output.AllTodos[1].Status)
assert.Equal(t, "pending", output.AllTodos[2].Status)

// Reminder only lists incomplete items
assert.NotContains(t, output.Reminder, "todo_1")
assert.Contains(t, output.Reminder, "todo_2")
assert.Contains(t, output.Reminder, "todo_3")
}

// requireMeta asserts that result.Meta is a []Todo of the expected length.
func requireMeta(t *testing.T, result *tools.ToolCallResult, expectedLen int) {
t.Helper()
Expand Down
Loading