diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 768e857a2f..4f6d697db7 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -565,6 +565,19 @@ jobs: DEPLOYMENT_URL: "http://localhost:${{ matrix.app.name == 'sveltekit' && '4173' || (matrix.app.name == 'astro' && '4321' || '3000') }}" NEXT_CANARY: ${{ matrix.app.canary && '1' || '' }} + - name: Run Low-Concurrency Worker-Slot Test + if: ${{ !matrix.app.canary && matrix.app.name == 'nextjs-turbopack' }} + run: | + cd "${{ steps.prepare-workbench.outputs.workbench_app_path }}" && PORT=3001 WORKFLOW_POSTGRES_WORKER_CONCURRENCY=1 pnpm start & + echo "starting low-concurrency tests in 10 seconds" && sleep 10 + pnpm vitest run packages/core/e2e/e2e.test.ts -t "frees worker slots for unrelated workflows while a waiter is blocked" + env: + NODE_OPTIONS: "--enable-source-maps" + APP_NAME: ${{ matrix.app.name }} + WORKBENCH_APP_PATH: ${{ steps.prepare-workbench.outputs.workbench_app_path }} + DEPLOYMENT_URL: "http://localhost:3001" + WORKFLOW_LIMITS_LOW_CONCURRENCY: "1" + - name: Generate E2E summary if: always() run: node .github/scripts/aggregate-e2e-results.js . 
--job-name "E2E Local Postgres (${{ matrix.app.name }})" >> $GITHUB_STEP_SUMMARY || true diff --git a/packages/core/e2e/e2e.test.ts b/packages/core/e2e/e2e.test.ts index 1c9eeb8451..77a5960231 100644 --- a/packages/core/e2e/e2e.test.ts +++ b/packages/core/e2e/e2e.test.ts @@ -14,15 +14,17 @@ import { expect, test, } from 'vitest'; -import type { Run } from '../src/runtime'; +import { createLimitsRuntimeSuite } from '../../world-testing/src/limits-runtime.js'; +import type { Run, StartOptions } from '../src/runtime.js'; import { + cancelRun, getHookByToken, getRun, getWorld, healthCheck, start as rawStart, resumeHook, -} from '../src/runtime'; +} from '../src/runtime.js'; import { cliCancel, cliHealthJson, @@ -49,10 +51,25 @@ if (!deploymentUrl) { * Tracked wrapper around start() that automatically registers runs * for diagnostics on test failure and observability metadata collection. */ -async function start( - ...args: Parameters> -): Promise> { - const run = await rawStart(...args); +type E2EWorkflowMetadata = Awaited>; + +async function start( + workflow: E2EWorkflowMetadata, + options?: StartOptions +): Promise>; +async function start( + workflow: E2EWorkflowMetadata, + args: TArgs, + options?: StartOptions +): Promise>; +async function start( + workflow: E2EWorkflowMetadata, + argsOrOptions?: unknown[] | StartOptions, + options?: StartOptions +): Promise> { + const run = Array.isArray(argsOrOptions) + ? 
await rawStart(workflow, argsOrOptions, options) + : await rawStart(workflow, argsOrOptions); trackRun(run); return run; } @@ -220,11 +237,175 @@ describe('e2e', () => { const isNext = process.env.APP_NAME?.includes('nextjs'); const isLocal = deploymentUrl.includes('localhost'); + const isPostgresWorld = + process.env.WORKFLOW_TARGET_WORLD === '@workflow/world-postgres'; + const isLocalWorld = isLocalDeployment() && !isPostgresWorld; // only works with framework that transpiles react and // doesn't work on Vercel due to eval hack so react isn't // bundled in function const shouldSkipReactRenderTest = !(isNext && isLocal); + if (isLocalWorld || isPostgresWorld) { + createLimitsRuntimeSuite( + `limits runtime (${isPostgresWorld ? 'postgres' : 'local'})`, + async () => ({ + async runWorkflowWithScopedLocks(userId) { + const run = await start(await e2e('workflowWithScopedLocks'), [ + userId, + ]); + return await run.returnValue; + }, + async runWorkflowLockContention(userId, holdMs) { + const workflow = await e2e('workflowLockContentionWorkflow'); + const runA = await start(workflow, [userId, holdMs]); + await sleep(100); + const runB = await start(workflow, [userId, holdMs]); + return await Promise.all([runA.returnValue, runB.returnValue]); + }, + async runLockedStepCallContention( + key, + holdMs, + labelA = 'A', + labelB = 'B' + ) { + const workflow = await e2e('lockedStepCallContentionWorkflow'); + const runA = await start(workflow, [key, holdMs, labelA]); + await sleep(100); + const runB = await start(workflow, [key, holdMs, labelB]); + return await Promise.all([runA.returnValue, runB.returnValue]); + }, + async runWorkflowLockAcrossSuspension(userId, holdMs) { + const workflow = await e2e('workflowOnlyLockContentionWorkflow'); + const runA = await start(workflow, [userId, holdMs, 'A']); + await sleep(100); + const runB = await start(workflow, [userId, holdMs, 'B']); + return await Promise.all([runA.returnValue, runB.returnValue]); + }, + async 
runWorkflowExpiredLeaseRecovery(userId, leaseTtlMs) { + const leakedWorkflow = await e2e('workflowLeakedLockWorkflow'); + const waiterWorkflow = await e2e( + 'workflowOnlyLockContentionWorkflow' + ); + const leakedRun = await start(leakedWorkflow, [ + userId, + leaseTtlMs, + 'A', + ]); + const leakedResult = await leakedRun.returnValue; + const waiterRun = await start(waiterWorkflow, [userId, 0, 'B']); + const waiterResult = await waiterRun.returnValue; + return [leakedResult, waiterResult]; + }, + async runLeakedKeyExpiredLeaseRecovery(userId, leaseTtlMs) { + const leakedWorkflow = await e2e('leakedKeyLockWorkflow'); + const waiterWorkflow = await e2e('lockedStepCallContentionWorkflow'); + const leakedRun = await start(leakedWorkflow, [ + userId, + leaseTtlMs, + 'A', + ]); + const leakedResult = await leakedRun.returnValue; + const waiterRun = await start(waiterWorkflow, [ + leakedResult.key, + 0, + 'B', + ]); + const waiterResult = await waiterRun.returnValue; + return [leakedResult, waiterResult]; + }, + async runWorkflowMixedLimitContention(userId, holdMs, periodMs) { + const workflow = await e2e('workflowMixedLimitContentionWorkflow'); + const runA = await start(workflow, [userId, holdMs, periodMs, 'A']); + await sleep(100); + const runB = await start(workflow, [userId, holdMs, periodMs, 'B']); + return await Promise.all([runA.returnValue, runB.returnValue]); + }, + async runWorkflowFifoThreeWaiters(userId, holdMs) { + const workflow = await e2e('workflowOnlyLockContentionWorkflow'); + const runA = await start(workflow, [userId, holdMs, 'A']); + await sleep(100); + const runB = await start(workflow, [userId, holdMs, 'B']); + await sleep(100); + const runC = await start(workflow, [userId, holdMs, 'C']); + return await Promise.all([ + runA.returnValue, + runB.returnValue, + runC.returnValue, + ]); + }, + async runCancelledWorkflowWaiter(userId, holdMs) { + const workflow = await e2e('workflowOnlyLockContentionWorkflow'); + const runA = await start(workflow, 
[userId, holdMs, 'A']); + await sleep(100); + const runB = await start(workflow, [userId, holdMs, 'B']); + await sleep(100); + await cancelRun(getWorld(), runB.runId); + const cancelledError = await runB.returnValue.catch((error) => error); + const runC = await start(workflow, [userId, holdMs, 'C']); + const [resultA, resultC] = await Promise.all([ + runA.returnValue, + runC.returnValue, + ]); + return { cancelledError, resultA, resultC }; + }, + async runIndependentWorkflowKeys(holdMs) { + const workflow = await e2e('workflowOnlyLockContentionWorkflow'); + const runA = await start(workflow, ['user-a', holdMs]); + await sleep(100); + const runB = await start(workflow, ['user-b', holdMs]); + return await Promise.all([runA.returnValue, runB.returnValue]); + }, + async runIndependentStepKeys(holdMs) { + const workflow = await e2e('lockedStepCallContentionWorkflow'); + const runA = await start(workflow, [ + 'step:db:isolation:a', + holdMs, + 'A', + ]); + await sleep(100); + const runB = await start(workflow, [ + 'step:db:isolation:b', + holdMs, + 'B', + ]); + return await Promise.all([runA.returnValue, runB.returnValue]); + }, + async runBlockedWaiterWithUnrelatedWorkflow(holdMs) { + const workflow = await e2e('workflowOnlyLockContentionWorkflow'); + const runA = await start(workflow, [ + 'worker-slot-shared', + holdMs, + 'A', + ]); + await sleep(100); + const runB = await start(workflow, [ + 'worker-slot-shared', + holdMs, + 'B', + ]); + await sleep(100); + const runC = await start(workflow, [ + 'worker-slot-unrelated', + Math.max(100, Math.floor(holdMs / 4)), + 'C', + ]); + + const [holder, waiter, unrelated] = await Promise.all([ + runA.returnValue, + runB.returnValue, + runC.returnValue, + ]); + return { holder, waiter, unrelated }; + }, + async runWorkflowSingleLockAcrossMultipleSteps(holdMs) { + const workflow = await e2e('singleLockAcrossMultipleStepsWorkflow'); + const run = await start(workflow, ['step:db:batch', holdMs]); + return await run.returnValue; + }, 
+ }) + ); + } + test.skipIf(shouldSkipReactRenderTest)( 'should work with react rendering in step', async () => { @@ -1776,7 +1957,7 @@ describe('e2e', () => { // Cancel the run using the core runtime cancelRun function. // This exercises the same cancelRun code path that the CLI uses // (the CLI delegates directly to this function). - const { cancelRun } = await import('../src/runtime'); + const { cancelRun } = await import('../src/runtime.js'); await cancelRun(getWorld(), run.runId); // Verify the run was cancelled - returnValue should throw WorkflowRunCancelledError diff --git a/packages/core/src/async-deserialization-ordering.test.ts b/packages/core/src/async-deserialization-ordering.test.ts index 0774b7d9d8..463a661ec0 100644 --- a/packages/core/src/async-deserialization-ordering.test.ts +++ b/packages/core/src/async-deserialization-ordering.test.ts @@ -36,6 +36,7 @@ function setupWorkflowContext(events: Event[]): WorkflowOrchestratorContext { onUnconsumedEvent: () => {}, getPromiseQueue: () => Promise.resolve(), }), + nextLockIndex: 0, invocationsQueue: new Map(), generateUlid: () => ulid(workflowStartedAt), generateNanoid: nanoid.customRandom(nanoid.urlAlphabet, 21, (size) => diff --git a/packages/core/src/global.ts b/packages/core/src/global.ts index 3dd5c52ac8..6891e0a761 100644 --- a/packages/core/src/global.ts +++ b/packages/core/src/global.ts @@ -28,10 +28,17 @@ export interface WaitInvocationQueueItem { hasCreatedEvent?: boolean; } +export interface LimitWaitInvocationQueueItem { + type: 'limit_wait'; + correlationId: string; + resumeAt: Date; +} + export type QueueItem = | StepInvocationQueueItem | HookInvocationQueueItem - | WaitInvocationQueueItem; + | WaitInvocationQueueItem + | LimitWaitInvocationQueueItem; /** * An error that is thrown when one or more operations (steps/hooks/etc.) 
are called but do @@ -61,7 +68,9 @@ export class WorkflowSuspension extends Error { else if (item.type === 'hook') { if (item.disposed) hookDisposedCount++; else hookCount++; - } else if (item.type === 'wait') waitCount++; + } else if (item.type === 'wait' || item.type === 'limit_wait') { + waitCount++; + } } // Build description parts diff --git a/packages/core/src/hook-sleep-interaction.test.ts b/packages/core/src/hook-sleep-interaction.test.ts index a706628b81..9ec1bca88d 100644 --- a/packages/core/src/hook-sleep-interaction.test.ts +++ b/packages/core/src/hook-sleep-interaction.test.ts @@ -42,6 +42,7 @@ function setupWorkflowContext(events: Event[]): WorkflowOrchestratorContext { onUnconsumedEvent: () => {}, getPromiseQueue: () => promiseQueueHolder.current, }), + nextLockIndex: 0, invocationsQueue: new Map(), generateUlid: () => ulid(workflowStartedAt), generateNanoid: nanoid.customRandom(nanoid.urlAlphabet, 21, (size) => diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 1d969aeaa6..413f87fa74 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -25,6 +25,12 @@ export { type WebhookOptions, } from './create-hook.js'; export { defineHook, type TypedHook } from './define-hook.js'; +export { + lock, + type LockHandle, + type LockOptions, + LIMITS_NOT_IMPLEMENTED_MESSAGE, +} from './lock.js'; export { sleep } from './sleep.js'; export { getStepMetadata, diff --git a/packages/core/src/lock.test.ts b/packages/core/src/lock.test.ts new file mode 100644 index 0000000000..9cc1e2dcee --- /dev/null +++ b/packages/core/src/lock.test.ts @@ -0,0 +1,64 @@ +import { afterEach, describe, expect, it, vi } from 'vitest'; +import { + lock, + LIMITS_NOT_IMPLEMENTED_MESSAGE, + LOCK_WORKFLOW_ONLY_MESSAGE, +} from './lock.js'; +import { contextStorage } from './step/context-storage.js'; +import { WORKFLOW_LOCK } from './symbols.js'; + +afterEach(() => { + delete (globalThis as any)[WORKFLOW_LOCK]; +}); + +describe('lock', () => { + 
it('throws when called outside workflow or step execution context', async () => { + await expect( + lock({ + key: 'workflow:user:test', + concurrency: { max: 1 }, + }) + ).rejects.toThrow(LIMITS_NOT_IMPLEMENTED_MESSAGE); + }); + + it('prefers the workflow runtime lock when both runtimes are present', async () => { + const workflowHandle = { leaseId: 'lease_workflow' }; + const workflowLock = vi.fn().mockResolvedValue(workflowHandle); + (globalThis as any)[WORKFLOW_LOCK] = workflowLock; + const options = { + key: 'workflow:user:test', + concurrency: { max: 1 }, + }; + + await expect(lock(options)).resolves.toBe(workflowHandle); + expect(workflowLock).toHaveBeenCalledWith(options); + }); + + it('throws a workflow-only error when called inside a step context', async () => { + const options = { + key: 'step:db:cheap', + concurrency: { max: 2 }, + }; + + await expect( + contextStorage.run( + { + stepMetadata: { + stepId: 'step_test', + stepName: 'testStep', + stepStartedAt: new Date(), + attempt: 1, + }, + workflowMetadata: { + workflowName: 'testWorkflow', + workflowRunId: 'wrun_test', + workflowStartedAt: new Date(), + url: 'http://localhost:3000', + }, + ops: [], + }, + () => lock(options) + ) + ).rejects.toThrow(LOCK_WORKFLOW_ONLY_MESSAGE); + }); +}); diff --git a/packages/core/src/lock.ts b/packages/core/src/lock.ts new file mode 100644 index 0000000000..9791c39e13 --- /dev/null +++ b/packages/core/src/lock.ts @@ -0,0 +1,54 @@ +import { + createLimitsNotImplementedError, + type LimitDefinition, + type LimitKey, + type LimitLease, +} from '@workflow/world'; +import { contextStorage } from './step/context-storage.js'; +import { WORKFLOW_LOCK } from './symbols.js'; + +export { LIMITS_NOT_IMPLEMENTED_MESSAGE } from '@workflow/world'; + +export const LOCK_WORKFLOW_ONLY_MESSAGE = + '`lock()` is only supported in workflow functions. 
Wrap the step call with `await using` in workflow code.'; + +/** + * Reserved first-pass user-facing API for future flow concurrency and rate + * limiting inside workflow functions. + */ +export interface LockOptions extends LimitDefinition { + key: LimitKey; + leaseTtlMs?: number; +} + +/** + * Reserved handle shape for future lock acquisition. + */ +export interface LockHandle + extends Pick< + LimitLease, + 'leaseId' | 'key' | 'lockId' | 'runId' | 'lockIndex' | 'expiresAt' + > { + dispose(): Promise; + heartbeat(ttlMs?: number): Promise; + [Symbol.asyncDispose](): Promise; +} + +/** + * Reserved workflow API for future concurrency and rate limiting. + */ +export async function lock(options: LockOptions): Promise { + const workflowLock = (globalThis as any)[WORKFLOW_LOCK] as + | ((options: LockOptions) => Promise) + | undefined; + + if (workflowLock) { + return workflowLock(options); + } + + if (contextStorage.getStore()) { + throw new Error(LOCK_WORKFLOW_ONLY_MESSAGE); + } + + throw createLimitsNotImplementedError(); +} diff --git a/packages/core/src/private.ts b/packages/core/src/private.ts index 0eabc7b70f..ac827aae05 100644 --- a/packages/core/src/private.ts +++ b/packages/core/src/private.ts @@ -93,6 +93,7 @@ export interface WorkflowOrchestratorContext { encryptionKey: CryptoKey | undefined; globalThis: typeof globalThis; eventsConsumer: EventsConsumer; + nextLockIndex: number; /** * Map of pending invocations keyed by correlationId. * Using Map instead of Array for O(1) lookup/delete operations. 
diff --git a/packages/core/src/runtime/step-handler.test.ts b/packages/core/src/runtime/step-handler.test.ts index f73c9e1de6..3c7aae614f 100644 --- a/packages/core/src/runtime/step-handler.test.ts +++ b/packages/core/src/runtime/step-handler.test.ts @@ -5,9 +5,14 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; const { capturedHandlerRef, mockEventsCreate, + mockEventsListByCorrelationId, + mockLimitsAcquire, + mockLimitsHeartbeat, + mockLimitsRelease, mockQueue, mockRuntimeLogger, mockStepLogger, + mockStepGet, mockQueueMessage, mockStepFn, } = vi.hoisted(() => { @@ -19,6 +24,14 @@ const { current: null as null | ((...args: unknown[]) => Promise), }, mockEventsCreate: vi.fn(), + mockEventsListByCorrelationId: vi.fn().mockResolvedValue({ + data: [], + cursor: null, + hasMore: false, + }), + mockLimitsAcquire: vi.fn(), + mockLimitsHeartbeat: vi.fn(), + mockLimitsRelease: vi.fn().mockResolvedValue(undefined), mockQueue: vi.fn().mockResolvedValue({ messageId: 'msg_test' }), mockRuntimeLogger: { warn: vi.fn(), @@ -33,6 +46,16 @@ const { error: vi.fn(), }, mockQueueMessage: vi.fn().mockResolvedValue(undefined), + mockStepGet: vi.fn().mockResolvedValue({ + stepId: 'step_abc', + runId: 'wrun_test123', + stepName: 'myStep', + status: 'pending', + input: [], + attempt: 0, + createdAt: new Date(), + updatedAt: new Date(), + }), mockStepFn, }; }); @@ -48,7 +71,18 @@ vi.mock('@vercel/functions', () => ({ // Mock the world module - createQueueHandler captures the handler vi.mock('./world.js', () => ({ getWorld: vi.fn(() => ({ - events: { create: mockEventsCreate }, + events: { + create: mockEventsCreate, + listByCorrelationId: mockEventsListByCorrelationId, + }, + limits: { + acquire: mockLimitsAcquire, + heartbeat: mockLimitsHeartbeat, + release: mockLimitsRelease, + }, + steps: { + get: mockStepGet, + }, queue: mockQueue, getEncryptionKeyForRun: vi.fn().mockResolvedValue(undefined), })), @@ -203,9 +237,38 @@ describe('step-handler 409 handling', 
() => { mockStepFn.mockReset().mockResolvedValue('step-result'); mockStepFn.maxRetries = 3; mockQueueMessage.mockResolvedValue(undefined); + mockEventsListByCorrelationId.mockReset().mockResolvedValue({ + data: [], + cursor: null, + hasMore: false, + }); + mockLimitsAcquire.mockReset(); + mockLimitsHeartbeat.mockReset(); + mockLimitsRelease.mockReset().mockResolvedValue(undefined); + mockStepGet.mockReset().mockResolvedValue({ + stepId: 'step_abc', + runId: 'wrun_test123', + stepName: 'myStep', + status: 'pending', + input: [], + attempt: 0, + createdAt: new Date(), + updatedAt: new Date(), + }); // Re-set getWorld mock since clearAllMocks resets it vi.mocked(getWorld).mockReturnValue({ - events: { create: mockEventsCreate }, + events: { + create: mockEventsCreate, + listByCorrelationId: mockEventsListByCorrelationId, + }, + limits: { + acquire: mockLimitsAcquire, + heartbeat: mockLimitsHeartbeat, + release: mockLimitsRelease, + }, + steps: { + get: mockStepGet, + }, queue: mockQueue, getEncryptionKeyForRun: vi.fn().mockResolvedValue(undefined), } as any); @@ -227,6 +290,14 @@ describe('step-handler 409 handling', () => { vi.restoreAllMocks(); }); + it('does not call limits for ordinary step execution without lock()', async () => { + await capturedHandler(createMessage(), createMetadata('myStep')); + + expect(mockLimitsAcquire).not.toHaveBeenCalled(); + expect(mockLimitsHeartbeat).not.toHaveBeenCalled(); + expect(mockLimitsRelease).not.toHaveBeenCalled(); + }); + describe('step_completed 409', () => { it('should warn and return when step_completed gets a 409', async () => { // step_started succeeds, step function succeeds, step_completed returns 409 diff --git a/packages/core/src/runtime/step-handler.ts b/packages/core/src/runtime/step-handler.ts index d6d74adc2a..ec60d06b04 100644 --- a/packages/core/src/runtime/step-handler.ts +++ b/packages/core/src/runtime/step-handler.ts @@ -121,7 +121,7 @@ const stepHandler = getWorldHandlers().createQueueHandler( // - Step 
not in terminal state (returns 409) // - retryAfter timestamp reached (returns 425 with Retry-After header) // - Workflow still active (returns 410 if completed) - let step; + let step: Awaited>; try { const startResult = await world.events.create( workflowRunId, diff --git a/packages/core/src/runtime/suspension-handler.ts b/packages/core/src/runtime/suspension-handler.ts index d84dd3de29..6c8de48fa7 100644 --- a/packages/core/src/runtime/suspension-handler.ts +++ b/packages/core/src/runtime/suspension-handler.ts @@ -15,6 +15,7 @@ import { import { importKey } from '../encryption.js'; import type { HookInvocationQueueItem, + LimitWaitInvocationQueueItem, StepInvocationQueueItem, WaitInvocationQueueItem, WorkflowSuspension, @@ -83,6 +84,9 @@ export async function handleSuspension({ const waitItems = suspension.steps.filter( (item): item is WaitInvocationQueueItem => item.type === 'wait' ); + const limitWaitItems = suspension.steps.filter( + (item): item is LimitWaitInvocationQueueItem => item.type === 'limit_wait' + ); // Split hooks by what actions they need const hooksNeedingCreation = allHookItems.filter( @@ -303,6 +307,38 @@ export async function handleSuspension({ } } + // Lock waits: schedule a delayed workflow replay keyed by correlationId so a + // later immediate wake-up can replace it. + for (const queueItem of limitWaitItems) { + ops.push( + (async () => { + /* + Lock waits are runtime control flow, not user-visible wait events. + We only enqueue a fallback replay here; promoted waiters can replace it. 
+ */ + const delayMs = Math.max( + 1000, + queueItem.resumeAt.getTime() - Date.now() + ); + const traceCarrier = await serializeTraceCarrier(); + await queueMessage( + world, + `__wkf_workflow_${workflowName}`, + { + runId, + traceCarrier, + requestedAt: new Date(), + }, + { + delaySeconds: Math.ceil(delayMs / 1000), + idempotencyKey: queueItem.correlationId, + headers: extractTraceHeaders(traceCarrier), + } + ); + })() + ); + } + // Wait for all step and wait operations to complete waitUntil( Promise.all(ops).catch((opErr) => { diff --git a/packages/core/src/step.test.ts b/packages/core/src/step.test.ts index a8f080e0b7..506def4361 100644 --- a/packages/core/src/step.test.ts +++ b/packages/core/src/step.test.ts @@ -26,6 +26,7 @@ function setupWorkflowContext(events: Event[]): WorkflowOrchestratorContext { onUnconsumedEvent: () => {}, getPromiseQueue: () => Promise.resolve(), }), + nextLockIndex: 0, invocationsQueue: new Map(), generateUlid: () => ulid(workflowStartedAt), // All generated ulids use the workflow's started at time generateNanoid: nanoid.customRandom(nanoid.urlAlphabet, 21, (size) => diff --git a/packages/core/src/step.ts b/packages/core/src/step.ts index bd45c3008c..33e544d19e 100644 --- a/packages/core/src/step.ts +++ b/packages/core/src/step.ts @@ -96,7 +96,7 @@ export function createUseStep(ctx: WorkflowOrchestratorContext) { return EventConsumerResult.Finished; } queueItem.hasCreatedEvent = true; - // Continue waiting for step_started/step_completed/step_failed events + // Continue waiting for later step lifecycle events. 
return EventConsumerResult.Consumed; } diff --git a/packages/core/src/symbols.ts b/packages/core/src/symbols.ts index 92df4058db..790f2fe46f 100644 --- a/packages/core/src/symbols.ts +++ b/packages/core/src/symbols.ts @@ -1,6 +1,7 @@ export const WORKFLOW_USE_STEP = Symbol.for('WORKFLOW_USE_STEP'); export const WORKFLOW_CREATE_HOOK = Symbol.for('WORKFLOW_CREATE_HOOK'); export const WORKFLOW_SLEEP = Symbol.for('WORKFLOW_SLEEP'); +export const WORKFLOW_LOCK = Symbol.for('WORKFLOW_LOCK'); export const WORKFLOW_CONTEXT = Symbol.for('WORKFLOW_CONTEXT'); export const WORKFLOW_GET_STREAM_ID = Symbol.for('WORKFLOW_GET_STREAM_ID'); export const STABLE_ULID = Symbol.for('WORKFLOW_STABLE_ULID'); diff --git a/packages/core/src/workflow.ts b/packages/core/src/workflow.ts index 5d18c085b4..01883a0fee 100644 --- a/packages/core/src/workflow.ts +++ b/packages/core/src/workflow.ts @@ -22,6 +22,7 @@ import { STABLE_ULID, WORKFLOW_CREATE_HOOK, WORKFLOW_GET_STREAM_ID, + WORKFLOW_LOCK, WORKFLOW_SLEEP, WORKFLOW_USE_STEP, } from './symbols.js'; @@ -32,6 +33,7 @@ import { createContext } from './vm/index.js'; import type { WorkflowMetadata } from './workflow/get-workflow-metadata.js'; import { WORKFLOW_CONTEXT_SYMBOL } from './workflow/get-workflow-metadata.js'; import { createCreateHook } from './workflow/hook.js'; +import { createLock } from './workflow/lock.js'; import { createSleep } from './workflow/sleep.js'; /** @@ -137,6 +139,7 @@ export async function runWorkflow( globalThis: vmGlobalThis, onWorkflowError: workflowDiscontinuation.reject, eventsConsumer, + nextLockIndex: 0, generateUlid: () => ulid(+startedAt), generateNanoid, invocationsQueue: new Map(), @@ -184,6 +187,7 @@ export async function runWorkflow( const useStep = createUseStep(workflowContext); const createHook = createCreateHook(workflowContext); + const lock = createLock(workflowContext); const sleep = createSleep(workflowContext); // @ts-expect-error - `@types/node` says symbol is not valid, but it does work @@ 
-191,6 +195,8 @@ export async function runWorkflow( // @ts-expect-error - `@types/node` says symbol is not valid, but it does work vmGlobalThis[WORKFLOW_CREATE_HOOK] = createHook; // @ts-expect-error - `@types/node` says symbol is not valid, but it does work + vmGlobalThis[WORKFLOW_LOCK] = lock; + // @ts-expect-error - `@types/node` says symbol is not valid, but it does work vmGlobalThis[WORKFLOW_SLEEP] = sleep; // @ts-expect-error - `@types/node` says symbol is not valid, but it does work vmGlobalThis[WORKFLOW_GET_STREAM_ID] = (namespace?: string) => diff --git a/packages/core/src/workflow/hook.test.ts b/packages/core/src/workflow/hook.test.ts index baa108cb03..ead1169ea3 100644 --- a/packages/core/src/workflow/hook.test.ts +++ b/packages/core/src/workflow/hook.test.ts @@ -28,6 +28,7 @@ function setupWorkflowContext(events: Event[]): WorkflowOrchestratorContext { onUnconsumedEvent: () => {}, getPromiseQueue: () => Promise.resolve(), }), + nextLockIndex: 0, invocationsQueue: new Map(), generateUlid: () => ulid(workflowStartedAt), generateNanoid: nanoid.customRandom(nanoid.urlAlphabet, 21, (size) => diff --git a/packages/core/src/workflow/index.ts b/packages/core/src/workflow/index.ts index 61cc317491..86807ed04b 100644 --- a/packages/core/src/workflow/index.ts +++ b/packages/core/src/workflow/index.ts @@ -6,6 +6,12 @@ export { type RetryableErrorOptions, } from '@workflow/errors'; export type { Hook, HookOptions } from '../create-hook.js'; +export { + lock, + type LockHandle, + type LockOptions, + LIMITS_NOT_IMPLEMENTED_MESSAGE, +} from '../lock.js'; export { sleep } from '../sleep.js'; export { createHook, createWebhook } from './create-hook.js'; export { defineHook } from './define-hook.js'; diff --git a/packages/core/src/workflow/lock.ts b/packages/core/src/workflow/lock.ts new file mode 100644 index 0000000000..2010c1fd61 --- /dev/null +++ b/packages/core/src/workflow/lock.ts @@ -0,0 +1,113 @@ +import { WorkflowSuspension } from '../global.js'; +import type { 
LockHandle, LockOptions } from '../lock.js'; +import { createLockWakeCorrelationId, type LimitLease } from '@workflow/world'; +import { + scheduleWhenIdle, + type WorkflowOrchestratorContext, +} from '../private.js'; +import { getWorld } from '../runtime/world.js'; + +const DEFAULT_LOCK_LEASE_TTL_MS = 24 * 60 * 60 * 1000; + +function createLockHandle( + lease: Pick< + LimitLease, + 'leaseId' | 'key' | 'lockId' | 'runId' | 'lockIndex' | 'expiresAt' + >, + ctx: WorkflowOrchestratorContext +): LockHandle { + let currentLease = lease; + let disposed = false; + + const dispose = async () => { + if (disposed) return; + disposed = true; + await getWorld().limits.release({ + leaseId: currentLease.leaseId, + key: currentLease.key, + lockId: currentLease.lockId, + }); + }; + + const heartbeat = async (ttlMs?: number) => { + currentLease = await getWorld().limits.heartbeat({ + leaseId: currentLease.leaseId, + ttlMs, + }); + }; + + const handle: LockHandle = { + get leaseId() { + return currentLease.leaseId; + }, + get key() { + return currentLease.key; + }, + get lockId() { + return currentLease.lockId; + }, + get runId() { + return currentLease.runId; + }, + get lockIndex() { + return currentLease.lockIndex; + }, + get expiresAt() { + return currentLease.expiresAt; + }, + dispose, + heartbeat, + [Symbol.asyncDispose]: dispose, + }; + + const vmAsyncDispose = ctx.globalThis.Symbol.asyncDispose; + if (vmAsyncDispose && vmAsyncDispose !== Symbol.asyncDispose) { + (handle as any)[vmAsyncDispose] = dispose; + } + + return handle; +} + +export function createLock(ctx: WorkflowOrchestratorContext) { + return async function lockImpl(options: LockOptions): Promise { + /* + Blocked workflow locks suspend the workflow turn instead of creating a real + wait event. Postgres can wake this correlation id early when the waiter is + promoted, and the delayed replay is just a fallback. 
+ */ + const lockIndex = ctx.nextLockIndex++; + const correlationId = createLockWakeCorrelationId(ctx.runId, lockIndex); + const definition = { + concurrency: options.concurrency, + rate: options.rate, + }; + + while (true) { + const result = await getWorld().limits.acquire({ + key: options.key, + runId: ctx.runId, + lockIndex, + definition, + leaseTtlMs: options.leaseTtlMs ?? DEFAULT_LOCK_LEASE_TTL_MS, + }); + + if (result.status === 'acquired') { + return createLockHandle(result.lease, ctx); + } + + ctx.invocationsQueue.set(correlationId, { + type: 'limit_wait', + correlationId, + resumeAt: new Date(Date.now() + (result.retryAfterMs || 1000)), + }); + + scheduleWhenIdle(ctx, () => { + ctx.onWorkflowError( + new WorkflowSuspension(ctx.invocationsQueue, ctx.globalThis) + ); + }); + + await new Promise(() => {}); + } + }; +} diff --git a/packages/core/src/workflow/sleep.test.ts b/packages/core/src/workflow/sleep.test.ts index 8b77ca2c76..b6853c4405 100644 --- a/packages/core/src/workflow/sleep.test.ts +++ b/packages/core/src/workflow/sleep.test.ts @@ -32,6 +32,7 @@ function setupWorkflowContext(events: Event[]): WorkflowOrchestratorContext { }, getPromiseQueue: () => Promise.resolve(), }), + nextLockIndex: 0, invocationsQueue: new Map(), generateUlid: () => ulid(workflowStartedAt), generateNanoid: nanoid.customRandom(nanoid.urlAlphabet, 21, (size) => diff --git a/packages/workflow/src/internal/builtins.ts b/packages/workflow/src/internal/builtins.ts index 886686e50e..624ebbaebd 100644 --- a/packages/workflow/src/internal/builtins.ts +++ b/packages/workflow/src/internal/builtins.ts @@ -2,6 +2,9 @@ * These are the built-in steps that are "automatically available" in the workflow scope. They are * similar to "stdlib" except that are not meant to be imported by users, but are instead "just available" * alongside user defined steps. 
They are used internally by the runtime + * + * These helpers intentionally rely on the method receiver (`this`) so workflow + * objects like `Request` and `Response` can round-trip through step execution. */ export async function __builtin_response_array_buffer( diff --git a/packages/world-local/README.md b/packages/world-local/README.md index 9e3f0d95cc..fccc554eac 100644 --- a/packages/world-local/README.md +++ b/packages/world-local/README.md @@ -4,5 +4,13 @@ Filesystem-based workflow backend for local development and testing. Stores workflow data as JSON files on disk and provides in-memory queuing. Automatically detects development server port for queue transport. -Used by default on `next dev` and `next start`. +The `limits` namespace implements the shared flow-limits contract for local development: + +- keyed concurrency and rate limits +- FIFO waiter promotion per key +- cancelled workflow / failed step waiter pruning +- prompt wake-ups with delayed fallback retries +Limit state is persisted on disk, but queue delivery is still in-memory. That means the local world provides the same live-process lock semantics as other implemented worlds, while crash-survival and durable backlog behavior remain a PostgreSQL-only advantage today. + +Used by default on `next dev` and `next start`. 
diff --git a/packages/world-local/src/index.ts b/packages/world-local/src/index.ts index 6ec4800c8e..142fe26ccf 100644 --- a/packages/world-local/src/index.ts +++ b/packages/world-local/src/index.ts @@ -12,9 +12,10 @@ import { readJSON, } from './fs.js'; import { initDataDir } from './init.js'; +import { createLimits } from './limits.js'; import { createQueue, type DirectHandler } from './queue.js'; -import { createStorage } from './storage.js'; import { hashToken } from './storage/helpers.js'; +import { createStorage } from './storage.js'; import { createStreamer } from './streamer.js'; // Re-export init types and utilities for consumers @@ -27,7 +28,7 @@ export { parseVersion, } from './init.js'; -export { type DirectHandler } from './queue.js'; +export type { DirectHandler } from './queue.js'; export type LocalWorld = World & { /** Register a direct in-process handler for a queue prefix, bypassing HTTP. */ @@ -60,9 +61,15 @@ export function createLocalWorld(args?: Partial): LocalWorld { const mergedConfig = { ...config.value, ...definedArgs }; const tag = mergedConfig.tag; const queue = createQueue(mergedConfig); + const storage = createStorage(mergedConfig.dataDir, tag); return { + limits: createLimits(mergedConfig.dataDir, { + tag, + queue, + storage, + }), ...queue, - ...createStorage(mergedConfig.dataDir, tag), + ...storage, ...createStreamer(mergedConfig.dataDir, tag), async start() { await initDataDir(mergedConfig.dataDir); @@ -102,6 +109,7 @@ export function createLocalWorld(args?: Partial): LocalWorld { 'steps', 'events', 'hooks', + 'limits', 'waits', 'streams/runs', ]; diff --git a/packages/world-local/src/limits.test.ts b/packages/world-local/src/limits.test.ts new file mode 100644 index 0000000000..8b301c2d00 --- /dev/null +++ b/packages/world-local/src/limits.test.ts @@ -0,0 +1,57 @@ +import { createLimitsContractSuite } from '../../world-testing/src/limits-contract.js'; +import { createLocalWorld } from './index.js'; +import { mkdtemp, readFile, rm 
} from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; + +createLimitsContractSuite('local world limits', async () => { + const dir = await mkdtemp(path.join(os.tmpdir(), 'workflow-limits-')); + const world = createLocalWorld({ dataDir: dir }); + world.registerHandler('__wkf_step_', async () => Response.json({ ok: true })); + world.registerHandler('__wkf_workflow_', async () => + Response.json({ ok: true }) + ); + + return { + limits: world.limits, + storage: world, + inspectKeyState: async (key) => { + const statePath = path.join(dir, 'limits', 'state.json'); + let raw: { + keys?: Record< + string, + { + leases?: { lockId: string }[]; + waiters?: { lockId: string }[]; + tokens?: { lockId: string }[]; + } + >; + }; + try { + raw = JSON.parse(await readFile(statePath, 'utf8')); + } catch (error) { + const code = (error as NodeJS.ErrnoException).code; + if (code === 'ENOENT') { + return { + leaseHolderIds: [], + waiterHolderIds: [], + tokenHolderIds: [], + }; + } + throw error; + } + + const keyState = raw.keys?.[key]; + return { + leaseHolderIds: keyState?.leases?.map((lease) => lease.lockId) ?? [], + waiterHolderIds: + keyState?.waiters?.map((waiter) => waiter.lockId) ?? [], + tokenHolderIds: keyState?.tokens?.map((token) => token.lockId) ?? 
[], + }; + }, + close: async () => { + await world.close?.(); + await rm(dir, { recursive: true, force: true }); + }, + }; +}); diff --git a/packages/world-local/src/limits.ts b/packages/world-local/src/limits.ts new file mode 100644 index 0000000000..896b9ad3d6 --- /dev/null +++ b/packages/world-local/src/limits.ts @@ -0,0 +1,560 @@ +import path from 'node:path'; +import { WorkflowWorldError } from '@workflow/errors'; +import type { Queue, Storage, WorkflowRunWithoutData } from '@workflow/world'; +import { + createLockId, + createLockWakeCorrelationId, + LimitAcquireRequestSchema, + type LimitAcquireResult, + LimitHeartbeatRequestSchema, + type LimitLease, + LimitLeaseSchema, + LimitReleaseRequestSchema, + type Limits, + parseLockId, +} from '@workflow/world'; +import { z } from 'zod'; +import { readJSON, writeJSON } from './fs.js'; +import { monotonicUlid } from './storage/helpers.js'; + +const LimitTokenSchema = z.object({ + tokenId: z.string(), + lockId: z.string(), + acquiredAt: z.coerce.date(), + expiresAt: z.coerce.date(), +}); + +const LimitWaiterSchema = z.object({ + waiterId: z.string(), + lockId: z.string(), + runId: z.string(), + lockIndex: z.number().int().nonnegative(), + createdAt: z.coerce.date(), + leaseTtlMs: z.number().int().positive().optional(), + concurrencyMax: z.number().int().positive().nullable(), + rateCount: z.number().int().positive().nullable(), + ratePeriodMs: z.number().int().positive().nullable(), +}); + +const KeyStateSchema = z.object({ + key: z.string(), + leases: z.array(LimitLeaseSchema), + tokens: z.array(LimitTokenSchema), + waiters: z.array(LimitWaiterSchema), +}); + +const LimitsStateSchema = z.object({ + version: z.literal(2), + keys: z.record(z.string(), KeyStateSchema), +}); + +type LimitToken = z.infer; +type LimitWaiter = z.infer; +type KeyState = z.infer; +type LimitsState = z.infer; + +type HolderTarget = + | { + kind: 'lock'; + runId: string; + correlationId: string; + } + | { + kind: 'opaque'; + }; + +export 
interface LocalLimitsOptions { + tag?: string; + queue?: Pick; + storage?: Pick; +} + +const EMPTY_STATE: LimitsState = { + version: 2, + keys: {}, +}; + +function getStatePath(dataDir: string, tag?: string): string { + return path.join(dataDir, 'limits', tag ? `state.${tag}.json` : 'state.json'); +} + +function cloneToken(token: LimitToken): LimitToken { + return { ...token }; +} + +function cloneWaiter(waiter: LimitWaiter): LimitWaiter { + return { ...waiter }; +} + +function normalizeKeyState(keyState: KeyState): KeyState { + return { + key: keyState.key, + leases: keyState.leases.map((lease) => ({ ...lease })), + tokens: keyState.tokens.map(cloneToken), + waiters: keyState.waiters.map(cloneWaiter), + }; +} + +function cloneState(state: LimitsState): LimitsState { + return { + version: 2, + keys: Object.fromEntries( + Object.entries(state.keys).map(([key, keyState]) => [ + key, + normalizeKeyState(keyState), + ]) + ), + }; +} + +function pruneKeyState(keyState: KeyState, now = Date.now()): KeyState { + return { + key: keyState.key, + leases: keyState.leases.filter( + (lease) => + lease.expiresAt === undefined || lease.expiresAt.getTime() > now + ), + tokens: keyState.tokens.filter((token) => token.expiresAt.getTime() > now), + waiters: keyState.waiters.map(cloneWaiter), + }; +} + +function getBlockedReason( + concurrencyBlocked: boolean, + rateBlocked: boolean +): 'concurrency' | 'rate' | 'concurrency_and_rate' { + if (concurrencyBlocked && rateBlocked) return 'concurrency_and_rate'; + if (concurrencyBlocked) return 'concurrency'; + return 'rate'; +} + +function getRetryAfterMs( + keyState: KeyState, + now: number, + concurrencyBlocked: boolean, + rateBlocked: boolean +): number | undefined { + const candidates: number[] = []; + + if (concurrencyBlocked) { + for (const lease of keyState.leases) { + if (lease.expiresAt) { + candidates.push(Math.max(0, lease.expiresAt.getTime() - now)); + } + } + } + + if (rateBlocked) { + for (const token of keyState.tokens) { + 
candidates.push(Math.max(0, token.expiresAt.getTime() - now)); + } + } + + if (candidates.length === 0) { + return undefined; + } + + return Math.min(...candidates); +} + +function createLease( + key: string, + runId: string, + lockIndex: number, + definition: LimitLease['definition'], + acquiredAt: Date, + leaseTtlMs?: number +): LimitLease { + return { + leaseId: `lmt_${monotonicUlid()}`, + key, + lockId: createLockId(runId, lockIndex), + runId, + lockIndex, + acquiredAt, + expiresAt: + leaseTtlMs !== undefined + ? new Date(acquiredAt.getTime() + leaseTtlMs) + : undefined, + definition, + }; +} + +function insertToken( + keyState: KeyState, + lockId: string, + acquiredAt: Date, + periodMs: number +) { + keyState.tokens.push({ + tokenId: `lmttok_${monotonicUlid()}`, + lockId, + acquiredAt, + expiresAt: new Date(acquiredAt.getTime() + periodMs), + }); +} + +function parseHolderId(lockId: string): HolderTarget { + const parsedLockId = parseLockId(lockId); + if (parsedLockId) { + return { + kind: 'lock', + runId: parsedLockId.runId, + correlationId: createLockWakeCorrelationId( + parsedLockId.runId, + parsedLockId.lockIndex + ), + }; + } + + return { kind: 'opaque' }; +} + +function isTerminalRun(run: WorkflowRunWithoutData | undefined) { + return !run || ['completed', 'failed', 'cancelled'].includes(run.status); +} + +function deleteEmptyKey(state: LimitsState, key: string) { + const keyState = state.keys[key]; + if (!keyState) return; + if ( + keyState.leases.length === 0 && + keyState.tokens.length === 0 && + keyState.waiters.length === 0 + ) { + delete state.keys[key]; + } +} + +export function createLimits( + dataDir: string, + tagOrOptions?: string | LocalLimitsOptions +): Limits { + const options = + typeof tagOrOptions === 'string' ? 
{ tag: tagOrOptions } : tagOrOptions; + const statePath = getStatePath(dataDir, options?.tag); + let stateOp = Promise.resolve(); + + const withStateLock = async (fn: () => Promise): Promise => { + const run = stateOp.then(fn, fn); + stateOp = run.then( + () => undefined, + () => undefined + ); + return run; + }; + + const readState = async (): Promise => { + const raw = + (await readJSON(statePath, LimitsStateSchema)) ?? cloneState(EMPTY_STATE); + + return cloneState(raw); + }; + + const writeState = async (state: LimitsState): Promise => { + await writeJSON(statePath, state, { overwrite: true }); + }; + + const getRun = async ( + runId: string + ): Promise => { + try { + return await options?.storage?.runs.get(runId, { resolveData: 'none' }); + } catch { + return undefined; + } + }; + + const isHolderLive = async (holderId: string): Promise => { + const target = parseHolderId(holderId); + if (target.kind === 'opaque' || !options?.storage) { + return true; + } + + const run = await getRun(target.runId); + return !isTerminalRun(run); + }; + + const queueWakeForHolder = async (holderId: string): Promise => { + const target = parseHolderId(holderId); + if (target.kind === 'opaque' || !options?.queue || !options?.storage) { + return; + } + + try { + const run = await getRun(target.runId); + if (isTerminalRun(run) || !run) return; + + await options.queue.queue( + `__wkf_workflow_${run.workflowName}`, + { + runId: target.runId, + requestedAt: new Date(), + }, + { + idempotencyKey: target.correlationId, + } + ); + } catch (error) { + console.warn('[world-local] Failed to queue lock wake-up', error); + } + }; + + const promoteWaiters = async ( + key: string, + keyState: KeyState + ): Promise<{ keyState: KeyState; wakeHolders: string[] }> => { + const wakeHolders: string[] = []; + const promotedKeyState = pruneKeyState(keyState); + const remainingWaiters: LimitWaiter[] = []; + let activeLeases = promotedKeyState.leases.length; + let activeTokens = 
promotedKeyState.tokens.length; + + for (let index = 0; index < promotedKeyState.waiters.length; index++) { + const waiter = promotedKeyState.waiters[index]; + + if (!(await isHolderLive(waiter.lockId))) { + continue; + } + + const concurrencyBlocked = + waiter.concurrencyMax !== null && activeLeases >= waiter.concurrencyMax; + const rateBlocked = + waiter.rateCount !== null && activeTokens >= waiter.rateCount; + + if (concurrencyBlocked || rateBlocked) { + remainingWaiters.push( + waiter, + ...promotedKeyState.waiters.slice(index + 1) + ); + promotedKeyState.waiters = remainingWaiters; + return { keyState: promotedKeyState, wakeHolders }; + } + + const acquiredAt = new Date(); + const definition = { + concurrency: + waiter.concurrencyMax !== null + ? { max: waiter.concurrencyMax } + : undefined, + rate: + waiter.rateCount !== null && waiter.ratePeriodMs !== null + ? { + count: waiter.rateCount, + periodMs: waiter.ratePeriodMs, + } + : undefined, + }; + + promotedKeyState.leases.push( + createLease( + key, + waiter.runId, + waiter.lockIndex, + definition, + acquiredAt, + waiter.leaseTtlMs + ) + ); + activeLeases += 1; + + if (waiter.rateCount !== null && waiter.ratePeriodMs !== null) { + insertToken( + promotedKeyState, + waiter.lockId, + acquiredAt, + waiter.ratePeriodMs + ); + activeTokens += 1; + } + + wakeHolders.push(waiter.lockId); + } + + promotedKeyState.waiters = remainingWaiters; + return { keyState: promotedKeyState, wakeHolders }; + }; + + return { + async acquire(request) { + const parsed = LimitAcquireRequestSchema.parse(request); + const lockId = createLockId(parsed.runId, parsed.lockIndex); + + return withStateLock(async (): Promise => { + const state = cloneState(await readState()); + const baseKeyState = pruneKeyState( + state.keys[parsed.key] ?? 
{ + key: parsed.key, + leases: [], + tokens: [], + waiters: [], + } + ); + const { keyState, wakeHolders } = await promoteWaiters( + parsed.key, + baseKeyState + ); + state.keys[parsed.key] = keyState; + + const existingLease = keyState.leases.find( + (lease) => lease.lockId === lockId + ); + if (existingLease) { + await writeState(state); + await Promise.all(wakeHolders.map(queueWakeForHolder)); + return { + status: 'acquired', + lease: existingLease, + }; + } + + const concurrencyBlocked = + parsed.definition.concurrency !== undefined && + keyState.leases.length >= parsed.definition.concurrency.max; + const rateBlocked = + parsed.definition.rate !== undefined && + keyState.tokens.length >= parsed.definition.rate.count; + const existingWaiter = keyState.waiters.find( + (waiter) => waiter.lockId === lockId + ); + + if ( + existingWaiter || + concurrencyBlocked || + rateBlocked || + keyState.waiters.length > 0 + ) { + if (!existingWaiter) { + keyState.waiters.push({ + waiterId: `lmtwait_${monotonicUlid()}`, + lockId, + runId: parsed.runId, + lockIndex: parsed.lockIndex, + createdAt: new Date(), + leaseTtlMs: parsed.leaseTtlMs, + concurrencyMax: parsed.definition.concurrency?.max ?? null, + rateCount: parsed.definition.rate?.count ?? null, + ratePeriodMs: parsed.definition.rate?.periodMs ?? 
null, + }); + } + + state.keys[parsed.key] = keyState; + await writeState(state); + await Promise.all(wakeHolders.map(queueWakeForHolder)); + return { + status: 'blocked', + reason: getBlockedReason(concurrencyBlocked, rateBlocked), + retryAfterMs: getRetryAfterMs( + keyState, + Date.now(), + concurrencyBlocked, + rateBlocked + ), + }; + } + + const acquiredAt = new Date(); + const lease = createLease( + parsed.key, + parsed.runId, + parsed.lockIndex, + parsed.definition, + acquiredAt, + parsed.leaseTtlMs + ); + + keyState.leases.push(lease); + + if (parsed.definition.rate) { + insertToken( + keyState, + lockId, + acquiredAt, + parsed.definition.rate.periodMs + ); + } + + state.keys[parsed.key] = keyState; + await writeState(state); + await Promise.all(wakeHolders.map(queueWakeForHolder)); + + return { + status: 'acquired', + lease, + }; + }); + }, + + async release(request) { + const parsed = LimitReleaseRequestSchema.parse(request); + + await withStateLock(async () => { + const state = cloneState(await readState()); + const wakeHolders: string[] = []; + + for (const [key, keyStateValue] of Object.entries(state.keys)) { + const keyState = pruneKeyState(keyStateValue); + const beforeLeases = keyState.leases.length; + keyState.leases = keyState.leases.filter((lease) => { + if (lease.leaseId !== parsed.leaseId) return true; + if (parsed.key && lease.key !== parsed.key) return true; + if (parsed.lockId && lease.lockId !== parsed.lockId) { + return true; + } + return false; + }); + + if (keyState.leases.length !== beforeLeases) { + const promoted = await promoteWaiters(key, keyState); + state.keys[key] = promoted.keyState; + wakeHolders.push(...promoted.wakeHolders); + } else { + state.keys[key] = keyState; + } + + deleteEmptyKey(state, key); + } + + await writeState(state); + await Promise.all(wakeHolders.map(queueWakeForHolder)); + }); + }, + + async heartbeat(request) { + const parsed = LimitHeartbeatRequestSchema.parse(request); + + return withStateLock(async () => 
{ + const state = cloneState(await readState()); + const now = Date.now(); + + for (const [key, keyStateValue] of Object.entries(state.keys)) { + const keyState = pruneKeyState(keyStateValue, now); + const leaseIndex = keyState.leases.findIndex( + (lease) => lease.leaseId === parsed.leaseId + ); + + if (leaseIndex === -1) { + state.keys[key] = keyState; + continue; + } + + const lease = keyState.leases[leaseIndex]; + const currentExpiry = lease.expiresAt?.getTime(); + const ttlMs = + parsed.ttlMs ?? (currentExpiry ? currentExpiry - now : 30_000); + const updatedLease: LimitLease = { + ...lease, + expiresAt: new Date(now + Math.max(1, ttlMs)), + }; + + keyState.leases[leaseIndex] = updatedLease; + state.keys[key] = keyState; + await writeState(state); + return updatedLease; + } + + throw new WorkflowWorldError(`Lease "${parsed.leaseId}" not found`); + }); + }, + }; +} diff --git a/packages/world-local/src/queue.test.ts b/packages/world-local/src/queue.test.ts index 32c8d1f834..f07677fe49 100644 --- a/packages/world-local/src/queue.test.ts +++ b/packages/world-local/src/queue.test.ts @@ -2,11 +2,6 @@ import type { StepInvokePayload } from '@workflow/world'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { createQueue } from './queue'; -// Mock node:timers/promises so setTimeout resolves immediately -vi.mock('node:timers/promises', () => ({ - setTimeout: vi.fn().mockResolvedValue(undefined), -})); - const stepPayload: StepInvokePayload = { workflowName: 'test-workflow', workflowRunId: 'run_01ABC', @@ -18,11 +13,13 @@ describe('queue timeout re-enqueue', () => { let localQueue: ReturnType; beforeEach(() => { + vi.useFakeTimers(); localQueue = createQueue({ baseUrl: 'http://localhost:3000' }); }); afterEach(async () => { await localQueue.close(); + vi.useRealTimers(); }); it('createQueueHandler returns 200 with timeoutSeconds in the body', async () => { @@ -72,29 +69,6 @@ describe('queue timeout re-enqueue', () => { 
expect(body).toEqual({ ok: true }); }); - it('createQueueHandler returns 200 with timeoutSeconds: 0', async () => { - const handler = localQueue.createQueueHandler('__wkf_step_', async () => ({ - timeoutSeconds: 0, - })); - - const req = new Request('http://localhost/step', { - method: 'POST', - headers: { - 'content-type': 'application/json', - 'x-vqs-queue-name': '__wkf_step_test', - 'x-vqs-message-id': 'msg_01ABC', - 'x-vqs-message-attempt': '1', - }, - body: JSON.stringify(stepPayload), - }); - - const response = await handler(req); - expect(response.status).toBe(200); - - const body = await response.json(); - expect(body).toEqual({ timeoutSeconds: 0 }); - }); - it('queue retries when handler returns timeoutSeconds > 0', async () => { let callCount = 0; const handler = localQueue.createQueueHandler('__wkf_step_', async () => { @@ -102,25 +76,18 @@ describe('queue timeout re-enqueue', () => { if (callCount < 3) { return { timeoutSeconds: 5 }; } - // Third call succeeds normally return undefined; }); localQueue.registerHandler('__wkf_step_', handler); await localQueue.queue('__wkf_step_test' as any, stepPayload); + await vi.runAllTimersAsync(); - // Wait for the async queue processing to complete - // The queue fires off processing asynchronously, so we need to wait - await vi.waitFor(() => { - expect(callCount).toBe(3); - }); + expect(callCount).toBe(3); }); it('queue retries immediately when handler returns timeoutSeconds: 0', async () => { - const { setTimeout: mockSetTimeout } = await import('node:timers/promises'); - vi.mocked(mockSetTimeout).mockClear(); - let callCount = 0; const handler = localQueue.createQueueHandler('__wkf_step_', async () => { callCount++; @@ -133,12 +100,37 @@ describe('queue timeout re-enqueue', () => { localQueue.registerHandler('__wkf_step_', handler); await localQueue.queue('__wkf_step_test' as any, stepPayload); + await vi.runAllTimersAsync(); - await vi.waitFor(() => { - expect(callCount).toBe(3); + expect(callCount).toBe(3); + 
}); + + it('replaces delayed idempotent deliveries with an immediate wake-up', async () => { + const seenStepIds: string[] = []; + const handler = localQueue.createQueueHandler( + '__wkf_step_', + async (body) => { + seenStepIds.push((body as StepInvokePayload).stepId); + return undefined; + } + ); + + localQueue.registerHandler('__wkf_step_', handler); + + await localQueue.queue('__wkf_step_test' as any, stepPayload, { + idempotencyKey: 'step_01ABC', + delaySeconds: 30, }); + await localQueue.queue( + '__wkf_step_test' as any, + { ...stepPayload, stepId: 'step_replacement' }, + { + idempotencyKey: 'step_01ABC', + } + ); + + await vi.runAllTimersAsync(); - // setTimeout should NOT have been called for timeoutSeconds: 0 - expect(mockSetTimeout).not.toHaveBeenCalled(); + expect(seenStepIds).toEqual(['step_replacement']); }); }); diff --git a/packages/world-local/src/queue.ts b/packages/world-local/src/queue.ts index fd3b511509..c356730daf 100644 --- a/packages/world-local/src/queue.ts +++ b/packages/world-local/src/queue.ts @@ -1,4 +1,3 @@ -import { setTimeout } from 'node:timers/promises'; import { JsonTransport } from '@vercel/queue'; import { MessageId, type Queue, ValidQueueName } from '@workflow/world'; import { Sema } from 'async-sema'; @@ -9,20 +8,10 @@ import type { Config } from './config.js'; import { resolveBaseUrl } from './config.js'; import { getPackageInfo } from './init.js'; -// For local queue, there is no technical limit on the message visibility lifespan, -// but the environment variable can be used for testing purposes to set a max visibility limit. const LOCAL_QUEUE_MAX_VISIBILITY = parseInt(process.env.WORKFLOW_LOCAL_QUEUE_MAX_VISIBILITY ?? 
'0', 10) || Infinity; -// Maximum safe delay for setTimeout in Node.js (2^31 - 1 milliseconds ≈ 24.85 days) -// Larger values cause "TimeoutOverflowWarning: X does not fit into a 32-bit signed integer" -// When the clamped timeout fires, the handler will recalculate remaining time from -// persistent state and return another timeoutSeconds if needed. -const MAX_SAFE_TIMEOUT_MS = 2147483647; - -// The local workers share the same Node.js process and event loop, -// so we need to limit concurrency to avoid overwhelming the system. const DEFAULT_CONCURRENCY_LIMIT = 1000; const WORKFLOW_LOCAL_QUEUE_CONCURRENCY = parseInt(process.env.WORKFLOW_LOCAL_QUEUE_CONCURRENCY ?? '0', 10) || @@ -31,15 +20,27 @@ const WORKFLOW_LOCAL_QUEUE_CONCURRENCY = export type DirectHandler = (req: Request) => Promise; export type LocalQueue = Queue & { - /** Close the HTTP agent and release resources. */ close(): Promise; - /** Register a direct in-process handler for a queue prefix, bypassing HTTP. */ registerHandler( prefix: '__wkf_step_' | '__wkf_workflow_', handler: DirectHandler ): void; }; +type ScheduledMessage = { + attempt: number; + body: Uint8Array; + headers?: Record; + idempotencyKey?: string; + messageId: MessageId; + pendingExecution: boolean; + queueName: ValidQueueName; + remainingServerRetries: number; + running: boolean; + timer?: ReturnType; + version: number; +}; + function getQueueRoute(queueName: ValidQueueName): { pathname: 'flow' | 'step'; prefix: '__wkf_step_' | '__wkf_workflow_'; @@ -54,11 +55,6 @@ function getQueueRoute(queueName: ValidQueueName): { } export function createQueue(config: Partial): LocalQueue { - // Create a custom agent optimized for high-concurrency local workflows: - // - headersTimeout: 0 allows long-running steps - // - connections: 1000 allows many parallel connections to the same host - // - pipelining: 1 (default) for HTTP/1.1 compatibility - // - keepAliveTimeout: 30s keeps connections warm for rapid step execution const httpAgent = new 
Agent({ headersTimeout: 0, connections: 1000, @@ -67,139 +63,240 @@ export function createQueue(config: Partial): LocalQueue { const transport = new JsonTransport(); const generateId = monotonicFactory(); const semaphore = new Sema(WORKFLOW_LOCAL_QUEUE_CONCURRENCY); - - /** - * holds inflight messages by idempotency key to ensure - * that we don't queue the same message multiple times - */ - const inflightMessages = new Map(); - /** Direct in-process handlers by queue prefix, bypassing HTTP when set. */ + const scheduledMessages = new Map(); const directHandlers = new Map(); + let closed = false; - const queue: Queue['queue'] = async (queueName, message, opts) => { - const cleanup = [] as (() => void)[]; + const cleanupMessage = (message: ScheduledMessage) => { + if (message.timer) { + clearTimeout(message.timer); + message.timer = undefined; + } + if (message.idempotencyKey) { + scheduledMessages.delete(message.idempotencyKey); + } + }; - if (opts?.idempotencyKey) { - const existing = inflightMessages.get(opts.idempotencyKey); - if (existing) { - return { messageId: existing }; - } + const scheduleExecution = (message: ScheduledMessage, delayMs: number) => { + if (closed) { + cleanupMessage(message); + return; } - const body = transport.serialize(message); - const { pathname, prefix } = getQueueRoute(queueName); - const messageId = MessageId.parse(`msg_${generateId()}`); + if (message.timer) { + clearTimeout(message.timer); + message.timer = undefined; + } - if (opts?.idempotencyKey) { - const key = opts.idempotencyKey; - inflightMessages.set(key, messageId); - cleanup.push(() => { - inflightMessages.delete(key); - }); + const version = ++message.version; + const enqueueRun = () => { + message.pendingExecution = true; + if (!message.running) { + void executeMessage(message); + } + }; + + if (delayMs <= 0) { + enqueueRun(); + return; } - (async () => { - const token = semaphore.tryAcquire(); - if (!token) { - console.warn( - `[world-local]: concurrency limit 
(${WORKFLOW_LOCAL_QUEUE_CONCURRENCY}) reached, waiting for queue to free up` - ); - await semaphore.acquire(); + message.timer = globalThis.setTimeout(() => { + if (message.version !== version || closed) { + return; } + message.timer = undefined; + enqueueRun(); + }, delayMs); + }; + + const deliverMessage = async ( + message: ScheduledMessage + ): Promise< + | { kind: 'success' } + | { kind: 'timeout'; delayMs: number } + | { kind: 'server_error'; status: number; text: string } + > => { + const { pathname, prefix } = getQueueRoute(message.queueName); + const headers: Record = { + ...message.headers, + 'content-type': 'application/json', + 'x-vqs-queue-name': message.queueName, + 'x-vqs-message-id': message.messageId, + 'x-vqs-message-attempt': String(message.attempt + 1), + }; + const directHandler = directHandlers.get(prefix); + let response: Response; + + if (directHandler) { + const req = new Request( + `http://localhost/.well-known/workflow/v1/${pathname}`, + { + method: 'POST', + headers, + body: message.body, + } + ); + response = await directHandler(req); + } else { + const baseUrl = await resolveBaseUrl(config); + response = await fetch(`${baseUrl}/.well-known/workflow/v1/${pathname}`, { + method: 'POST', + duplex: 'half', + dispatcher: httpAgent, + headers, + body: message.body, + } as any); + } + + const text = await response.text(); + + if (response.ok) { try { - const maxAttempts = 3; - let defaultRetriesLeft = maxAttempts; - for (let attempt = 0; defaultRetriesLeft > 0; attempt++) { - defaultRetriesLeft--; - - const headers: Record = { - ...opts?.headers, - 'content-type': 'application/json', - 'x-vqs-queue-name': queueName, - 'x-vqs-message-id': messageId, - 'x-vqs-message-attempt': String(attempt + 1), + const timeoutSeconds = Number(JSON.parse(text).timeoutSeconds); + if (Number.isFinite(timeoutSeconds) && timeoutSeconds >= 0) { + return { + kind: 'timeout', + delayMs: timeoutSeconds > 0 ? 
timeoutSeconds * 1000 : 0, }; - const directHandler = directHandlers.get(prefix); - let response: Response; - - if (directHandler) { - const req = new Request( - `http://localhost/.well-known/workflow/v1/${pathname}`, - { - method: 'POST', - headers, - body, - } - ); - response = await directHandler(req); - } else { - const baseUrl = await resolveBaseUrl(config); - // eslint-disable-next-line @typescript-eslint/no-explicit-any -- undici v7 dispatcher types don't match @types/node's RequestInit - response = await fetch( - `${baseUrl}/.well-known/workflow/v1/${pathname}`, - { - method: 'POST', - duplex: 'half', - dispatcher: httpAgent, - headers, - body, - } as any - ); + } + } catch {} + + return { kind: 'success' }; + } + + return { + kind: 'server_error', + status: response.status, + text, + }; + }; + + const executeMessage = async (message: ScheduledMessage): Promise => { + if (closed || message.running) { + return; + } + + message.running = true; + + try { + while (message.pendingExecution && !closed) { + message.pendingExecution = false; + const version = message.version; + const token = semaphore.tryAcquire(); + if (!token) { + console.warn( + `[world-local]: concurrency limit (${WORKFLOW_LOCAL_QUEUE_CONCURRENCY}) reached, waiting for queue to free up` + ); + await semaphore.acquire(); + } + + try { + if (closed) { + cleanupMessage(message); + return; + } + + if (version !== message.version) { + continue; } - const text = await response.text(); - - if (response.ok) { - try { - const timeoutSeconds = Number(JSON.parse(text).timeoutSeconds); - if (Number.isFinite(timeoutSeconds) && timeoutSeconds >= 0) { - // Clamp to MAX_SAFE_TIMEOUT_MS to avoid Node.js setTimeout overflow warning. - // When this fires early, the handler recalculates remaining time from - // persistent state and returns another timeoutSeconds if needed. 
- if (timeoutSeconds > 0) { - const timeoutMs = Math.min( - timeoutSeconds * 1000, - MAX_SAFE_TIMEOUT_MS - ); - await setTimeout(timeoutMs); - } - defaultRetriesLeft++; - continue; - } - } catch {} + const result = await deliverMessage(message); + + if (result.kind === 'success') { + cleanupMessage(message); return; } + if (result.kind === 'timeout') { + message.attempt += 1; + scheduleExecution( + message, + result.delayMs === 0 + ? 0 + : Math.min(result.delayMs, LOCAL_QUEUE_MAX_VISIBILITY * 1000) + ); + continue; + } + console.error( - `[world-local] Queue message failed (attempt ${attempt + 1}/${maxAttempts}, status ${response.status}): ${text}`, - { queueName, messageId } + `[world-local] Queue message failed (attempt ${ + message.attempt + 1 + }/3, status ${result.status}): ${result.text}`, + { queueName: message.queueName, messageId: message.messageId } ); + + message.attempt += 1; + message.remainingServerRetries -= 1; + if (message.remainingServerRetries > 0) { + scheduleExecution(message, 0); + continue; + } + + console.error(`[world-local] Queue message exhausted all retries`, { + queueName: message.queueName, + messageId: message.messageId, + }); + cleanupMessage(message); + return; + } finally { + semaphore.release(); } + } + } catch (err) { + const queueError = err as { name?: string }; + const isAbortError = + queueError.name === 'AbortError' || + queueError.name === 'ResponseAborted'; + if (!isAbortError) { + console.error('[local world] Queue operation failed:', err); + } + cleanupMessage(message); + } finally { + message.running = false; + if (message.pendingExecution && !closed) { + void executeMessage(message); + } + } + }; - console.error(`[world-local] Queue message exhausted all retries`, { - queueName, - messageId, - }); - } finally { - semaphore.release(); + const queue: Queue['queue'] = async (queueName, message, opts) => { + const body = transport.serialize(message); + const delayMs = + typeof opts?.delaySeconds === 'number' && 
opts.delaySeconds > 0 + ? opts.delaySeconds * 1000 + : 0; + + if (opts?.idempotencyKey) { + const existing = scheduledMessages.get(opts.idempotencyKey); + if (existing) { + existing.queueName = queueName; + existing.body = body; + existing.headers = opts.headers; + scheduleExecution(existing, delayMs); + return { messageId: existing.messageId }; } - })() - .catch((err) => { - // Silently ignore client disconnect errors (e.g., browser refresh during streaming) - // These are expected and should not cause unhandled rejection warnings - const isAbortError = - err?.name === 'AbortError' || err?.name === 'ResponseAborted'; - if (!isAbortError) { - console.error('[local world] Queue operation failed:', err); - } - }) - .finally(() => { - for (const fn of cleanup) { - fn(); - } - }); + } + + const scheduledMessage: ScheduledMessage = { + attempt: 0, + body, + headers: opts?.headers, + idempotencyKey: opts?.idempotencyKey, + messageId: MessageId.parse(`msg_${generateId()}`), + pendingExecution: false, + queueName, + remainingServerRetries: 3, + running: false, + version: 0, + }; - return { messageId }; + if (opts?.idempotencyKey) { + scheduledMessages.set(opts.idempotencyKey, scheduledMessage); + } + + scheduleExecution(scheduledMessage, delayMs); + return { messageId: scheduledMessage.messageId }; }; const HeaderParser = z.object({ @@ -270,6 +367,11 @@ export function createQueue(config: Partial): LocalQueue { directHandlers.set(prefix, handler); }, async close() { + closed = true; + for (const message of scheduledMessages.values()) { + cleanupMessage(message); + } + scheduledMessages.clear(); await httpAgent.close(); }, }; diff --git a/packages/world-postgres/README.md b/packages/world-postgres/README.md index bfb617c9b6..1b48974de8 100644 --- a/packages/world-postgres/README.md +++ b/packages/world-postgres/README.md @@ -117,6 +117,7 @@ Make sure your PostgreSQL database is accessible and the user has sufficient per - **Durable Storage**: Stores workflow runs, events, 
steps, hooks, and webhooks in PostgreSQL - **Queue Processing**: Uses graphile-worker as the durable queue and executes jobs over the workflow HTTP routes - **Durable Delays**: Re-schedules waits and retries in PostgreSQL +- **Flow Limits**: Implements the shared concurrency/rate-limit contract with PostgreSQL-backed leases, rate tokens, FIFO waiters, and prompt wake-ups - **Streaming**: Real-time event streaming capabilities - **Health Checks**: Built-in connection health monitoring - **Configurable Concurrency**: Adjustable worker concurrency for queue processing @@ -127,8 +128,13 @@ Make sure your PostgreSQL database is accessible and the user has sufficient per - Graphile jobs are acknowledged only after the workflow or step execution finishes, or after the worker durably schedules a delayed follow-up job - Backlog stays in PostgreSQL when all execution slots are busy - Retry and sleep-style delays use Graphile `runAt` scheduling +- Flow-limit waiters are stored durably in PostgreSQL and promoted in FIFO order per key +- Cancelled workflow waiters are pruned before promotion +- Blocked steps are re-queued instead of holding a worker slot while waiting for a lease - Workflow and step execution is sent through `/.well-known/workflow/v1/flow` and `/.well-known/workflow/v1/step` +PostgreSQL's main advantage over the local world is durability of the queue/backlog itself across host or process loss. The flow-limit behavior is intended to match that of the other world implementations while the process is alive.
+ ## Development For local development, you can use the included Docker Compose configuration: diff --git a/packages/world-postgres/src/drizzle/migrations/0010_add_flow_limits.sql b/packages/world-postgres/src/drizzle/migrations/0010_add_flow_limits.sql new file mode 100644 index 0000000000..01892d0bfe --- /dev/null +++ b/packages/world-postgres/src/drizzle/migrations/0010_add_flow_limits.sql @@ -0,0 +1,35 @@ +CREATE TABLE "workflow"."workflow_limit_leases" ( + "lease_id" varchar PRIMARY KEY NOT NULL, + "limit_key" varchar NOT NULL, + "holder_id" varchar NOT NULL, + "acquired_at" timestamp DEFAULT now() NOT NULL, + "expires_at" timestamp, + "concurrency_max" integer, + "rate_count" integer, + "rate_period_ms" integer +); +--> statement-breakpoint +CREATE TABLE "workflow"."workflow_limit_tokens" ( + "token_id" varchar PRIMARY KEY NOT NULL, + "limit_key" varchar NOT NULL, + "holder_id" varchar NOT NULL, + "acquired_at" timestamp DEFAULT now() NOT NULL, + "expires_at" timestamp NOT NULL +); +--> statement-breakpoint +CREATE TABLE "workflow"."workflow_limit_waiters" ( + "waiter_id" varchar PRIMARY KEY NOT NULL, + "limit_key" varchar NOT NULL, + "holder_id" varchar NOT NULL, + "created_at" timestamp DEFAULT now() NOT NULL, + "lease_ttl_ms" integer, + "concurrency_max" integer, + "rate_count" integer, + "rate_period_ms" integer +); +--> statement-breakpoint +CREATE UNIQUE INDEX "workflow_limit_leases_limit_key_holder_id_index" ON "workflow"."workflow_limit_leases" USING btree ("limit_key","holder_id");--> statement-breakpoint +CREATE INDEX "workflow_limit_leases_limit_key_expires_at_index" ON "workflow"."workflow_limit_leases" USING btree ("limit_key","expires_at");--> statement-breakpoint +CREATE INDEX "workflow_limit_tokens_limit_key_expires_at_index" ON "workflow"."workflow_limit_tokens" USING btree ("limit_key","expires_at");--> statement-breakpoint +CREATE UNIQUE INDEX "workflow_limit_waiters_limit_key_holder_id_index" ON "workflow"."workflow_limit_waiters" USING 
btree ("limit_key","holder_id");--> statement-breakpoint +CREATE INDEX "workflow_limit_waiters_limit_key_created_at_index" ON "workflow"."workflow_limit_waiters" USING btree ("limit_key","created_at");--> statement-breakpoint diff --git a/packages/world-postgres/src/drizzle/migrations/meta/0010_snapshot.json b/packages/world-postgres/src/drizzle/migrations/meta/0010_snapshot.json new file mode 100644 index 0000000000..97ddba3774 --- /dev/null +++ b/packages/world-postgres/src/drizzle/migrations/meta/0010_snapshot.json @@ -0,0 +1,973 @@ +{ + "id": "c4af56df-d588-4810-a8b4-f4eb68b270b2", + "prevId": "7adbbd35-ca90-4353-bb34-3d1b2435a027", + "version": "7", + "dialect": "postgresql", + "tables": { + "workflow.workflow_events": { + "name": "workflow_events", + "schema": "workflow", + "columns": { + "id": { + "name": "id", + "type": "varchar", + "primaryKey": true, + "notNull": true + }, + "type": { + "name": "type", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "correlation_id": { + "name": "correlation_id", + "type": "varchar", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "run_id": { + "name": "run_id", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "payload": { + "name": "payload", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "payload_cbor": { + "name": "payload_cbor", + "type": "bytea", + "primaryKey": false, + "notNull": false + }, + "spec_version": { + "name": "spec_version", + "type": "integer", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "workflow_events_run_id_index": { + "name": "workflow_events_run_id_index", + "columns": [ + { + "expression": "run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + 
"workflow_events_correlation_id_index": { + "name": "workflow_events_correlation_id_index", + "columns": [ + { + "expression": "correlation_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "workflow.workflow_hooks": { + "name": "workflow_hooks", + "schema": "workflow", + "columns": { + "run_id": { + "name": "run_id", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "hook_id": { + "name": "hook_id", + "type": "varchar", + "primaryKey": true, + "notNull": true + }, + "token": { + "name": "token", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "owner_id": { + "name": "owner_id", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "project_id": { + "name": "project_id", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "environment": { + "name": "environment", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "metadata": { + "name": "metadata", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "metadata_cbor": { + "name": "metadata_cbor", + "type": "bytea", + "primaryKey": false, + "notNull": false + }, + "spec_version": { + "name": "spec_version", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "is_webhook": { + "name": "is_webhook", + "type": "boolean", + "primaryKey": false, + "notNull": false, + "default": true + } + }, + "indexes": { + "workflow_hooks_run_id_index": { + "name": "workflow_hooks_run_id_index", + "columns": [ + { + "expression": "run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, 
+ "concurrently": false, + "method": "btree", + "with": {} + }, + "workflow_hooks_token_index": { + "name": "workflow_hooks_token_index", + "columns": [ + { + "expression": "token", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "workflow.workflow_limit_leases": { + "name": "workflow_limit_leases", + "schema": "workflow", + "columns": { + "lease_id": { + "name": "lease_id", + "type": "varchar", + "primaryKey": true, + "notNull": true + }, + "limit_key": { + "name": "limit_key", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "holder_id": { + "name": "holder_id", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "acquired_at": { + "name": "acquired_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "concurrency_max": { + "name": "concurrency_max", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "rate_count": { + "name": "rate_count", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "rate_period_ms": { + "name": "rate_period_ms", + "type": "integer", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "workflow_limit_leases_limit_key_holder_id_index": { + "name": "workflow_limit_leases_limit_key_holder_id_index", + "columns": [ + { + "expression": "limit_key", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "holder_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "workflow_limit_leases_limit_key_expires_at_index": { 
+ "name": "workflow_limit_leases_limit_key_expires_at_index", + "columns": [ + { + "expression": "limit_key", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "workflow.workflow_limit_tokens": { + "name": "workflow_limit_tokens", + "schema": "workflow", + "columns": { + "token_id": { + "name": "token_id", + "type": "varchar", + "primaryKey": true, + "notNull": true + }, + "limit_key": { + "name": "limit_key", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "holder_id": { + "name": "holder_id", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "acquired_at": { + "name": "acquired_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "expires_at": { + "name": "expires_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "workflow_limit_tokens_limit_key_expires_at_index": { + "name": "workflow_limit_tokens_limit_key_expires_at_index", + "columns": [ + { + "expression": "limit_key", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "expires_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "workflow.workflow_limit_waiters": { + "name": "workflow_limit_waiters", + "schema": "workflow", + "columns": { + "waiter_id": { + "name": "waiter_id", + "type": "varchar", + "primaryKey": true, + "notNull": true + }, 
+ "limit_key": { + "name": "limit_key", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "holder_id": { + "name": "holder_id", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "lease_ttl_ms": { + "name": "lease_ttl_ms", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "concurrency_max": { + "name": "concurrency_max", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "rate_count": { + "name": "rate_count", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "rate_period_ms": { + "name": "rate_period_ms", + "type": "integer", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "workflow_limit_waiters_limit_key_holder_id_index": { + "name": "workflow_limit_waiters_limit_key_holder_id_index", + "columns": [ + { + "expression": "limit_key", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "holder_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "workflow_limit_waiters_limit_key_created_at_index": { + "name": "workflow_limit_waiters_limit_key_created_at_index", + "columns": [ + { + "expression": "limit_key", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "workflow.workflow_runs": { + "name": "workflow_runs", + "schema": "workflow", + "columns": { + "id": { + "name": "id", + "type": "varchar", + "primaryKey": true, + "notNull": true + 
}, + "output": { + "name": "output", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "output_cbor": { + "name": "output_cbor", + "type": "bytea", + "primaryKey": false, + "notNull": false + }, + "deployment_id": { + "name": "deployment_id", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "status": { + "name": "status", + "type": "status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "name": { + "name": "name", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "spec_version": { + "name": "spec_version", + "type": "integer", + "primaryKey": false, + "notNull": false + }, + "execution_context": { + "name": "execution_context", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "execution_context_cbor": { + "name": "execution_context_cbor", + "type": "bytea", + "primaryKey": false, + "notNull": false + }, + "input": { + "name": "input", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "input_cbor": { + "name": "input_cbor", + "type": "bytea", + "primaryKey": false, + "notNull": false + }, + "error": { + "name": "error", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "error_cbor": { + "name": "error_cbor", + "type": "bytea", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "completed_at": { + "name": "completed_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "started_at": { + "name": "started_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "expired_at": { + "name": "expired_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "workflow_runs_name_index": { + "name": 
"workflow_runs_name_index", + "columns": [ + { + "expression": "name", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "workflow_runs_status_index": { + "name": "workflow_runs_status_index", + "columns": [ + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "workflow.workflow_steps": { + "name": "workflow_steps", + "schema": "workflow", + "columns": { + "run_id": { + "name": "run_id", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "step_id": { + "name": "step_id", + "type": "varchar", + "primaryKey": true, + "notNull": true + }, + "step_name": { + "name": "step_name", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "status": { + "name": "status", + "type": "step_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "input": { + "name": "input", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "input_cbor": { + "name": "input_cbor", + "type": "bytea", + "primaryKey": false, + "notNull": false + }, + "output": { + "name": "output", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "output_cbor": { + "name": "output_cbor", + "type": "bytea", + "primaryKey": false, + "notNull": false + }, + "error": { + "name": "error", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "error_cbor": { + "name": "error_cbor", + "type": "bytea", + "primaryKey": false, + "notNull": false + }, + "attempt": { + "name": "attempt", + "type": "integer", + "primaryKey": false, + "notNull": true + }, + "started_at": { + "name": "started_at", + "type": "timestamp", + "primaryKey": false, 
+ "notNull": false + }, + "completed_at": { + "name": "completed_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "retry_after": { + "name": "retry_after", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "spec_version": { + "name": "spec_version", + "type": "integer", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "workflow_steps_run_id_index": { + "name": "workflow_steps_run_id_index", + "columns": [ + { + "expression": "run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "workflow_steps_status_index": { + "name": "workflow_steps_status_index", + "columns": [ + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "workflow.workflow_stream_chunks": { + "name": "workflow_stream_chunks", + "schema": "workflow", + "columns": { + "id": { + "name": "id", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "stream_id": { + "name": "stream_id", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "run_id": { + "name": "run_id", + "type": "varchar", + "primaryKey": false, + "notNull": false + }, + "data": { + "name": "data", + "type": "bytea", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + 
"eof": { + "name": "eof", + "type": "boolean", + "primaryKey": false, + "notNull": true + } + }, + "indexes": { + "workflow_stream_chunks_run_id_index": { + "name": "workflow_stream_chunks_run_id_index", + "columns": [ + { + "expression": "run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": { + "workflow_stream_chunks_stream_id_id_pk": { + "name": "workflow_stream_chunks_stream_id_id_pk", + "columns": ["stream_id", "id"] + } + }, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "workflow.workflow_waits": { + "name": "workflow_waits", + "schema": "workflow", + "columns": { + "wait_id": { + "name": "wait_id", + "type": "varchar", + "primaryKey": true, + "notNull": true + }, + "run_id": { + "name": "run_id", + "type": "varchar", + "primaryKey": false, + "notNull": true + }, + "status": { + "name": "status", + "type": "wait_status", + "typeSchema": "public", + "primaryKey": false, + "notNull": true + }, + "resume_at": { + "name": "resume_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "completed_at": { + "name": "completed_at", + "type": "timestamp", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "spec_version": { + "name": "spec_version", + "type": "integer", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "workflow_waits_run_id_index": { + "name": "workflow_waits_run_id_index", + "columns": [ + { + "expression": "run_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": 
"btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + } + }, + "enums": { + "public.step_status": { + "name": "step_status", + "schema": "public", + "values": ["pending", "running", "completed", "failed", "cancelled"] + }, + "public.wait_status": { + "name": "wait_status", + "schema": "public", + "values": ["waiting", "completed"] + }, + "public.status": { + "name": "status", + "schema": "public", + "values": ["pending", "running", "completed", "failed", "cancelled"] + } + }, + "schemas": { + "workflow": "workflow" + }, + "sequences": {}, + "roles": {}, + "policies": {}, + "views": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +} diff --git a/packages/world-postgres/src/drizzle/migrations/meta/_journal.json b/packages/world-postgres/src/drizzle/migrations/meta/_journal.json index f4956666fc..e98c400c01 100644 --- a/packages/world-postgres/src/drizzle/migrations/meta/_journal.json +++ b/packages/world-postgres/src/drizzle/migrations/meta/_journal.json @@ -71,6 +71,13 @@ "when": 1770500000000, "tag": "0009_add_is_webhook", "breakpoints": true + }, + { + "idx": 10, + "version": "7", + "when": 1773863098757, + "tag": "0010_add_flow_limits", + "breakpoints": true } ] } diff --git a/packages/world-postgres/src/drizzle/schema.ts b/packages/world-postgres/src/drizzle/schema.ts index f353ef8ca1..b6e8205237 100644 --- a/packages/world-postgres/src/drizzle/schema.ts +++ b/packages/world-postgres/src/drizzle/schema.ts @@ -21,6 +21,7 @@ import { primaryKey, text, timestamp, + uniqueIndex, varchar, } from 'drizzle-orm/pg-core'; import { Cbor, type Cborized } from './cbor.js'; @@ -192,6 +193,54 @@ export const waits = schema.table( (tb) => [index().on(tb.runId)] ); +export const limitLeases = schema.table( + 'workflow_limit_leases', + { + leaseId: varchar('lease_id').primaryKey(), + limitKey: varchar('limit_key').notNull(), + 
holderId: varchar('holder_id').notNull(), + acquiredAt: timestamp('acquired_at').defaultNow().notNull(), + expiresAt: timestamp('expires_at'), + concurrencyMax: integer('concurrency_max'), + rateCount: integer('rate_count'), + ratePeriodMs: integer('rate_period_ms'), + }, + (tb) => [ + uniqueIndex().on(tb.limitKey, tb.holderId), + index().on(tb.limitKey, tb.expiresAt), + ] +); + +export const limitTokens = schema.table( + 'workflow_limit_tokens', + { + tokenId: varchar('token_id').primaryKey(), + limitKey: varchar('limit_key').notNull(), + holderId: varchar('holder_id').notNull(), + acquiredAt: timestamp('acquired_at').defaultNow().notNull(), + expiresAt: timestamp('expires_at').notNull(), + }, + (tb) => [index().on(tb.limitKey, tb.expiresAt)] +); + +export const limitWaiters = schema.table( + 'workflow_limit_waiters', + { + waiterId: varchar('waiter_id').primaryKey(), + limitKey: varchar('limit_key').notNull(), + holderId: varchar('holder_id').notNull(), + createdAt: timestamp('created_at').defaultNow().notNull(), + leaseTtlMs: integer('lease_ttl_ms'), + concurrencyMax: integer('concurrency_max'), + rateCount: integer('rate_count'), + ratePeriodMs: integer('rate_period_ms'), + }, + (tb) => [ + uniqueIndex().on(tb.limitKey, tb.holderId), + index().on(tb.limitKey, tb.createdAt), + ] +); + const bytea = customType<{ data: Buffer; notNull: false; default: false }>({ dataType() { return 'bytea'; diff --git a/packages/world-postgres/src/index.ts b/packages/world-postgres/src/index.ts index 6f2993e3db..ad1a4c0028 100644 --- a/packages/world-postgres/src/index.ts +++ b/packages/world-postgres/src/index.ts @@ -3,6 +3,7 @@ import type { Storage, World } from '@workflow/world'; import createPostgres from 'postgres'; import type { PostgresWorldConfig } from './config.js'; import { createClient, type Drizzle } from './drizzle/index.js'; +import { createLimits } from './limits.js'; import { createQueue } from './queue.js'; import { createEventsStorage, @@ -37,8 +38,10 @@ export 
function createWorld( const queue = createQueue(config, postgres); const storage = createStorage(drizzle); const streamer = createStreamer(postgres, drizzle); + const limits = createLimits(config, drizzle); return { + limits, ...storage, ...streamer, ...queue, diff --git a/packages/world-postgres/src/limits.test.ts b/packages/world-postgres/src/limits.test.ts new file mode 100644 index 0000000000..44ab39f16e --- /dev/null +++ b/packages/world-postgres/src/limits.test.ts @@ -0,0 +1,78 @@ +import { afterAll, beforeAll, beforeEach, test } from 'vitest'; +import { createLimitsContractSuite } from '../../world-testing/src/limits-contract.js'; +import { createLimits } from './limits.js'; +import { + createEventsStorage, + createRunsStorage, + createStepsStorage, +} from './storage.js'; +import { createQueue } from './queue.js'; + +if (process.platform === 'win32') { + test.skip('skipped on Windows since it relies on a docker container', () => {}); +} else { + let db: Awaited< + ReturnType + >; + + beforeAll(async () => { + const { createPostgresTestDb } = await import('../test/test-db.js'); + db = await createPostgresTestDb(); + const queue = createQueue( + { connectionString: db.connectionString, queueConcurrency: 1 }, + db.sql + ); + await queue.start(); + await queue.close(); + }, 120_000); + + beforeEach(async () => { + await db.truncateLimits(); + }); + + afterAll(async () => { + await db?.close(); + }); + + createLimitsContractSuite('postgres world limits', async () => { + return { + limits: createLimits( + { connectionString: db.connectionString, queueConcurrency: 1 }, + db.drizzle + ), + storage: { + runs: createRunsStorage(db.drizzle), + steps: createStepsStorage(db.drizzle), + events: createEventsStorage(db.drizzle), + }, + inspectKeyState: async (key) => { + const [leases, waiters, tokens] = await Promise.all([ + db.sql<{ lockId: string }[]>` + select holder_id as "lockId" + from workflow.workflow_limit_leases + where limit_key = ${key} + order by holder_id 
asc + `, + db.sql<{ lockId: string }[]>` + select holder_id as "lockId" + from workflow.workflow_limit_waiters + where limit_key = ${key} + order by created_at asc, holder_id asc + `, + db.sql<{ lockId: string }[]>` + select holder_id as "lockId" + from workflow.workflow_limit_tokens + where limit_key = ${key} + order by acquired_at asc, holder_id asc + `, + ]); + + return { + leaseHolderIds: leases.map((row) => row.lockId), + waiterHolderIds: waiters.map((row) => row.lockId), + tokenHolderIds: tokens.map((row) => row.lockId), + }; + }, + }; + }); +} diff --git a/packages/world-postgres/src/limits.ts b/packages/world-postgres/src/limits.ts new file mode 100644 index 0000000000..22220ff8e3 --- /dev/null +++ b/packages/world-postgres/src/limits.ts @@ -0,0 +1,579 @@ +import { JsonTransport } from '@vercel/queue'; +import { and, asc, eq, isNotNull, lte, sql } from 'drizzle-orm'; +import { WorkflowWorldError } from '@workflow/errors'; +import { + createLockId, + createLockWakeCorrelationId, + LimitAcquireRequestSchema, + type LimitAcquireResult, + LimitHeartbeatRequestSchema, + type LimitLease, + LimitReleaseRequestSchema, + type Limits, + MessageId, + parseLockId, +} from '@workflow/world'; +import { monotonicFactory } from 'ulid'; +import type { PostgresWorldConfig } from './config.js'; +import type { Drizzle } from './drizzle/index.js'; +import * as Schema from './drizzle/schema.js'; +import { MessageData } from './message.js'; + +type LeaseRow = typeof Schema.limitLeases.$inferSelect; +type TokenRow = typeof Schema.limitTokens.$inferSelect; +type WaiterRow = typeof Schema.limitWaiters.$inferSelect; +type RunRow = Pick< + typeof Schema.runs.$inferSelect, + 'workflowName' | 'startedAt' | 'status' +>; +type Tx = Parameters[0]>[0]; +type Db = Drizzle | Tx; + +type HolderTarget = + | { + kind: 'lock'; + runId: string; + correlationId: string; + } + | { + kind: 'opaque'; + }; + +const transport = new JsonTransport(); +const generateId = monotonicFactory(); + +function 
getQueues(config: PostgresWorldConfig) { + const prefix = config.jobPrefix || 'workflow_'; + return { + workflow: `${prefix}flows`, + } as const; +} + +function nowPlus(ms?: number): Date | undefined { + if (ms === undefined) return undefined; + return new Date(Date.now() + ms); +} + +function toDate(value: Date | string | null | undefined): Date | undefined { + if (value === null || value === undefined) return undefined; + return value instanceof Date ? value : new Date(value); +} + +function toMillis(value: Date | string | null | undefined): number | undefined { + const date = toDate(value); + return date ? date.getTime() : undefined; +} + +/* +Holder ids double as wake-up hints. +When a waiter is promoted, we decode the holder id to decide which queue to poke. +*/ +function parseHolderId(lockId: string): HolderTarget { + const parsedLockId = parseLockId(lockId); + if (parsedLockId) { + return { + kind: 'lock', + runId: parsedLockId.runId, + correlationId: createLockWakeCorrelationId( + parsedLockId.runId, + parsedLockId.lockIndex + ), + }; + } + + return { kind: 'opaque' }; +} + +function toLease(row: LeaseRow): LimitLease { + const parsedLockId = parseLockId(row.holderId); + return { + leaseId: row.leaseId, + key: row.limitKey, + lockId: row.holderId, + runId: parsedLockId?.runId ?? row.holderId, + lockIndex: parsedLockId?.lockIndex ?? 0, + acquiredAt: toDate(row.acquiredAt)!, + expiresAt: toDate(row.expiresAt), + definition: { + concurrency: + row.concurrencyMax !== null ? { max: row.concurrencyMax } : undefined, + rate: + row.rateCount !== null && row.ratePeriodMs !== null + ? 
{ + count: row.rateCount, + periodMs: row.ratePeriodMs, + } + : undefined, + }, + }; +} + +function getBlockedReason( + concurrencyBlocked: boolean, + rateBlocked: boolean +): 'concurrency' | 'rate' | 'concurrency_and_rate' { + if (concurrencyBlocked && rateBlocked) return 'concurrency_and_rate'; + if (concurrencyBlocked) return 'concurrency'; + return 'rate'; +} + +/* +When a workflow or step is blocked, we need to calculate the retry after time. +We do this by finding the earliest expiration time for any leases or tokens. +*/ +function getRetryAfterMs( + leases: LeaseRow[], + tokens: TokenRow[], + now: number, + concurrencyBlocked: boolean, + rateBlocked: boolean +): number | undefined { + const candidates: number[] = []; + + if (concurrencyBlocked) { + for (const lease of leases) { + if (lease.expiresAt) { + candidates.push(Math.max(0, toMillis(lease.expiresAt)! - now)); + } + } + } + + if (rateBlocked) { + for (const token of tokens) { + candidates.push(Math.max(0, toMillis(token.expiresAt)! - now)); + } + } + + if (candidates.length === 0) return undefined; + return Math.min(...candidates); +} + +async function queueWorkflowWake( + tx: Db, + config: PostgresWorldConfig, + runId: string, + workflowName: string, + idempotencyKey: string +) { + const messageId = MessageId.parse(`msg_${generateId()}`); + const payload = MessageData.encode({ + id: workflowName, + data: Buffer.from( + transport.serialize({ + runId, + requestedAt: new Date(), + }) + ), + attempt: 1, + idempotencyKey, + messageId, + }); + + await tx.execute(sql` + select graphile_worker.add_job( + ${getQueues(config).workflow}::text, + payload := ${JSON.stringify(payload)}::json, + max_attempts := 3, + job_key := ${idempotencyKey}::text, + job_key_mode := 'replace' + ) + `); +} + +async function queueWakeForHolder( + tx: Db, + config: PostgresWorldConfig, + holderId: string +) { + /* + Limit state is durable in Postgres, but wake-ups still need a runtime target. 
+ If the workflow is already terminal, there is nothing left to resume. + */ + const target = parseHolderId(holderId); + if (target.kind === 'opaque') { + return; + } + + const [run] = (await tx + .select({ + workflowName: Schema.runs.workflowName, + startedAt: Schema.runs.startedAt, + status: Schema.runs.status, + }) + .from(Schema.runs) + .where(eq(Schema.runs.runId, target.runId)) + .limit(1)) as RunRow[]; + if (!run || ['completed', 'failed', 'cancelled'].includes(run.status)) { + return; + } + + await queueWorkflowWake( + tx, + config, + target.runId, + run.workflowName, + target.correlationId + ); +} + +async function pruneExpired(tx: Db, key: string): Promise { + /* + Capacity is reclaimed opportunistically whenever a key is touched. + This keeps v1 simple and avoids needing a separate cleanup worker. + */ + const now = new Date(); + + await tx + .delete(Schema.limitTokens) + .where( + and( + eq(Schema.limitTokens.limitKey, key), + lte(Schema.limitTokens.expiresAt, now) + ) + ); + + await tx + .delete(Schema.limitLeases) + .where( + and( + eq(Schema.limitLeases.limitKey, key), + isNotNull(Schema.limitLeases.expiresAt), + lte(Schema.limitLeases.expiresAt, now) + ) + ); +} + +async function getActiveState( + tx: Db, + key: string +): Promise<{ + leases: LeaseRow[]; + tokens: TokenRow[]; + waiters: WaiterRow[]; +}> { + const [leases, tokens, waiters] = await Promise.all([ + tx + .select() + .from(Schema.limitLeases) + .where(eq(Schema.limitLeases.limitKey, key)) + .orderBy( + asc(Schema.limitLeases.acquiredAt), + asc(Schema.limitLeases.leaseId) + ), + tx + .select() + .from(Schema.limitTokens) + .where(eq(Schema.limitTokens.limitKey, key)) + .orderBy(asc(Schema.limitTokens.expiresAt)), + tx + .select() + .from(Schema.limitWaiters) + .where(eq(Schema.limitWaiters.limitKey, key)) + .orderBy( + asc(Schema.limitWaiters.createdAt), + asc(Schema.limitWaiters.waiterId) + ), + ]); + + return { leases, tokens, waiters }; +} + +/* +We serialize limit mutations per key 
inside the transaction so concurrent +acquire/release flows cannot both observe the same free capacity. +*/ +async function lockLimitKey(tx: Db, key: string): Promise { + await tx.execute( + sql`select pg_advisory_xact_lock(hashtextextended(${key}, 0))` + ); +} + +async function isHolderLive(tx: Db, holderId: string): Promise { + const target = parseHolderId(holderId); + if (target.kind === 'opaque') { + return true; + } + + const [run] = (await tx + .select({ + status: Schema.runs.status, + }) + .from(Schema.runs) + .where(eq(Schema.runs.runId, target.runId)) + .limit(1)) as Pick[]; + + return !!run && !['completed', 'failed', 'cancelled'].includes(run.status); +} + +async function promoteWaiters( + tx: Db, + config: PostgresWorldConfig, + key: string +): Promise { + /* + We walk waiters in FIFO order and stop at the first waiter that is still blocked. + Later waiters cannot jump ahead of an earlier waiter for the same key. (getActiveState returns waiters in FIFO order) + */ + const state = await getActiveState(tx, key); + let activeLeases = state.leases.length; + let activeTokens = state.tokens.length; + + for (const waiter of state.waiters) { + if (!(await isHolderLive(tx, waiter.holderId))) { + await tx + .delete(Schema.limitWaiters) + .where(eq(Schema.limitWaiters.waiterId, waiter.waiterId)); + continue; + } + + const concurrencyBlocked = + waiter.concurrencyMax !== null && activeLeases >= waiter.concurrencyMax; + const rateBlocked = + waiter.rateCount !== null && activeTokens >= waiter.rateCount; + + if (concurrencyBlocked || rateBlocked) { + break; + } + + const leaseId = `lmt_${generateId()}`; + const expiresAt = nowPlus(waiter.leaseTtlMs ?? 
undefined); + const [lease] = await tx + .insert(Schema.limitLeases) + .values({ + leaseId, + limitKey: key, + holderId: waiter.holderId, + acquiredAt: new Date(), + expiresAt, + concurrencyMax: waiter.concurrencyMax, + rateCount: waiter.rateCount, + ratePeriodMs: waiter.ratePeriodMs, + }) + .onConflictDoNothing() + .returning(); + + const acquiredLease = + lease ?? + (await tx.query.limitLeases.findFirst({ + where: and( + eq(Schema.limitLeases.limitKey, key), + eq(Schema.limitLeases.holderId, waiter.holderId) + ), + })); + + if (!acquiredLease) { + continue; + } + + if (waiter.rateCount !== null && waiter.ratePeriodMs !== null) { + await tx.insert(Schema.limitTokens).values({ + tokenId: `lmttok_${generateId()}`, + limitKey: key, + holderId: waiter.holderId, + acquiredAt: new Date(), + expiresAt: new Date(Date.now() + waiter.ratePeriodMs), + }); + activeTokens += 1; + } + + await tx + .delete(Schema.limitWaiters) + .where(eq(Schema.limitWaiters.waiterId, waiter.waiterId)); + + activeLeases += 1; + await queueWakeForHolder(tx, config, acquiredLease.holderId); + } +} + +export function createLimits( + config: PostgresWorldConfig, + drizzle: Drizzle +): Limits { + return { + async acquire(request) { + const parsed = LimitAcquireRequestSchema.parse(request); + + return drizzle.transaction(async (tx) => { + await lockLimitKey(tx, parsed.key); + // Prune expired leases and tokens, promote pre-existing waiters before attempting to acquire a new lease or token. 
+ await pruneExpired(tx, parsed.key); + await promoteWaiters(tx, config, parsed.key); + + const state = await getActiveState(tx, parsed.key); + const lockId = createLockId(parsed.runId, parsed.lockIndex); + const existingLease = state.leases.find( + (lease) => lease.holderId === lockId + ); + if (existingLease) { + return { + status: 'acquired', + lease: toLease(existingLease), + } satisfies LimitAcquireResult; + } + + const existingWaiter = state.waiters.find( + (waiter) => waiter.holderId === lockId + ); + // If there are already waiters for this key and holder no need to queue a new waiter. + if (existingWaiter) { + const now = Date.now(); + const concurrencyBlocked = + parsed.definition.concurrency !== undefined && + state.leases.length >= parsed.definition.concurrency.max; + const rateBlocked = + parsed.definition.rate !== undefined && + state.tokens.length >= parsed.definition.rate.count; + return { + status: 'blocked', + reason: getBlockedReason(concurrencyBlocked, rateBlocked), + retryAfterMs: + getRetryAfterMs( + state.leases, + state.tokens, + now, + concurrencyBlocked, + rateBlocked + ) ?? 1000, + } satisfies LimitAcquireResult; + } + + const concurrencyBlocked = + parsed.definition.concurrency !== undefined && + state.leases.length >= parsed.definition.concurrency.max; + const rateBlocked = + parsed.definition.rate !== undefined && + state.tokens.length >= parsed.definition.rate.count; + + // If we are not blocked, and there are no waiters for this key and holder, we can acquire a new lease or token. + if (!concurrencyBlocked && !rateBlocked && state.waiters.length === 0) { + const expiresAt = nowPlus(parsed.leaseTtlMs); + const [lease] = await tx + .insert(Schema.limitLeases) + .values({ + leaseId: `lmt_${generateId()}`, + limitKey: parsed.key, + holderId: lockId, + acquiredAt: new Date(), + expiresAt, + concurrencyMax: parsed.definition.concurrency?.max ?? null, + rateCount: parsed.definition.rate?.count ?? 
null, + ratePeriodMs: parsed.definition.rate?.periodMs ?? null, + }) + .returning(); + + if (parsed.definition.rate) { + await tx.insert(Schema.limitTokens).values({ + tokenId: `lmttok_${generateId()}`, + limitKey: parsed.key, + holderId: lockId, + acquiredAt: new Date(), + expiresAt: new Date(Date.now() + parsed.definition.rate.periodMs), + }); + } + + return { + status: 'acquired', + lease: toLease(lease), + } satisfies LimitAcquireResult; + } + + // If we are blocked, we need to queue a waiter. + await tx + .insert(Schema.limitWaiters) + .values({ + waiterId: `lmtwait_${generateId()}`, + limitKey: parsed.key, + holderId: lockId, + createdAt: new Date(), + leaseTtlMs: parsed.leaseTtlMs ?? null, + concurrencyMax: parsed.definition.concurrency?.max ?? null, + rateCount: parsed.definition.rate?.count ?? null, + ratePeriodMs: parsed.definition.rate?.periodMs ?? null, + }) + .onConflictDoNothing(); + + const now = Date.now(); + return { + status: 'blocked', + reason: getBlockedReason(concurrencyBlocked, rateBlocked), + retryAfterMs: + getRetryAfterMs( + state.leases, + state.tokens, + now, + parsed.definition.concurrency !== undefined, + parsed.definition.rate !== undefined + ) ?? 1000, + } satisfies LimitAcquireResult; + }); + }, + + async release(request) { + const parsed = LimitReleaseRequestSchema.parse(request); + + await drizzle.transaction(async (tx) => { + const key = + parsed.key ?? 
+ ( + await tx.query.limitLeases.findFirst({ + columns: { limitKey: true }, + where: eq(Schema.limitLeases.leaseId, parsed.leaseId), + }) + )?.limitKey; + + if (key) { + await lockLimitKey(tx, key); + } + + let where = eq(Schema.limitLeases.leaseId, parsed.leaseId); + if (parsed.key) { + where = and(where, eq(Schema.limitLeases.limitKey, parsed.key))!; + } + if (parsed.lockId) { + where = and(where, eq(Schema.limitLeases.holderId, parsed.lockId))!; + } + + const [deleted] = await tx + .delete(Schema.limitLeases) + .where(where) + .returning({ limitKey: Schema.limitLeases.limitKey }); + + if (deleted?.limitKey) { + await pruneExpired(tx, deleted.limitKey); + await promoteWaiters(tx, config, deleted.limitKey); + } + }); + }, + + async heartbeat(request) { + const parsed = LimitHeartbeatRequestSchema.parse(request); + + // Heartbeat a lease to extend its expiry. + return drizzle.transaction(async (tx) => { + const existing = await tx.query.limitLeases.findFirst({ + where: eq(Schema.limitLeases.leaseId, parsed.leaseId), + }); + + if (!existing) { + throw new WorkflowWorldError(`Lease "${parsed.leaseId}" not found`); + } + + await lockLimitKey(tx, existing.limitKey); + + const now = Date.now(); + const currentExpiry = toMillis(existing.expiresAt); + const ttlMs = + parsed.ttlMs ?? (currentExpiry ? 
currentExpiry - now : 30_000); + const expiresAt = new Date(now + Math.max(1, ttlMs)); + + const [updated] = await tx + .update(Schema.limitLeases) + .set({ expiresAt }) + .where(eq(Schema.limitLeases.leaseId, parsed.leaseId)) + .returning(); + + return toLease(updated); + }); + }, + }; +} diff --git a/packages/world-postgres/test/storage.test.ts b/packages/world-postgres/test/storage.test.ts index 8ffe5ad62b..3023790d65 100644 --- a/packages/world-postgres/test/storage.test.ts +++ b/packages/world-postgres/test/storage.test.ts @@ -1,5 +1,6 @@ import { execSync } from 'node:child_process'; import { PostgreSqlContainer } from '@testcontainers/postgresql'; +import { EntityConflictError, WorkflowWorldError } from '@workflow/errors'; import type { Hook, Step, WorkflowRun } from '@workflow/world'; import { encode } from 'cbor-x'; import postgres from 'postgres'; diff --git a/packages/world-postgres/test/test-db.ts b/packages/world-postgres/test/test-db.ts new file mode 100644 index 0000000000..ef27f70052 --- /dev/null +++ b/packages/world-postgres/test/test-db.ts @@ -0,0 +1,59 @@ +import { execSync } from 'node:child_process'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { PostgreSqlContainer } from '@testcontainers/postgresql'; +import postgres from 'postgres'; +import { createClient } from '../src/drizzle/index.js'; + +const packageDir = path.resolve( + path.dirname(fileURLToPath(import.meta.url)), + '..' 
+); + +export interface PostgresTestDb { + container: Awaited>; + sql: ReturnType; + drizzle: ReturnType; + connectionString: string; + truncateLimits(): Promise; + close(): Promise; +} + +export async function createPostgresTestDb(): Promise { + const container = await new PostgreSqlContainer('postgres:15-alpine').start(); + const connectionString = container.getConnectionUri(); + process.env.DATABASE_URL = connectionString; + process.env.WORKFLOW_POSTGRES_URL = connectionString; + + execSync('pnpm db:push', { + stdio: 'inherit', + cwd: packageDir, + env: process.env, + }); + + const sql = postgres(connectionString, { max: 10 }); + const drizzle = createClient(sql); + + return { + container, + sql, + drizzle, + connectionString, + async truncateLimits() { + await sql` + truncate table + workflow.workflow_limit_waiters, + workflow.workflow_limit_tokens, + workflow.workflow_limit_leases, + workflow.workflow_steps, + workflow.workflow_events, + workflow.workflow_runs + restart identity cascade + `; + }, + async close() { + await sql.end(); + await container.stop(); + }, + }; +} diff --git a/packages/world-testing/src/index.mts b/packages/world-testing/src/index.mts index 4b59e15267..db42585942 100644 --- a/packages/world-testing/src/index.mts +++ b/packages/world-testing/src/index.mts @@ -2,6 +2,8 @@ import { addition } from './addition.mjs'; import { errors } from './errors.mjs'; import { hooks } from './hooks.mjs'; import { idempotency } from './idempotency.mjs'; +export { createLimitsContractSuite } from './limits-contract.js'; +export { createLimitsRuntimeSuite } from './limits-runtime.js'; import { nullByte } from './null-byte.mjs'; export function createTestSuite(pkgName: string) { diff --git a/packages/world-testing/src/limits-contract.ts b/packages/world-testing/src/limits-contract.ts new file mode 100644 index 0000000000..f36c33c410 --- /dev/null +++ b/packages/world-testing/src/limits-contract.ts @@ -0,0 +1,767 @@ +import { setTimeout as sleep } from 
'node:timers/promises'; +import { + SPEC_VERSION_CURRENT, + type LimitDefinition, + type LimitLease, + type Limits, + type Storage, +} from '@workflow/world'; +import { describe, expect, it } from 'vitest'; + +export interface LimitsHarness { + limits: Limits; + storage?: Pick; + inspectKeyState: (key: string) => Promise<{ + leaseHolderIds: string[]; + waiterHolderIds: string[]; + tokenHolderIds: string[]; + }>; + close?: () => Promise; +} + +interface LockOwner { + lockId: string; + runId: string; + lockIndex: number; +} + +function createTestLockId(runId: string, lockIndex: number) { + return `${runId}:${lockIndex}`; +} + +async function createRun( + storage: Pick, + workflowName: string +) { + const result = await storage.events.create(null, { + eventType: 'run_created', + specVersion: SPEC_VERSION_CURRENT, + eventData: { + deploymentId: 'deployment-123', + workflowName, + input: [], + }, + }); + if (!result.run) { + throw new Error('expected run'); + } + return result.run; +} + +function requireEventsStorage( + storage: LimitsHarness['storage'] +): Pick { + if (!storage) { + throw new Error('storage.events is required for limits tests'); + } + return storage; +} + +async function createLockOwner( + storage: LimitsHarness['storage'], + workflowName: string, + lockIndex = 0 +): Promise { + const run = await createRun(requireEventsStorage(storage), workflowName); + return { + lockId: createTestLockId(run.runId, lockIndex), + runId: run.runId, + lockIndex, + }; +} + +function acquireRequest( + owner: LockOwner, + key: string, + definition: LimitDefinition, + leaseTtlMs?: number +) { + return { + key, + runId: owner.runId, + lockIndex: owner.lockIndex, + definition, + ...(leaseTtlMs !== undefined ? 
{ leaseTtlMs } : {}), + }; +} + +function releaseRequest(lease: LimitLease) { + return { + leaseId: lease.leaseId, + key: lease.key, + lockId: lease.lockId, + }; +} + +export function createLimitsContractSuite( + name: string, + createHarness: () => Promise +) { + describe(name, () => { + it('throws a workflow world error when heartbeating a missing lease', async () => { + const harness = await createHarness(); + try { + await expect( + harness.limits.heartbeat({ + leaseId: 'lmt_missing', + }) + ).rejects.toMatchObject({ + name: 'WorkflowWorldError', + message: expect.stringContaining('not found'), + }); + } finally { + await harness.close?.(); + } + }); + + it('enforces per-key concurrency limits', async () => { + const harness = await createHarness(); + try { + const ownerA = await createLockOwner(harness.storage, 'holder-a'); + const ownerB = await createLockOwner(harness.storage, 'holder-b'); + const first = await harness.limits.acquire( + acquireRequest( + ownerA, + 'step:db:cheap', + { concurrency: { max: 1 } }, + 1_000 + ) + ); + expect(first.status).toBe('acquired'); + if (first.status !== 'acquired') + throw new Error('expected acquisition'); + + const second = await harness.limits.acquire( + acquireRequest( + ownerB, + 'step:db:cheap', + { concurrency: { max: 1 } }, + 1_000 + ) + ); + expect(second).toMatchObject({ + status: 'blocked', + reason: 'concurrency', + }); + + await harness.limits.release(releaseRequest(first.lease)); + + const third = await harness.limits.acquire( + acquireRequest( + ownerB, + 'step:db:cheap', + { concurrency: { max: 1 } }, + 1_000 + ) + ); + expect(third.status).toBe('acquired'); + } finally { + await harness.close?.(); + } + }); + + it('isolates unrelated keys at the raw limits layer', async () => { + const harness = await createHarness(); + try { + const ownerA = await createLockOwner(harness.storage, 'holder-a'); + const ownerB = await createLockOwner(harness.storage, 'holder-b'); + const [first, second] = await 
Promise.all([ + harness.limits.acquire( + acquireRequest( + ownerA, + 'workflow:user:a', + { concurrency: { max: 1 } }, + 1_000 + ) + ), + harness.limits.acquire( + acquireRequest( + ownerB, + 'workflow:user:b', + { concurrency: { max: 1 } }, + 1_000 + ) + ), + ]); + + expect(first.status).toBe('acquired'); + expect(second.status).toBe('acquired'); + } finally { + await harness.close?.(); + } + }); + + it('serializes concurrent acquires for the same key', async () => { + const harness = await createHarness(); + try { + const owners = await Promise.all( + Array.from({ length: 12 }, (_, index) => + createLockOwner(harness.storage, `holder-${index}`) + ) + ); + const results = await Promise.all( + owners.map((owner) => + harness.limits.acquire( + acquireRequest( + owner, + 'workflow:user:concurrent', + { concurrency: { max: 1 } }, + 1_000 + ) + ) + ) + ); + + const acquired = results.filter( + (result) => result.status === 'acquired' + ); + const blocked = results.filter((result) => result.status === 'blocked'); + + expect(acquired).toHaveLength(1); + expect(blocked).toHaveLength(11); + } finally { + await harness.close?.(); + } + }); + + it('keeps rate capacity consumed until the window expires', async () => { + const harness = await createHarness(); + try { + const periodMs = 200; + const ownerA = await createLockOwner(harness.storage, 'holder-a'); + const ownerB = await createLockOwner(harness.storage, 'holder-b'); + const ownerC = await createLockOwner(harness.storage, 'holder-c'); + const first = await harness.limits.acquire( + acquireRequest( + ownerA, + 'step:provider:openai', + { rate: { count: 1, periodMs } }, + 1_000 + ) + ); + expect(first.status).toBe('acquired'); + if (first.status !== 'acquired') + throw new Error('expected acquisition'); + + await harness.limits.release(releaseRequest(first.lease)); + + const second = await harness.limits.acquire( + acquireRequest( + ownerB, + 'step:provider:openai', + { rate: { count: 1, periodMs } }, + 1_000 + ) + ); 
+ expect(second.status).toBe('blocked'); + if (second.status !== 'blocked') throw new Error('expected blocked'); + expect(second.reason).toBe('rate'); + expect(second.retryAfterMs).toBeGreaterThanOrEqual(0); + + let third = await harness.limits.acquire( + acquireRequest( + ownerC, + 'step:provider:openai', + { rate: { count: 1, periodMs } }, + 1_000 + ) + ); + const deadline = Date.now() + periodMs + 1_000; + while (third.status === 'blocked' && Date.now() < deadline) { + await sleep(Math.max(25, third.retryAfterMs ?? 0) + 50); + third = await harness.limits.acquire( + acquireRequest( + ownerC, + 'step:provider:openai', + { rate: { count: 1, periodMs } }, + 1_000 + ) + ); + } + expect(third.status).toBe('acquired'); + } finally { + await harness.close?.(); + } + }); + + it('returns a combined blocked reason when both limits are saturated', async () => { + const harness = await createHarness(); + try { + const periodMs = 300; + const ownerA = await createLockOwner(harness.storage, 'holder-a'); + const ownerB = await createLockOwner(harness.storage, 'holder-b'); + const first = await harness.limits.acquire( + acquireRequest( + ownerA, + 'step:mixed', + { + concurrency: { max: 1 }, + rate: { count: 1, periodMs }, + }, + 1_000 + ) + ); + expect(first.status).toBe('acquired'); + if (first.status !== 'acquired') + throw new Error('expected acquisition'); + + const second = await harness.limits.acquire( + acquireRequest( + ownerB, + 'step:mixed', + { + concurrency: { max: 1 }, + rate: { count: 1, periodMs }, + }, + 1_000 + ) + ); + expect(second).toMatchObject({ + status: 'blocked', + reason: 'concurrency_and_rate', + }); + if (second.status !== 'blocked') throw new Error('expected blocked'); + + await harness.limits.release(releaseRequest(first.lease)); + + const third = await harness.limits.acquire( + acquireRequest( + ownerB, + 'step:mixed', + { + concurrency: { max: 1 }, + rate: { count: 1, periodMs }, + }, + 1_000 + ) + ); + expect(third).toMatchObject({ + status: 
'blocked', + reason: 'rate', + }); + + let fourth = third; + const deadline = Date.now() + periodMs + 1_000; + while (fourth.status === 'blocked' && Date.now() < deadline) { + await sleep(Math.max(25, fourth.retryAfterMs ?? 0) + 50); + fourth = await harness.limits.acquire( + acquireRequest( + ownerB, + 'step:mixed', + { + concurrency: { max: 1 }, + rate: { count: 1, periodMs }, + }, + 1_000 + ) + ); + } + + expect(fourth.status).toBe('acquired'); + } finally { + await harness.close?.(); + } + }); + + it('restores capacity immediately when a lease is released', async () => { + const harness = await createHarness(); + try { + const ownerA = await createLockOwner(harness.storage, 'holder-a'); + const ownerB = await createLockOwner(harness.storage, 'holder-b'); + const first = await harness.limits.acquire( + acquireRequest( + ownerA, + 'workflow:user:123', + { concurrency: { max: 1 } }, + 1_000 + ) + ); + expect(first.status).toBe('acquired'); + if (first.status !== 'acquired') + throw new Error('expected acquisition'); + + const second = await harness.limits.acquire( + acquireRequest( + ownerB, + 'workflow:user:123', + { concurrency: { max: 1 } }, + 1_000 + ) + ); + expect(second.status).toBe('blocked'); + + await harness.limits.release(releaseRequest(first.lease)); + + const third = await harness.limits.acquire( + acquireRequest( + ownerB, + 'workflow:user:123', + { concurrency: { max: 1 } }, + 1_000 + ) + ); + expect(third.status).toBe('acquired'); + } finally { + await harness.close?.(); + } + }); + + it('extends lease expiry when heartbeated', async () => { + const harness = await createHarness(); + try { + const ownerA = await createLockOwner(harness.storage, 'holder-a'); + const ownerB = await createLockOwner(harness.storage, 'holder-b'); + const first = await harness.limits.acquire( + acquireRequest( + ownerA, + 'workflow:user:heartbeat', + { concurrency: { max: 1 } }, + 200 + ) + ); + expect(first.status).toBe('acquired'); + if (first.status !== 'acquired') + 
throw new Error('expected acquisition'); + + const heartbeat = await harness.limits.heartbeat({ + leaseId: first.lease.leaseId, + ttlMs: 600, + }); + + expect(heartbeat.expiresAt?.getTime()).toBeGreaterThan( + first.lease.expiresAt?.getTime() ?? 0 + ); + + const second = await harness.limits.acquire( + acquireRequest( + ownerB, + 'workflow:user:heartbeat', + { concurrency: { max: 1 } }, + 1_000 + ) + ); + expect(second.status).toBe('blocked'); + } finally { + await harness.close?.(); + } + }); + + it('reclaims expired leases without manual cleanup', async () => { + const harness = await createHarness(); + try { + const ownerA = await createLockOwner(harness.storage, 'holder-a'); + const ownerB = await createLockOwner(harness.storage, 'holder-b'); + const first = await harness.limits.acquire( + acquireRequest( + ownerA, + 'workflow:user:expired', + { concurrency: { max: 1 } }, + 250 + ) + ); + expect(first.status).toBe('acquired'); + if (first.status !== 'acquired') + throw new Error('expected acquisition'); + + const second = await harness.limits.acquire( + acquireRequest( + ownerB, + 'workflow:user:expired', + { concurrency: { max: 1 } }, + 1_000 + ) + ); + expect(second.status).toBe('blocked'); + + await sleep(400); + + const third = await harness.limits.acquire( + acquireRequest( + ownerB, + 'workflow:user:expired', + { concurrency: { max: 1 } }, + 1_000 + ) + ); + expect(third.status).toBe('acquired'); + } finally { + await harness.close?.(); + } + }); + + it('reuses an existing lease for the same holder', async () => { + const harness = await createHarness(); + try { + const ownerA = await createLockOwner(harness.storage, 'holder-a'); + const first = await harness.limits.acquire( + acquireRequest( + ownerA, + 'workflow:user:reacquire', + { concurrency: { max: 1 } }, + 1_000 + ) + ); + expect(first.status).toBe('acquired'); + if (first.status !== 'acquired') + throw new Error('expected acquisition'); + + const second = await harness.limits.acquire( + 
acquireRequest( + ownerA, + 'workflow:user:reacquire', + { concurrency: { max: 1 } }, + 1_000 + ) + ); + expect(second).toMatchObject({ + status: 'acquired', + lease: { + leaseId: first.lease.leaseId, + lockId: first.lease.lockId, + }, + }); + + if (!harness.inspectKeyState) { + throw new Error( + 'inspectKeyState is required for duplicate lease checks' + ); + } + const keyState = await harness.inspectKeyState( + 'workflow:user:reacquire' + ); + expect( + keyState.leaseHolderIds.filter((lockId) => lockId === ownerA.lockId) + ).toHaveLength(1); + expect( + keyState.waiterHolderIds.filter((lockId) => lockId === ownerA.lockId) + ).toHaveLength(0); + } finally { + await harness.close?.(); + } + }); + + it('promotes waiters in FIFO order per key', async () => { + const harness = await createHarness(); + try { + const ownerA = await createLockOwner(harness.storage, 'holder-a'); + const ownerB = await createLockOwner(harness.storage, 'holder-b'); + const ownerC = await createLockOwner(harness.storage, 'holder-c'); + const first = await harness.limits.acquire( + acquireRequest( + ownerA, + 'workflow:user:ordered', + { concurrency: { max: 1 } }, + 1_000 + ) + ); + expect(first.status).toBe('acquired'); + if (first.status !== 'acquired') + throw new Error('expected acquisition'); + + const second = await harness.limits.acquire( + acquireRequest( + ownerB, + 'workflow:user:ordered', + { concurrency: { max: 1 } }, + 1_000 + ) + ); + const third = await harness.limits.acquire( + acquireRequest( + ownerC, + 'workflow:user:ordered', + { concurrency: { max: 1 } }, + 1_000 + ) + ); + + expect(second.status).toBe('blocked'); + expect(third.status).toBe('blocked'); + + await harness.limits.release(releaseRequest(first.lease)); + + const promoted = await harness.limits.acquire( + acquireRequest( + ownerB, + 'workflow:user:ordered', + { concurrency: { max: 1 } }, + 1_000 + ) + ); + const stillWaiting = await harness.limits.acquire( + acquireRequest( + ownerC, + 'workflow:user:ordered', 
+ { concurrency: { max: 1 } }, + 1_000 + ) + ); + + expect(promoted.status).toBe('acquired'); + expect(stillWaiting.status).toBe('blocked'); + if (promoted.status !== 'acquired') + throw new Error('expected waiter-b promotion'); + + await harness.limits.release(releaseRequest(promoted.lease)); + + const thirdPromoted = await harness.limits.acquire( + acquireRequest( + ownerC, + 'workflow:user:ordered', + { concurrency: { max: 1 } }, + 1_000 + ) + ); + + expect(thirdPromoted.status).toBe('acquired'); + } finally { + await harness.close?.(); + } + }); + + it('skips cancelled workflow waiters before promotion', async () => { + const harness = await createHarness(); + try { + if (!harness.storage) { + throw new Error('storage is required for workflow waiter liveness'); + } + + const deadRun = await createRun(harness.storage, 'dead-workflow'); + await harness.storage.events.create(deadRun.runId, { + eventType: 'run_started', + specVersion: SPEC_VERSION_CURRENT, + }); + await harness.storage.events.create(deadRun.runId, { + eventType: 'run_cancelled', + specVersion: SPEC_VERSION_CURRENT, + }); + + const liveRun = await createRun(harness.storage, 'live-workflow'); + await harness.storage.events.create(liveRun.runId, { + eventType: 'run_started', + specVersion: SPEC_VERSION_CURRENT, + }); + const liveOwner = { + lockId: createTestLockId(liveRun.runId, 0), + runId: liveRun.runId, + lockIndex: 0, + }; + const deadOwner = { + lockId: createTestLockId(deadRun.runId, 0), + runId: deadRun.runId, + lockIndex: 0, + }; + const ownerA = await createLockOwner(harness.storage, 'holder-a'); + + const first = await harness.limits.acquire( + acquireRequest( + ownerA, + 'workflow:user:skip-dead-workflow', + { concurrency: { max: 1 } }, + 5_000 + ) + ); + expect(first.status).toBe('acquired'); + if (first.status !== 'acquired') + throw new Error('expected acquisition'); + + await harness.limits.acquire( + acquireRequest( + deadOwner, + 'workflow:user:skip-dead-workflow', + { concurrency: { 
max: 1 } }, + 5_000 + ) + ); + await harness.limits.acquire( + acquireRequest( + liveOwner, + 'workflow:user:skip-dead-workflow', + { concurrency: { max: 1 } }, + 5_000 + ) + ); + + await harness.limits.release(releaseRequest(first.lease)); + + const promoted = await harness.limits.acquire( + acquireRequest( + liveOwner, + 'workflow:user:skip-dead-workflow', + { concurrency: { max: 1 } }, + 5_000 + ) + ); + + expect(promoted.status).toBe('acquired'); + } finally { + await harness.close?.(); + } + }); + + it('does not duplicate a replayed blocked holder waiter or lease', async () => { + const harness = await createHarness(); + try { + const key = 'workflow:user:replay'; + const ownerA = await createLockOwner(harness.storage, 'holder-a'); + const replayOwner = await createLockOwner( + harness.storage, + 'holder-replay' + ); + const blockedLockId = replayOwner.lockId; + + const first = await harness.limits.acquire( + acquireRequest(ownerA, key, { concurrency: { max: 1 } }, 1_000) + ); + expect(first.status).toBe('acquired'); + if (first.status !== 'acquired') + throw new Error('expected acquisition'); + + const blockedA = await harness.limits.acquire( + acquireRequest(replayOwner, key, { concurrency: { max: 1 } }, 1_000) + ); + const blockedB = await harness.limits.acquire( + acquireRequest(replayOwner, key, { concurrency: { max: 1 } }, 1_000) + ); + + expect(blockedA.status).toBe('blocked'); + expect(blockedB.status).toBe('blocked'); + + const blockedState = await harness.inspectKeyState(key); + expect( + blockedState.waiterHolderIds.filter( + (lockId) => lockId === blockedLockId + ) + ).toHaveLength(1); + expect( + blockedState.leaseHolderIds.filter( + (lockId) => lockId === blockedLockId + ) + ).toHaveLength(0); + + await harness.limits.release(releaseRequest(first.lease)); + + const acquired = await harness.limits.acquire( + acquireRequest(replayOwner, key, { concurrency: { max: 1 } }, 1_000) + ); + expect(acquired.status).toBe('acquired'); + if (acquired.status 
!== 'acquired') + throw new Error('expected replayed holder acquisition'); + + const acquiredState = await harness.inspectKeyState(key); + expect( + acquiredState.waiterHolderIds.filter( + (lockId) => lockId === blockedLockId + ) + ).toHaveLength(0); + expect( + acquiredState.leaseHolderIds.filter( + (lockId) => lockId === blockedLockId + ) + ).toHaveLength(1); + } finally { + await harness.close?.(); + } + }); + }); +} diff --git a/packages/world-testing/src/limits-runtime.ts b/packages/world-testing/src/limits-runtime.ts new file mode 100644 index 0000000000..807033e712 --- /dev/null +++ b/packages/world-testing/src/limits-runtime.ts @@ -0,0 +1,304 @@ +import { describe, expect, it } from 'vitest'; + +type WorkflowLockContentionResult = { + workflowLockAcquiredAt: number; + workflowLockReleasedAt: number; + stepCallLockAcquiredAt: number; + stepCallLockReleasedAt: number; +}; + +type LockedStepCallResult = { + label: string; + key?: string; + attempt: number; + acquiredAt: number; + releasedAt: number; +}; + +type WorkflowOnlyLockResult = { + label: string; + workflowLockAcquiredAt: number; + workflowLockReleasedAt: number; +}; + +type WorkflowRateLimitResult = { + label: string; + workflowRateAcquiredAt: number; + workflowRateReleasedAt: number; + periodMs: number; +}; + +type LeakedLockResult = { + label: string; + key: string; + leaseTtlMs: number; + lockAcquiredAt: number; + workflowCompletedAt: number; +}; + +type WorkflowMultiStepScopeResult = { + key: string; + workflowLockAcquiredAt: number; + firstStepCompletedAt: number; + secondStepCompletedAt: number; + workflowLockReleasedAt: number; +}; + +export interface LimitsRuntimeHarness { + runWorkflowWithScopedLocks(userId: string): Promise<{ + workflowKey: string; + dbKey: string; + aiKey: string; + summary: string; + }>; + runWorkflowLockContention( + userId: string, + holdMs: number + ): Promise<[WorkflowLockContentionResult, WorkflowLockContentionResult]>; + runLockedStepCallContention( + key: string, + 
holdMs: number, + labelA?: string, + labelB?: string + ): Promise<[LockedStepCallResult, LockedStepCallResult]>; + runWorkflowLockAcrossSuspension( + userId: string, + holdMs: number + ): Promise<[WorkflowOnlyLockResult, WorkflowOnlyLockResult]>; + runWorkflowExpiredLeaseRecovery( + userId: string, + leaseTtlMs: number + ): Promise<[LeakedLockResult, WorkflowOnlyLockResult]>; + runLeakedKeyExpiredLeaseRecovery( + userId: string, + leaseTtlMs: number + ): Promise<[LeakedLockResult, LockedStepCallResult]>; + runWorkflowMixedLimitContention( + userId: string, + holdMs: number, + periodMs: number + ): Promise<[WorkflowRateLimitResult, WorkflowRateLimitResult]>; + runWorkflowFifoThreeWaiters( + userId: string, + holdMs: number + ): Promise< + [WorkflowOnlyLockResult, WorkflowOnlyLockResult, WorkflowOnlyLockResult] + >; + runCancelledWorkflowWaiter( + userId: string, + holdMs: number + ): Promise<{ + cancelledError: unknown; + resultA: WorkflowOnlyLockResult; + resultC: WorkflowOnlyLockResult; + }>; + runIndependentWorkflowKeys( + holdMs: number + ): Promise<[WorkflowOnlyLockResult, WorkflowOnlyLockResult]>; + runIndependentStepKeys( + holdMs: number + ): Promise<[LockedStepCallResult, LockedStepCallResult]>; + runBlockedWaiterWithUnrelatedWorkflow(holdMs: number): Promise<{ + holder: WorkflowOnlyLockResult; + waiter: WorkflowOnlyLockResult; + unrelated: WorkflowOnlyLockResult; + }>; + runWorkflowSingleLockAcrossMultipleSteps( + holdMs: number + ): Promise; +} + +export function createLimitsRuntimeSuite( + name: string, + createHarness: () => Promise +) { + describe(name, () => { + it('runs locks around individual step calls end-to-end', async () => { + const harness = await createHarness(); + const userId = 'shared-user'; + const result = await harness.runWorkflowWithScopedLocks(userId); + + expect(result).toMatchObject({ + workflowKey: `workflow:user:${userId}`, + dbKey: 'step:db:cheap', + aiKey: 'step:provider:openai', + summary: `summary:profile:${userId}`, + }); + 
}); + + it('serializes workflow locks and locks around step calls under contention', async () => { + const harness = await createHarness(); + const [resultA, resultB] = await harness.runWorkflowLockContention( + 'shared-user', + 750 + ); + + expect(resultB.workflowLockAcquiredAt).toBeGreaterThanOrEqual( + resultA.workflowLockReleasedAt + ); + expect(resultB.stepCallLockAcquiredAt).toBeGreaterThanOrEqual( + resultA.stepCallLockReleasedAt + ); + }); + + it('wakes promoted workflow and step-call lock waiters promptly', async () => { + const harness = await createHarness(); + const [resultA, resultB] = await harness.runWorkflowLockContention( + 'shared-user', + 1_500 + ); + + expect( + resultB.workflowLockAcquiredAt - resultA.workflowLockReleasedAt + ).toBeLessThan(4_000); + expect( + resultB.stepCallLockAcquiredAt - resultA.stepCallLockReleasedAt + ).toBeLessThan(4_000); + }); + + it('can hold one workflow lock across multiple steps in the same scope', async () => { + const harness = await createHarness(); + const result = + await harness.runWorkflowSingleLockAcrossMultipleSteps(400); + + expect(result.firstStepCompletedAt).toBeGreaterThanOrEqual( + result.workflowLockAcquiredAt + ); + expect(result.secondStepCompletedAt).toBeGreaterThanOrEqual( + result.firstStepCompletedAt + ); + expect(result.workflowLockReleasedAt).toBeGreaterThanOrEqual( + result.secondStepCompletedAt + ); + }); + + it('keeps workflow locks held across suspension until the workflow finishes', async () => { + const harness = await createHarness(); + const [resultA, resultB] = await harness.runWorkflowLockAcrossSuspension( + 'shared-user', + 1_500 + ); + + expect(resultB.workflowLockAcquiredAt).toBeGreaterThanOrEqual( + resultA.workflowLockReleasedAt + ); + expect( + resultB.workflowLockAcquiredAt - resultA.workflowLockReleasedAt + ).toBeLessThan(4_000); + }); + + it('reclaims expired leaked workflow locks without manual cleanup', async () => { + const harness = await createHarness(); + const 
leaseTtlMs = 1_250; + const [resultA, resultB] = await harness.runWorkflowExpiredLeaseRecovery( + 'expired-workflow-user', + leaseTtlMs + ); + + expect(resultB.workflowLockAcquiredAt).toBeGreaterThanOrEqual( + resultA.workflowCompletedAt + ); + expect( + resultB.workflowLockAcquiredAt - resultA.lockAcquiredAt + ).toBeGreaterThanOrEqual(leaseTtlMs - 100); + }); + + it('reclaims expired leaked locks on arbitrary keys without manual cleanup', async () => { + const harness = await createHarness(); + const leaseTtlMs = 1_250; + const [resultA, resultB] = await harness.runLeakedKeyExpiredLeaseRecovery( + 'expired-key-user', + leaseTtlMs + ); + + expect(resultB.acquiredAt).toBeGreaterThanOrEqual( + resultA.workflowCompletedAt + ); + expect( + resultB.acquiredAt - resultA.lockAcquiredAt + ).toBeGreaterThanOrEqual(leaseTtlMs - 100); + }); + + it('keeps mixed concurrency and rate waiters blocked until the rate window expires', async () => { + const harness = await createHarness(); + const holdMs = 250; + const periodMs = 1_500; + const [resultA, resultB] = await harness.runWorkflowMixedLimitContention( + 'shared-user', + holdMs, + periodMs + ); + + expect( + resultB.workflowRateAcquiredAt - resultA.workflowRateAcquiredAt + ).toBeGreaterThanOrEqual(periodMs - 100); + + const remainingWindowAfterRelease = + periodMs - + (resultA.workflowRateReleasedAt - resultA.workflowRateAcquiredAt); + expect( + resultB.workflowRateAcquiredAt - resultA.workflowRateReleasedAt + ).toBeGreaterThanOrEqual(Math.max(0, remainingWindowAfterRelease - 100)); + }); + + it('promotes 3 workflow waiters in FIFO order', async () => { + const harness = await createHarness(); + const [resultA, resultB, resultC] = + await harness.runWorkflowFifoThreeWaiters('shared-user', 750); + + expect(resultB.workflowLockAcquiredAt).toBeGreaterThanOrEqual( + resultA.workflowLockReleasedAt + ); + expect(resultC.workflowLockAcquiredAt).toBeGreaterThanOrEqual( + resultB.workflowLockReleasedAt + ); + }); + + it('skips 
cancelled workflow waiters before promoting the next run', async () => { + const harness = await createHarness(); + const { cancelledError, resultA, resultC } = + await harness.runCancelledWorkflowWaiter('shared-user', 1_500); + + expect(cancelledError).toBeTruthy(); + expect(resultC.workflowLockAcquiredAt).toBeGreaterThanOrEqual( + resultA.workflowLockReleasedAt + ); + expect( + resultC.workflowLockAcquiredAt - resultA.workflowLockReleasedAt + ).toBeLessThan(4_000); + }); + + it('does not block unrelated workflow keys', async () => { + const harness = await createHarness(); + const [resultA, resultB] = + await harness.runIndependentWorkflowKeys(1_000); + + expect(resultB.workflowLockAcquiredAt).toBeLessThan( + resultA.workflowLockReleasedAt + ); + }); + + it('does not block unrelated step-like keys', async () => { + const harness = await createHarness(); + const [resultA, resultB] = await harness.runIndependentStepKeys(1_000); + + expect(resultB.acquiredAt).toBeLessThan(resultA.releasedAt); + }); + + it.skipIf(process.env.WORKFLOW_LIMITS_LOW_CONCURRENCY !== '1')( + 'frees worker slots for unrelated workflows while a waiter is blocked', + async () => { + const harness = await createHarness(); + const { holder, waiter, unrelated } = + await harness.runBlockedWaiterWithUnrelatedWorkflow(1_500); + + expect(waiter.workflowLockAcquiredAt).toBeGreaterThanOrEqual( + holder.workflowLockReleasedAt + ); + expect(unrelated.workflowLockReleasedAt).toBeLessThan( + waiter.workflowLockAcquiredAt + ); + } + ); + }); +} diff --git a/packages/world-vercel/src/index.ts b/packages/world-vercel/src/index.ts index 975dc49863..ec7b9bdb1b 100644 --- a/packages/world-vercel/src/index.ts +++ b/packages/world-vercel/src/index.ts @@ -1,5 +1,6 @@ import type { World } from '@workflow/world'; import { createGetEncryptionKeyForRun } from './encryption.js'; +import { createLimits } from './limits.js'; import { createQueue } from './queue.js'; import { createResolveLatestDeploymentId } from 
'./resolve-latest-deployment.js'; import { createStorage } from './storage.js'; @@ -23,6 +24,7 @@ export function createVercelWorld(config?: APIConfig): World { config?.projectConfig?.projectId || process.env.VERCEL_PROJECT_ID; return { + limits: createLimits(config), ...createQueue(config), ...createStorage(config), ...createStreamer(config), diff --git a/packages/world-vercel/src/limits.test.ts b/packages/world-vercel/src/limits.test.ts new file mode 100644 index 0000000000..ff6bf0151a --- /dev/null +++ b/packages/world-vercel/src/limits.test.ts @@ -0,0 +1,41 @@ +import { describe, expect, it } from 'vitest'; +import { LIMITS_NOT_IMPLEMENTED_MESSAGE } from '@workflow/world'; +import { createVercelWorld } from './index.js'; +import { createLimits } from './limits.js'; + +describe('vercel world limits', () => { + it('exposes the required limits namespace', () => { + const limits = createLimits(); + + expect(limits).toMatchObject({ + acquire: expect.any(Function), + release: expect.any(Function), + heartbeat: expect.any(Function), + }); + }); + + it('keeps limits unimplemented until lock support exists', async () => { + const world = createVercelWorld(); + + await expect( + world.limits.acquire({ + key: 'workflow:user:test', + runId: 'wrun_test', + lockIndex: 0, + definition: { concurrency: { max: 1 } }, + }) + ).rejects.toThrow(LIMITS_NOT_IMPLEMENTED_MESSAGE); + + await expect( + world.limits.release({ + leaseId: 'lease_test', + }) + ).rejects.toThrow(LIMITS_NOT_IMPLEMENTED_MESSAGE); + + await expect( + world.limits.heartbeat({ + leaseId: 'lease_test', + }) + ).rejects.toThrow(LIMITS_NOT_IMPLEMENTED_MESSAGE); + }); +}); diff --git a/packages/world-vercel/src/limits.ts b/packages/world-vercel/src/limits.ts new file mode 100644 index 0000000000..785fa4886e --- /dev/null +++ b/packages/world-vercel/src/limits.ts @@ -0,0 +1,16 @@ +import { createLimitsNotImplementedError, type Limits } from '@workflow/world'; +import type { APIConfig } from './utils.js'; + +export 
function createLimits(_config?: APIConfig): Limits { + return { + async acquire() { + throw createLimitsNotImplementedError(); + }, + async release() { + throw createLimitsNotImplementedError(); + }, + async heartbeat() { + throw createLimitsNotImplementedError(); + }, + }; +} diff --git a/packages/world/FLOW_LIMITS.md b/packages/world/FLOW_LIMITS.md new file mode 100644 index 0000000000..b2d30b6376 --- /dev/null +++ b/packages/world/FLOW_LIMITS.md @@ -0,0 +1,363 @@ +# Flow Limits Design Notes + +This note summarizes the implemented direction for flow concurrency and rate +limiting across `@workflow/core`, `@workflow/world`, and concrete world +implementations. + +## Status + +- The shared `limits` interface and `lock()` API surface now exist. +- Local world now implements the shared live-process limits semantics with + leases, rate tokens, FIFO waiters, and prompt wake-up with delayed fallback. +- Postgres implements the same limits semantics with PostgreSQL-backed leases, + rate tokens, durable waiters, and durable queue wake-up. +- Vercel still exposes `limits` as a stub. +- The Next.js Turbopack workbench has shared E2E coverage for `lock()` used + with `await using`, including locks that wrap individual step calls or + groups of steps. + +## Goals + +- Support keyed concurrency limits. +- Support keyed rate limits. +- Allow concurrency and rate to be colocated in one interface. +- Support locks whose lifetime follows normal `await using` lexical scope. +- Make crash recovery possible through leases with TTL/expiry. +- Keep worker throughput controls separate from business-level flow limits. + +## Core Terms + +- `worker concurrency`: backend throughput setting for queue/job processing. +- `workflow limit`: admission control for workflow runs that share a key. +- `scoped resource key`: any user-defined key acquired from workflow scope to + protect one step call, multiple step calls, or a whole workflow section. 
+- `lease`: durable record that a workflow currently occupies capacity for a + key. + +## Shared Contract vs World-Specific Behavior + +The limits contract is intended to describe one shared set of observable +semantics across implemented worlds. That shared contract includes: + +- `acquire()`, `release()`, and `heartbeat()` surface behavior +- `WorkflowWorldError` when heartbeating a missing lease +- per-key concurrency and rate limiting outcomes +- same-holder lease reuse +- serialization of concurrent acquires for a single key +- FIFO waiter promotion per key +- pruning cancelled workflow waiters +- blocked acquisitions not consuming execution concurrency +- prompt wake-up with delayed fallback replay + +World-specific behavior should be limited to implementation mechanics and +durability characteristics, for example: + +- how waiter state is stored internally +- how per-key mutations are serialized internally +- how prompt wake-up is delivered +- whether queued wake-ups survive process or host loss +- backend-specific observability or debugging surfaces + +That means SQL row layout, advisory locks, and Graphile jobs are PostgreSQL +implementation details, while FIFO fairness and waiter skipping are contract +behavior that local and Postgres should both exhibit. + +## Decisions So Far + +### 1. Use one shared limits model + +The shared world interface uses a single `limits` namespace and a single limit +definition shape that can contain either or both: + +- `concurrency` +- `rate` + +This allows one key to express: + +- concurrency only +- rate only +- both together + +### 2. Use leases, not plain mutexes + +Limits are modeled as leases with TTL/expiry so capacity can be recovered after: + +- worker crashes +- process death +- machine shutdown +- lost retries + +Normal completion should dispose/release the lease explicitly. Crash recovery +comes from lease expiry plus future reclaim logic. 
+ +The default workflow lock TTL should be high enough to cover normal suspended +execution without making users tune it eagerly. The current runtime default is +24 hours unless the caller overrides `leaseTtlMs`. + +### 3. Keep worker concurrency separate from flow limits + +Current world-level concurrency settings are infrastructure controls, not +business-level locking: + +- local world: `WORKFLOW_LOCAL_QUEUE_CONCURRENCY` +- postgres world: `WORKFLOW_POSTGRES_WORKER_CONCURRENCY` + +These control how many queue jobs can be processed at once. They should remain +independent from flow limits like: + +- `workflow:user:123` +- `step:db:cheap` +- `step:provider:openai` + +### 4. Use a sliding-window model for rate limits in v1 + +The current rate-limit model is a sliding-window log model, not a token bucket. + +For a limit like: + +- `rate: { count: 10, periodMs: 60_000 }` + +the intended semantics are: + +- allow at most 10 successful acquires in the last 60 seconds +- each successful acquire records a timestamped rate usage entry +- rate capacity returns only when that entry ages out of the window + +This is simpler than a token bucket and matches the current local-world +implementation direction well. + +Important distinction: + +- `lease`: active occupancy / ownership for a holder +- `token`: internal rate-usage record that remains until the rate window expires + +Releasing a lease should free concurrency capacity immediately, but it should +not restore rate capacity until the associated rate usage entry expires. + +### 5. Use one `lock()` API from workflow scope + +We want one user-facing primitive: + +```ts +await using lease = await lock({ ... }); +``` + +`lock()` means workflow code acquires ownership of a keyed lease. + +If placed at the top of a workflow, it should hold the lease across the logical +workflow scope, even though the workflow may suspend and resume many times. + +Steps themselves do not acquire locks directly. 
To limit one step category or a +group of steps, the workflow acquires the lock and then calls those steps while +the lease is held. + +### 6. `await using` is the preferred user-facing shape + +The preferred API is explicit resource management: + +```ts +await using lease = await lock({ ... }); +``` + +This gives automatic cleanup on scope exit and reads well for critical sections +that may include one or many step calls. + +For manual early cleanup, the user-facing `LockHandle` should expose: + +- `dispose()` +- `[Symbol.asyncDispose]()` + +The backend-facing world contract can continue to use `release(...)` internally. + +### 7. Locks follow logical scope, not request lifetime + +For workflows, `await using` must be tied to the logical workflow scope across: + +- step round trips +- queue turns +- sleeps +- hooks +- replay/resume + +The lease must not be disposed merely because one host process invocation ends. + +### 8. Keep admission decisions in workflow code + +Current preferred model: + +- workflow code acquires and releases limits +- steps execute inside whatever critical section the workflow establishes +- step code never waits on a separate lock of its own + +This keeps the dependency direction simple: + +- workflow admission / critical section -> step execution + +That avoids needing separate workflow-lock and step-lock runtime semantics. + +### 9. Waiters are FIFO per key + +Implemented worlds use a waiter queue and promote waiters in FIFO order for a +single limit key. 
+ +Important details: + +- FIFO is per key, not global across all limit keys +- promotion order is based on waiter creation order +- dead or terminal waiters are pruned before promotion +- a live waiter may still be skipped if it is no longer eligible when promotion runs +- releasing a lease or reclaiming an expired lease can both trigger promotion +- rate-window expiry can also make the head waiter eligible again + +This gives deterministic and inspectable fairness for a key without requiring a +global scheduler. + +### 10. Blocked limits do not consume worker concurrency + +Blocked flow limits and worker concurrency are intentionally separate. + +For implemented worlds: + +- blocked workflows are suspended and re-queued, not left running on a worker +- worker slots are free to service unrelated work while the blocked execution is + waiting to be retried or promoted + +PostgreSQL additionally keeps that backlog durable in the database. The local +world keeps queue delivery in-memory, so cross-process crash recovery for the +backlog is explicitly outside the shared limits contract today. + +### 11. Wake-up is prompt, with a delayed fallback + +Implemented worlds use the world-owned limit state as the source of truth and +try to resume promoted waiters promptly, with a delayed fallback still in place +so progress is possible if an immediate wake-up is missed. + +Current behavior: + +- leases, rate tokens, and waiters live in world-owned limit state +- promotion decisions are made from that limit state +- when a waiter is promoted, the runtime is woken by enqueuing the workflow job +- workflows also keep a delayed replay fallback so progress is still possible if + an immediate wake-up is missed + +PostgreSQL uses Graphile jobs for that wake-up path and keeps the backlog +durable across host/process failure. 
The local world uses an in-memory queue, so +prompt wake behavior matches while the process is alive, but durable backlog +survival is not guaranteed after process loss. + +### 12. V1 semantics are intentionally opinionated + +For v1, the intended semantics are: + +- workflow locks count admitted, in-flight workflows for a key +- workflow-held keys may be used to serialize or rate-limit specific step categories +- worker concurrency remains a separate infrastructure throttle + +More concretely: + +- if a workflow acquires a lock and then sleeps for 10 minutes, + it still counts as active for that workflow key during the sleep +- if a workflow acquires a lock for a step-like key such as `step:db:cheap`, + that key remains occupied until the workflow releases it, even if the + protected work is just one step call or a small group of step calls +- rate-limited step-like keys still consume rate capacity when the workflow + acquires that key, and that usage remains counted until the window expires + even if the workflow releases the lease quickly + +For the current local implementation specifically: + +- workflow locks now follow the same live-process waiter/fairness semantics as + Postgres +- the queue remains in-memory, so queued wake-ups are not durable across process + loss + +This means the current v1 interpretation of a workflow lock is: + +- "How many workflows for this key are admitted and in flight at all?" + +not: + +- "How many workflows are actively burning CPU right this instant?" 
+
+## Current Example Shape
+
+The current placeholder E2E example models:
+
+- workflow-level user concurrency:
+  - `workflow:user:${userId}`
+- step-level DB concurrency:
+  - `step:db:cheap`
+- step-level AI rate limit:
+  - `step:provider:openai`
+
+With intended usage like:
+
+```ts
+async function cheapDbStep(userId: string) {
+  'use step';
+  return { userId, prompt: `profile:${userId}` };
+}
+
+async function expensiveAIStep(prompt: string) {
+  'use step';
+  return `summary:${prompt}`;
+}
+
+export async function workflowWithScopedLocks(userId: string) {
+  'use workflow';
+  await using userLimit = await lock({
+    key: `workflow:user:${userId}`,
+    concurrency: { max: 2 },
+  });
+
+  let row: Awaited<ReturnType<typeof cheapDbStep>>;
+  {
+    await using _dbLimit = await lock({
+      key: 'step:db:cheap',
+      concurrency: { max: 20 },
+    });
+    row = await cheapDbStep(userId);
+  }
+
+  let summary: Awaited<ReturnType<typeof expensiveAIStep>>;
+  {
+    await using _aiLimit = await lock({
+      key: 'step:provider:openai',
+      rate: { count: 10, periodMs: 60_000 },
+    });
+    summary = await expensiveAIStep(row.prompt);
+  }
+  return { row, summary };
+}
+```
+
+## Important Clarification
+
+Flow limits and worker concurrency are different layers.
+
+For example:
+
+- a cheap DB step may continue making progress even while an expensive AI step
+  is rate-limited
+- the main shared coupling between them is the worker pool
+- if workers are available, unrelated step categories should continue
+
+So overall system throughput is not one simple global minimum. Different
+workflow paths may be bottlenecked by different limits at different times.
+ +Two more practical clarifications: + +- a blocked workflow lock should not monopolize + `WORKFLOW_POSTGRES_WORKER_CONCURRENCY` or + `WORKFLOW_LOCAL_QUEUE_CONCURRENCY` just because it is waiting +- a released concurrency lease frees concurrency immediately, but associated + rate usage still remains counted until its token ages out of the rate window + +## Open Questions + +- Whether workflow-level locks should always be whole-run admission locks or + also support narrower lexical scopes within workflow code. +- Whether `heartbeat()` should remain user-visible or become mostly internal. +- Whether `lock()` should eventually grow optional metadata or + config sugar for common per-step resource keys. +- Exact event-log representation for acquire/block/dispose transitions. diff --git a/packages/world/package.json b/packages/world/package.json index e250e45412..57546d8b1c 100644 --- a/packages/world/package.json +++ b/packages/world/package.json @@ -20,7 +20,8 @@ "scripts": { "build": "tsc", "dev": "tsc --watch", - "clean": "tsc --build --clean && rm -rf dist" + "clean": "tsc --build --clean && rm -rf dist", + "test": "vitest run src" }, "dependencies": { "ulid": "catalog:" @@ -30,6 +31,7 @@ }, "devDependencies": { "@types/node": "catalog:", + "vitest": "catalog:", "zod": "catalog:", "@workflow/tsconfig": "workspace:*" }, diff --git a/packages/world/src/index.ts b/packages/world/src/index.ts index 3e7ed1c4fb..5e8f73d111 100644 --- a/packages/world/src/index.ts +++ b/packages/world/src/index.ts @@ -10,6 +10,28 @@ export { export type * from './hooks.js'; export { HookSchema } from './hooks.js'; export type * from './interfaces.js'; +export type * from './limits.js'; +export { + createLockId, + createLockWakeCorrelationId, + createLimitsNotImplementedError, + LimitAcquireAcquiredResultSchema, + LimitAcquireBlockedResultSchema, + LimitAcquireRequestSchema, + LimitAcquireResultSchema, + LimitAcquireStatusSchema, + LimitBlockedReasonSchema, + LimitConcurrencySchema, + 
LimitDefinitionSchema, + LimitHeartbeatRequestSchema, + LimitKeySchema, + LimitLeaseSchema, + LimitLockIdSchema, + LimitRateSchema, + LimitReleaseRequestSchema, + LIMITS_NOT_IMPLEMENTED_MESSAGE, + parseLockId, +} from './limits.js'; export type * from './queue.js'; export { HealthCheckPayloadSchema, diff --git a/packages/world/src/interfaces.ts b/packages/world/src/interfaces.ts index d53fd96d14..87c57c0c8f 100644 --- a/packages/world/src/interfaces.ts +++ b/packages/world/src/interfaces.ts @@ -9,6 +9,7 @@ import type { RunCreatedEventRequest, } from './events.js'; import type { GetHookParams, Hook, ListHooksParams } from './hooks.js'; +import type { Limits } from './limits.js'; import type { Queue } from './queue.js'; import type { GetWorkflowRunParams, @@ -179,6 +180,8 @@ export interface Storage { * The "World" interface represents how Workflows are able to communicate with the outside world. */ export interface World extends Queue, Storage, Streamer { + limits: Limits; + /** * A function that will be called to start any background tasks needed by the World implementation. * For example, in the case of a queue backed World, this would start the queue processing. 
diff --git a/packages/world/src/limits.test.ts b/packages/world/src/limits.test.ts
new file mode 100644
index 0000000000..8796d636ad
--- /dev/null
+++ b/packages/world/src/limits.test.ts
@@ -0,0 +1,19 @@
+import { describe, it } from 'vitest';
+
+describe('limits schemas', () => {
+  it.fails('accepts concurrency-only, rate-only, and combined limit definitions', () => {
+    throw new Error('TODO: implement');
+  });
+
+  it.fails('rejects invalid or empty limit definitions', () => {
+    throw new Error('TODO: implement');
+  });
+
+  it.fails('discriminates acquired and blocked acquire results', () => {
+    throw new Error('TODO: implement');
+  });
+
+  it.fails('keeps lease, release, and heartbeat request shapes stable', () => {
+    throw new Error('TODO: implement');
+  });
+});
diff --git a/packages/world/src/limits.ts b/packages/world/src/limits.ts
new file mode 100644
index 0000000000..495f29a84f
--- /dev/null
+++ b/packages/world/src/limits.ts
@@ -0,0 +1,140 @@
+import { z } from 'zod';
+
+export const LIMITS_NOT_IMPLEMENTED_MESSAGE =
+  'Flow limits are reserved for future support and are not implemented yet.';
+
+export function createLimitsNotImplementedError(): Error {
+  return new Error(LIMITS_NOT_IMPLEMENTED_MESSAGE);
+}
+
+export const LimitKeySchema = z.string().min(1);
+export type LimitKey = z.infer<typeof LimitKeySchema>;
+
+export const LimitConcurrencySchema = z.object({
+  max: z.number().int().positive(),
+});
+export type LimitConcurrency = z.infer<typeof LimitConcurrencySchema>;
+
+export const LimitRateSchema = z.object({
+  count: z.number().int().positive(),
+  periodMs: z.number().int().positive(),
+});
+export type LimitRate = z.infer<typeof LimitRateSchema>;
+
+export const LimitDefinitionSchema = z
+  .object({
+    concurrency: LimitConcurrencySchema.optional(),
+    rate: LimitRateSchema.optional(),
+  })
+  .refine(
+    (value) => value.concurrency !== undefined || value.rate !== undefined,
+    {
+      message: 'At least one limit must be configured',
+    }
+  );
+export type LimitDefinition = z.infer<typeof LimitDefinitionSchema>;
+
+export const LimitLockIdSchema = 
z.string().min(1);
+export type LimitLockId = z.infer<typeof LimitLockIdSchema>;
+
+export function createLockId(runId: string, lockIndex: number): LimitLockId {
+  return `${runId}:${lockIndex}`;
+}
+
+export function parseLockId(
+  lockId: string
+): { runId: string; lockIndex: number } | null {
+  const separatorIndex = lockId.lastIndexOf(':');
+  if (separatorIndex <= 0 || separatorIndex === lockId.length - 1) {
+    return null;
+  }
+
+  const runId = lockId.slice(0, separatorIndex);
+  const rawLockIndex = lockId.slice(separatorIndex + 1);
+  const lockIndex = Number.parseInt(rawLockIndex, 10);
+  if (!Number.isInteger(lockIndex) || lockIndex < 0) {
+    return null;
+  }
+
+  return { runId, lockIndex };
+}
+
+export function createLockWakeCorrelationId(
+  runId: string,
+  lockIndex: number
+): string {
+  return `wflock_wait_${runId}:${lockIndex}`;
+}
+
+export const LimitLeaseSchema = z.object({
+  leaseId: z.string().min(1),
+  key: LimitKeySchema,
+  lockId: LimitLockIdSchema,
+  runId: z.string().min(1),
+  lockIndex: z.number().int().nonnegative(),
+  acquiredAt: z.coerce.date(),
+  expiresAt: z.coerce.date().optional(),
+  definition: LimitDefinitionSchema,
+});
+export type LimitLease = z.infer<typeof LimitLeaseSchema>;
+
+export const LimitAcquireRequestSchema = z.object({
+  key: LimitKeySchema,
+  runId: z.string().min(1),
+  lockIndex: z.number().int().nonnegative(),
+  definition: LimitDefinitionSchema,
+  leaseTtlMs: z.number().int().positive().optional(),
+});
+export type LimitAcquireRequest = z.infer<typeof LimitAcquireRequestSchema>;
+
+export const LimitBlockedReasonSchema = z.enum([
+  'concurrency',
+  'rate',
+  'concurrency_and_rate',
+]);
+export type LimitBlockedReason = z.infer<typeof LimitBlockedReasonSchema>;
+
+export const LimitAcquireStatusSchema = z.enum(['acquired', 'blocked']);
+export type LimitAcquireStatus = z.infer<typeof LimitAcquireStatusSchema>;
+
+export const LimitAcquireAcquiredResultSchema = z.object({
+  status: z.literal(LimitAcquireStatusSchema.enum.acquired),
+  lease: LimitLeaseSchema,
+});
+export type LimitAcquireAcquiredResult = z.infer<
+  typeof LimitAcquireAcquiredResultSchema
+>;
+
+export const LimitAcquireBlockedResultSchema = z.object({ + status: z.literal(LimitAcquireStatusSchema.enum.blocked), + reason: LimitBlockedReasonSchema, + retryAfterMs: z.number().int().nonnegative().optional(), +}); +export type LimitAcquireBlockedResult = z.infer< + typeof LimitAcquireBlockedResultSchema +>; + +export const LimitAcquireResultSchema = z.discriminatedUnion('status', [ + LimitAcquireAcquiredResultSchema, + LimitAcquireBlockedResultSchema, +]); +export type LimitAcquireResult = z.infer; + +export const LimitReleaseRequestSchema = z.object({ + leaseId: z.string().min(1), + key: LimitKeySchema.optional(), + lockId: LimitLockIdSchema.optional(), +}); +export type LimitReleaseRequest = z.infer; + +export const LimitHeartbeatRequestSchema = z.object({ + leaseId: z.string().min(1), + ttlMs: z.number().int().positive().optional(), +}); +export type LimitHeartbeatRequest = z.infer; + +export interface Limits { + acquire(request: LimitAcquireRequest): Promise; + release(request: LimitReleaseRequest): Promise; + heartbeat(request: LimitHeartbeatRequest): Promise; +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 1e644f7416..5ec582352c 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1284,6 +1284,9 @@ importers: '@workflow/tsconfig': specifier: workspace:* version: link:../tsconfig + vitest: + specifier: 'catalog:' + version: 4.0.18(@opentelemetry/api@1.9.0)(@types/node@22.19.0)(jiti@2.6.1)(jsdom@26.1.0)(lightningcss@1.30.2)(terser@5.44.0)(tsx@4.20.6)(yaml@2.8.1) zod: specifier: 'catalog:' version: 4.3.6 @@ -23456,14 +23459,6 @@ snapshots: optionalDependencies: vite: 7.1.12(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.0)(tsx@4.20.6)(yaml@2.8.1) - '@vitest/mocker@4.0.18(vite@7.1.12(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.0)(tsx@4.20.6)(yaml@2.8.1))': - dependencies: - '@vitest/spy': 4.0.18 - estree-walker: 3.0.3 - magic-string: 0.30.21 - optionalDependencies: - vite: 
7.1.12(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.0)(tsx@4.20.6)(yaml@2.8.1) - '@vitest/mocker@4.0.18(vite@7.1.12(@types/node@24.6.2)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.0)(tsx@4.20.6)(yaml@2.8.1))': dependencies: '@vitest/spy': 4.0.18 @@ -32855,7 +32850,7 @@ snapshots: vitest@4.0.18(@opentelemetry/api@1.9.0)(@types/node@22.19.0)(jiti@2.6.1)(jsdom@26.1.0)(lightningcss@1.30.2)(terser@5.44.0)(tsx@4.20.6)(yaml@2.8.1): dependencies: '@vitest/expect': 4.0.18 - '@vitest/mocker': 4.0.18(vite@7.1.12(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.0)(tsx@4.20.6)(yaml@2.8.1)) + '@vitest/mocker': 4.0.18(vite@7.1.12(@types/node@24.6.2)(jiti@2.6.1)(lightningcss@1.30.2)(terser@5.44.0)(tsx@4.20.6)(yaml@2.8.1)) '@vitest/pretty-format': 4.0.18 '@vitest/runner': 4.0.18 '@vitest/snapshot': 4.0.18 diff --git a/workbench/example/tsconfig.json b/workbench/example/tsconfig.json index 39c2f1ea68..58fb97394f 100644 --- a/workbench/example/tsconfig.json +++ b/workbench/example/tsconfig.json @@ -1,14 +1,15 @@ { "compilerOptions": { "target": "es2022", - "module": "NodeNext", + "module": "esnext", "lib": ["dom", "dom.iterable", "esnext"], + "baseUrl": ".", "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, - "moduleResolution": "NodeNext", + "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", diff --git a/workbench/example/workflows/99_e2e.ts b/workbench/example/workflows/99_e2e.ts index d6caf9dcc8..1c9bd2ca0a 100644 --- a/workbench/example/workflows/99_e2e.ts +++ b/workbench/example/workflows/99_e2e.ts @@ -9,6 +9,7 @@ import { getStepMetadata, getWorkflowMetadata, getWritable, + lock, type RequestWithResponse, RetryableError, sleep, @@ -213,6 +214,304 @@ export async function parallelSleepWorkflow() { return { startTime, endTime }; } +async function cheapDbStep(userId: string) { + 'use step'; + return { + userId, + prompt: `profile:${userId}`, + }; 
+} + +async function expensiveAIStep(prompt: string) { + 'use step'; + return `summary:${prompt}`; +} + +export async function workflowWithScopedLocks(userId = 'user-123') { + 'use workflow'; + + await using userLimit = await lock({ + key: `workflow:user:${userId}`, + concurrency: { max: 2 }, + leaseTtlMs: 30_000, + }); + + let row: Awaited>; + { + await using _dbLimit = await lock({ + key: 'step:db:cheap', + concurrency: { max: 20 }, + leaseTtlMs: 30_000, + }); + row = await cheapDbStep(userId); + } + + let summary: Awaited>; + { + await using _aiLimit = await lock({ + key: 'step:provider:openai', + rate: { count: 10, periodMs: 60_000 }, + leaseTtlMs: 30_000, + }); + summary = await expensiveAIStep(row.prompt); + } + + return { + workflowKey: userLimit.key, + dbKey: 'step:db:cheap', + aiKey: 'step:provider:openai', + summary, + }; +} + +async function serializedLimitStep( + label: string, + holdMs: number, + key = 'step:db:serialized' +) { + 'use step'; + + const metadata = getStepMetadata(); + const acquiredAt = Date.now(); + await new Promise((resolve) => setTimeout(resolve, holdMs)); + const releasedAt = Date.now(); + + return { + label, + key, + attempt: metadata.attempt, + acquiredAt, + releasedAt, + }; +} + +export async function workflowLockContentionWorkflow( + userId = 'user-123', + holdMs = 750 +) { + 'use workflow'; + + const workflowLock = await lock({ + key: `workflow:user:${userId}`, + concurrency: { max: 1 }, + leaseTtlMs: holdMs + 5_000, + }); + + const workflowLockAcquiredAt = Date.now(); + let step: Awaited>; + { + await using _nestedLock = await lock({ + key: 'step:db:serialized', + concurrency: { max: 1 }, + leaseTtlMs: holdMs + 5_000, + }); + step = await serializedLimitStep(userId, holdMs); + } + await workflowLock.dispose(); + const workflowLockReleasedAt = Date.now(); + + return { + userId, + workflowLockAcquiredAt, + workflowLockReleasedAt, + stepCallLockAcquiredAt: step.acquiredAt, + stepCallLockReleasedAt: step.releasedAt, + }; +} + 
+export async function lockedStepCallContentionWorkflow( + key = 'step:db:key-contention', + holdMs = 750, + label = key +) { + 'use workflow'; + + { + await using _lock = await lock({ + key, + concurrency: { max: 1 }, + leaseTtlMs: holdMs + 5_000, + }); + + return await serializedLimitStep(label, holdMs, key); + } +} + +////////////////////////////////////////////////////////// + +export async function workflowOnlyLockContentionWorkflow( + userId = 'user-123', + holdMs = 750, + label = userId +) { + 'use workflow'; + + await using _workflowLock = await lock({ + key: `workflow:user:${userId}`, + concurrency: { max: 1 }, + leaseTtlMs: holdMs + 5_000, + }); + + const workflowLockAcquiredAt = Date.now(); + await sleep(holdMs); + const workflowLockReleasedAt = Date.now(); + + return { + label, + userId, + workflowLockAcquiredAt, + workflowLockReleasedAt, + }; +} + +export async function workflowLeakedLockWorkflow( + userId = 'user-123', + leaseTtlMs = 1_250, + label = userId +) { + 'use workflow'; + + const leakedWorkflowLock = await lock({ + key: `workflow:user:${userId}`, + concurrency: { max: 1 }, + leaseTtlMs, + }); + + const workflowLockAcquiredAt = Date.now(); + + return { + label, + userId, + key: leakedWorkflowLock.key, + leaseTtlMs, + leakedLeaseId: leakedWorkflowLock.leaseId, + lockAcquiredAt: workflowLockAcquiredAt, + workflowCompletedAt: Date.now(), + }; +} + +export async function leakedKeyLockWorkflow( + userId = 'user-123', + leaseTtlMs = 1_250, + label = userId +) { + 'use workflow'; + + const leakedLock = await lock({ + key: `workflow:key:expired:${userId}`, + concurrency: { max: 1 }, + leaseTtlMs, + }); + + return { + label, + key: leakedLock.key, + leaseTtlMs, + leakedLeaseId: leakedLock.leaseId, + lockAcquiredAt: Date.now(), + workflowCompletedAt: Date.now(), + }; +} + +export async function workflowRateLimitContentionWorkflow( + userId = 'user-123', + holdMs = 250, + periodMs = 1_500, + label = userId +) { + 'use workflow'; + + await using 
_workflowRateLimit = await lock({ + key: `workflow:rate:${userId}`, + rate: { count: 1, periodMs }, + leaseTtlMs: periodMs + 5_000, + }); + + const workflowRateAcquiredAt = Date.now(); + await sleep(holdMs); + const workflowRateReleasedAt = Date.now(); + + return { + label, + userId, + periodMs, + workflowRateAcquiredAt, + workflowRateReleasedAt, + }; +} + +export async function workflowMixedLimitContentionWorkflow( + userId = 'user-123', + holdMs = 250, + periodMs = 1_500, + label = userId +) { + 'use workflow'; + + await using _mixedLimit = await lock({ + key: `workflow:mixed:${userId}`, + concurrency: { max: 1 }, + rate: { count: 1, periodMs }, + leaseTtlMs: periodMs + 5_000, + }); + + const workflowRateAcquiredAt = Date.now(); + await sleep(holdMs); + const workflowRateReleasedAt = Date.now(); + + return { + label, + userId, + periodMs, + workflowRateAcquiredAt, + workflowRateReleasedAt, + }; +} + +async function scopedMultiStepStep(label: string, holdMs: number) { + 'use step'; + + const metadata = getStepMetadata(); + await new Promise((resolve) => setTimeout(resolve, holdMs)); + return { + label, + attempt: metadata.attempt, + completedAt: Date.now(), + }; +} + +export async function singleLockAcrossMultipleStepsWorkflow( + key = 'step:db:batch', + holdMs = 400 +) { + 'use workflow'; + + let workflowLockAcquiredAt: number; + let first: Awaited>; + let second: Awaited>; + let workflowLockReleasedAt: number; + { + await using _lock = await lock({ + key, + concurrency: { max: 1 }, + leaseTtlMs: holdMs * 2 + 5_000, + }); + + workflowLockAcquiredAt = Date.now(); + first = await scopedMultiStepStep('first', holdMs); + second = await scopedMultiStepStep('second', holdMs); + workflowLockReleasedAt = Date.now(); + } + + return { + key, + workflowLockAcquiredAt, + firstStepCompletedAt: first.completedAt, + secondStepCompletedAt: second.completedAt, + workflowLockReleasedAt, + }; +} + ////////////////////////////////////////////////////////// async function 
nullByteStep() { diff --git a/workbench/nextjs-turbopack/next.config.ts b/workbench/nextjs-turbopack/next.config.ts index 78df6b2090..5d1a204118 100644 --- a/workbench/nextjs-turbopack/next.config.ts +++ b/workbench/nextjs-turbopack/next.config.ts @@ -1,7 +1,9 @@ -import type { NextConfig } from 'next'; import path from 'node:path'; +import type { NextConfig } from 'next'; import { withWorkflow } from 'workflow/next'; +process.env.WORKFLOW_PUBLIC_MANIFEST ??= '1'; + const turbopackRoot = path.resolve(process.cwd(), '../..'); const nextConfig: NextConfig = {