From 677acb60196eb4735418792e76ed01df651c636e Mon Sep 17 00:00:00 2001 From: lapenna-bjss Date: Wed, 18 Mar 2026 11:01:34 +0000 Subject: [PATCH 1/4] CCM-14320: Set up TraceContext --- .../mesh-download/mesh_download/processor.py | 17 ++++++++-- .../src/__tests__/apis/sqs-handler.test.ts | 12 +++++++ .../pdm-poll-lambda/src/apis/sqs-handler.ts | 32 +++++++++++++++---- utils/py-utils/dl_utils/__init__.py | 7 ++++ utils/py-utils/dl_utils/trace_context.py | 23 +++++++++++++ utils/utils/package.json | 3 +- utils/utils/src/index.ts | 1 + utils/utils/src/trace-context/index.ts | 1 + .../utils/src/trace-context/trace-context.ts | 28 ++++++++++++++++ 9 files changed, 114 insertions(+), 10 deletions(-) create mode 100644 utils/py-utils/dl_utils/trace_context.py create mode 100644 utils/utils/src/trace-context/index.ts create mode 100644 utils/utils/src/trace-context/trace-context.ts diff --git a/lambdas/mesh-download/mesh_download/processor.py b/lambdas/mesh-download/mesh_download/processor.py index e51edced..f37c6d42 100644 --- a/lambdas/mesh-download/mesh_download/processor.py +++ b/lambdas/mesh-download/mesh_download/processor.py @@ -5,6 +5,7 @@ from pydantic import ValidationError from digital_letters_events import MESHInboxMessageDownloaded, MESHInboxMessageReceived from mesh_download.errors import MeshMessageNotFound +from dl_utils import derive_child_traceparent class MeshDownloadProcessor: @@ -23,7 +24,10 @@ def __init__(self, **kwargs): def process_sqs_message(self, sqs_record): try: validated_event = self._parse_and_validate_event(sqs_record) - logger = self.__log.bind(mesh_message_id=validated_event.data.meshMessageId) + logger = self.__log.bind( + mesh_message_id=validated_event.data.meshMessageId, + traceparent=validated_event.traceparent, + ) logger.info("Processing MESH download request") self._handle_download(validated_event, logger) @@ -106,14 +110,21 @@ def _store_message_content(self, sender_id, message_reference, message_content, def _publish_downloaded_event(self, incoming_event, message_uri): """ Publishes a MESHInboxMessageDownloaded event. + + The outgoing event derives a child traceparent from the incoming event's + traceparent, preserving the same trace-id while generating a fresh span-id + for this service hop. """ now = datetime.now(timezone.utc).isoformat() + child_traceparent = derive_child_traceparent(incoming_event.traceparent) + cloud_event = { **incoming_event.model_dump(exclude_none=True), 'id': str(uuid4()), 'time': now, 'recordedtime': now, + 'traceparent': child_traceparent, 'type': 'uk.nhs.notify.digital.letters.mesh.inbox.message.downloaded.v1', 'dataschema': ( 'https://notify.nhs.uk/cloudevents/schemas/digital-letters/2025-10-draft/data/' @@ -137,5 +148,7 @@ def _publish_downloaded_event(self, incoming_event, message_uri): "Published MESHInboxMessageDownloaded event", sender_id=incoming_event.data.senderId, message_uri=message_uri, - message_reference=incoming_event.data.messageReference + message_reference=incoming_event.data.messageReference, + incoming_traceparent=incoming_event.traceparent, + outgoing_traceparent=child_traceparent, ) diff --git a/lambdas/pdm-poll-lambda/src/__tests__/apis/sqs-handler.test.ts b/lambdas/pdm-poll-lambda/src/__tests__/apis/sqs-handler.test.ts index 74c1ac35..f6500aa7 100644 --- a/lambdas/pdm-poll-lambda/src/__tests__/apis/sqs-handler.test.ts +++ b/lambdas/pdm-poll-lambda/src/__tests__/apis/sqs-handler.test.ts @@ -15,13 +15,20 @@ const pdm = mock(); jest.mock('node:crypto', () => ({ randomUUID: jest.fn(), + randomBytes: jest.fn(), })); +// eslint-disable-next-line @typescript-eslint/no-require-imports +const { randomBytes } = require('node:crypto'); const mockRandomUUID = randomUUID as jest.MockedFunction; +const mockRandomBytes = randomBytes as jest.MockedFunction; const mockDate = jest.spyOn(Date.prototype, 'toISOString'); mockRandomUUID.mockReturnValue('550e8400-e29b-41d4-a716-446655440001'); +mockRandomBytes.mockReturnValue(Buffer.from('aabbccdd11223344', 'hex')); mockDate.mockReturnValue('2023-06-20T12:00:00.250Z'); +const DERIVED_TRACEPARENT = '00-0af7651916cd43dd8448eb211c80319c-aabbccdd11223344-01'; + const handler = createHandler({ eventPublisher, logger, @@ -51,6 +58,7 @@ describe('SQS Handler', () => { id: '550e8400-e29b-41d4-a716-446655440001', time: '2023-06-20T12:00:00.250Z', recordedtime: '2023-06-20T12:00:00.250Z', + traceparent: DERIVED_TRACEPARENT, dataschema: 'https://notify.nhs.uk/cloudevents/schemas/digital-letters/2025-10-draft/data/digital-letters-pdm-resource-available-data.schema.json', type: 'uk.nhs.notify.digital.letters.pdm.resource.available.v1', @@ -90,6 +98,7 @@ describe('SQS Handler', () => { id: '550e8400-e29b-41d4-a716-446655440001', time: '2023-06-20T12:00:00.250Z', recordedtime: '2023-06-20T12:00:00.250Z', + traceparent: DERIVED_TRACEPARENT, dataschema: 'https://notify.nhs.uk/cloudevents/schemas/digital-letters/2025-10-draft/data/digital-letters-pdm-resource-unavailable-data.schema.json', type: 'uk.nhs.notify.digital.letters.pdm.resource.unavailable.v1', @@ -132,6 +141,7 @@ describe('SQS Handler', () => { id: '550e8400-e29b-41d4-a716-446655440001', time: '2023-06-20T12:00:00.250Z', recordedtime: '2023-06-20T12:00:00.250Z', + traceparent: DERIVED_TRACEPARENT, dataschema: 'https://notify.nhs.uk/cloudevents/schemas/digital-letters/2025-10-draft/data/digital-letters-pdm-resource-available-data.schema.json', type: 'uk.nhs.notify.digital.letters.pdm.resource.available.v1', @@ -174,6 +184,7 @@ describe('SQS Handler', () => { id: '550e8400-e29b-41d4-a716-446655440001', time: '2023-06-20T12:00:00.250Z', recordedtime: '2023-06-20T12:00:00.250Z', + traceparent: DERIVED_TRACEPARENT, type: 'uk.nhs.notify.digital.letters.pdm.resource.unavailable.v1', data: { messageReference: @@ -219,6 +230,7 @@ describe('SQS Handler', () => { id: '550e8400-e29b-41d4-a716-446655440001', time: '2023-06-20T12:00:00.250Z', recordedtime: '2023-06-20T12:00:00.250Z', + traceparent: DERIVED_TRACEPARENT, dataschema: 'https://notify.nhs.uk/cloudevents/schemas/digital-letters/2025-10-draft/data/digital-letters-pdm-resource-retries-exceeded-data.schema.json', type: 'uk.nhs.notify.digital.letters.pdm.resource.retries.exceeded.v1', diff --git a/lambdas/pdm-poll-lambda/src/apis/sqs-handler.ts b/lambdas/pdm-poll-lambda/src/apis/sqs-handler.ts index 39ae9c72..fc5181fc 100644 --- a/lambdas/pdm-poll-lambda/src/apis/sqs-handler.ts +++ b/lambdas/pdm-poll-lambda/src/apis/sqs-handler.ts @@ -15,6 +15,7 @@ import pdmResourceSubmittedValidator from 'digital-letters-events/PDMResourceSub import pdmResourceUnavailableValidator from 'digital-letters-events/PDMResourceUnavailable.js'; import pdmResourceRetriesExceededValidator from 'digital-letters-events/PDMResourceRetriesExceeded.js'; import { randomUUID } from 'node:crypto'; +import { deriveChildTraceparent } from 'utils/trace-context'; import { EventPublisher, Logger } from 'utils'; export interface HandlerDependencies { @@ -88,6 +89,7 @@ function generateAvailableEvent( id: randomUUID(), time: eventTime, recordedtime: eventTime, + traceparent: deriveChildTraceparent(event.traceparent), dataschema: 'https://notify.nhs.uk/cloudevents/schemas/digital-letters/2025-10-draft/data/digital-letters-pdm-resource-available-data.schema.json', type: 'uk.nhs.notify.digital.letters.pdm.resource.available.v1', @@ -112,6 +114,7 @@ function generateUnavailableEvent( id: randomUUID(), time: eventTime, recordedtime: eventTime, + traceparent: deriveChildTraceparent(event.traceparent), dataschema: 'https://notify.nhs.uk/cloudevents/schemas/digital-letters/2025-10-draft/data/digital-letters-pdm-resource-unavailable-data.schema.json', type: 'uk.nhs.notify.digital.letters.pdm.resource.unavailable.v1', @@ -135,6 +138,7 @@ function generateRetriesExceededEvent( id: randomUUID(), time: eventTime, recordedtime: eventTime, + traceparent: deriveChildTraceparent(event.traceparent), dataschema: 'https://notify.nhs.uk/cloudevents/schemas/digital-letters/2025-10-draft/data/digital-letters-pdm-resource-retries-exceeded-data.schema.json', type: 'uk.nhs.notify.digital.letters.pdm.resource.retries.exceeded.v1', @@ -184,16 +188,30 @@ export const createHandler = ({ if (pdmAvailability === 'unavailable') { if (retries >= pollMaxRetries) { - retriesExceededEvents.push( - generateRetriesExceededEvent(event, retries), - ); + const outgoing = generateRetriesExceededEvent(event, retries); + logger.info({ + description: 'TraceContext hop', + incoming_traceparent: event.traceparent, + outgoing_traceparent: outgoing.traceparent, + }); + retriesExceededEvents.push(outgoing); } else { - unavailableEvents.push(generateUnavailableEvent(event, retries)); + const outgoing = generateUnavailableEvent(event, retries); + logger.info({ + description: 'TraceContext hop', + incoming_traceparent: event.traceparent, + outgoing_traceparent: outgoing.traceparent, + }); + unavailableEvents.push(outgoing); } } else { - availableEvents.push( - generateAvailableEvent(event, nhsNumber, odsCode), - ); + const outgoing = generateAvailableEvent(event, nhsNumber, odsCode); + logger.info({ + description: 'TraceContext hop', + incoming_traceparent: event.traceparent, + outgoing_traceparent: outgoing.traceparent, + }); + availableEvents.push(outgoing); } } catch (error: any) { logger.warn({ diff --git a/utils/py-utils/dl_utils/__init__.py b/utils/py-utils/dl_utils/__init__.py index 1c8046ba..5d8c28da 100644 --- a/utils/py-utils/dl_utils/__init__.py +++ b/utils/py-utils/dl_utils/__init__.py @@ -22,6 +22,11 @@ report_expiry_time ) +from .trace_context import ( + create_traceparent, + derive_child_traceparent, +) + __all__ = [ 'EventPublisher', 'BaseMeshConfig', @@ -33,4 +38,6 @@ 'Metric', 'CertificateExpiryMonitor', 'report_expiry_time', + 'create_traceparent', + 'derive_child_traceparent', ] diff --git a/utils/py-utils/dl_utils/trace_context.py b/utils/py-utils/dl_utils/trace_context.py new file mode 100644 index 00000000..2ee956f2 --- /dev/null +++ b/utils/py-utils/dl_utils/trace_context.py @@ -0,0 +1,23 @@ +"""W3C TraceContext helpers for Digital Letters + +Format: 00--- +""" + +import re +import secrets + +_TRACEPARENT_RE = re.compile(r'^00-([0-9a-f]{32})-([0-9a-f]{16})-([0-9a-f]{2})$') + + +def create_traceparent() -> str: + """Return a new root W3C traceparent (sampled).""" + return f'00-{secrets.token_hex(16)}-{secrets.token_hex(8)}-01' + + +def derive_child_traceparent(incoming: str) -> str: + """Return a child traceparent that shares the incoming trace-id.""" + match = _TRACEPARENT_RE.match(incoming) + if not match: + raise ValueError(f'Invalid traceparent: {incoming!r}') + trace_id, flags = match.group(1), match.group(3) + return f'00-{trace_id}-{secrets.token_hex(8)}-{flags}' diff --git a/utils/utils/package.json b/utils/utils/package.json index 780d0109..ba8dfb8a 100644 --- a/utils/utils/package.json +++ b/utils/utils/package.json @@ -31,7 +31,8 @@ }, "exports": { ".": "./src/index.ts", - "./logger": "./src/logger.ts" + "./logger": "./src/logger.ts", + "./trace-context": "./src/trace-context/index.ts" }, "main": "src/index.ts", "name": "utils", diff --git a/utils/utils/src/index.ts b/utils/utils/src/index.ts index 4b9333bc..b93ced2c 100644 --- a/utils/utils/src/index.ts +++ b/utils/utils/src/index.ts @@ -16,3 +16,4 @@ export * from './key-generation-utils'; export * from './schema-utils'; export * from './pdm-client'; export * from './reporting'; +export * from './trace-context'; diff --git a/utils/utils/src/trace-context/index.ts b/utils/utils/src/trace-context/index.ts new file mode 100644 index 00000000..28ce4237 --- /dev/null +++ b/utils/utils/src/trace-context/index.ts @@ -0,0 +1 @@ +export * from './trace-context'; diff --git a/utils/utils/src/trace-context/trace-context.ts b/utils/utils/src/trace-context/trace-context.ts new file mode 100644 index 00000000..ea40ccbd --- /dev/null +++ b/utils/utils/src/trace-context/trace-context.ts @@ -0,0 +1,28 @@ +/** + * W3C TraceContext helpers for Digital Letters + * + * Format: 00--- + */ + +import { randomBytes } from 'node:crypto'; + +const TRACEPARENT_REGEX = /^00-([0-9a-f]{32})-([0-9a-f]{16})-([0-9a-f]{2})$/; + +function randomHex(bytes: number): string { + return randomBytes(bytes).toString('hex'); +} + +/** Create a new root W3C traceparent */ +export function createTraceparent(): string { + return `00-${randomHex(16)}-${randomHex(8)}-01`; +} + +/** Return a child traceparent that shares the incoming trace-id */ +export function deriveChildTraceparent(incoming: string): string { + const match = TRACEPARENT_REGEX.exec(incoming); + if (!match) { + throw new Error(`Invalid traceparent: "${incoming}"`); + } + const [, traceId, , flags] = match; + return `00-${traceId}-${randomHex(8)}-${flags}`; +} From 04b3c76c5c73f6e570951b4db01d4e823d24b2f9 Mon Sep 17 00:00:00 2001 From: lapenna-bjss Date: Thu, 19 Mar 2026 10:08:13 +0000 Subject: [PATCH 2/4] CCM-14320: Update EventPublishers to handle traceparent --- lambdas/mesh-download/mesh_download/processor.py | 12 +++--------- lambdas/mesh-poll/mesh_poll/processor.py | 7 ++++--- .../src/__tests__/apis/sqs-handler.test.ts | 10 +++++++--- lambdas/pdm-poll-lambda/src/apis/sqs-handler.ts | 4 ---- utils/py-utils/dl_utils/event_publisher.py | 15 ++++++++++++--- .../utils/src/event-publisher/event-publisher.ts | 14 ++++++++++++++ 6 files changed, 40 insertions(+), 22 deletions(-) diff --git a/lambdas/mesh-download/mesh_download/processor.py b/lambdas/mesh-download/mesh_download/processor.py index f37c6d42..45264113 100644 --- a/lambdas/mesh-download/mesh_download/processor.py +++ b/lambdas/mesh-download/mesh_download/processor.py @@ -5,7 +5,6 @@ from pydantic import ValidationError from digital_letters_events import MESHInboxMessageDownloaded, MESHInboxMessageReceived from mesh_download.errors import MeshMessageNotFound -from dl_utils import derive_child_traceparent class MeshDownloadProcessor: @@ -110,21 +109,16 @@ def _store_message_content(self, sender_id, message_reference, message_content, def _publish_downloaded_event(self, incoming_event, message_uri): """ Publishes a MESHInboxMessageDownloaded event. - - The outgoing event derives a child traceparent from the incoming event's - traceparent, preserving the same trace-id while generating a fresh span-id - for this service hop. + The EventPublisher will derive a child traceparent from the incoming traceparent + automatically, preserving the trace-id across this service hop. """ now = datetime.now(timezone.utc).isoformat() - child_traceparent = derive_child_traceparent(incoming_event.traceparent) - cloud_event = { **incoming_event.model_dump(exclude_none=True), 'id': str(uuid4()), 'time': now, 'recordedtime': now, - 'traceparent': child_traceparent, 'type': 'uk.nhs.notify.digital.letters.mesh.inbox.message.downloaded.v1', 'dataschema': ( 'https://notify.nhs.uk/cloudevents/schemas/digital-letters/2025-10-draft/data/' @@ -150,5 +144,5 @@ def _publish_downloaded_event(self, incoming_event, message_uri): message_uri=message_uri, message_reference=incoming_event.data.messageReference, incoming_traceparent=incoming_event.traceparent, - outgoing_traceparent=child_traceparent, + outgoing_traceparent=cloud_event['traceparent'], ) diff --git a/lambdas/mesh-poll/mesh_poll/processor.py b/lambdas/mesh-poll/mesh_poll/processor.py index 79ff6527..f8fe2c07 100644 --- a/lambdas/mesh-poll/mesh_poll/processor.py +++ b/lambdas/mesh-poll/mesh_poll/processor.py @@ -151,6 +151,7 @@ def process_message(self, message): def _publish_mesh_inbox_message_received_event(self, event_detail): """ Publishes a MESHInboxMessageReceived event for the retriever component. + The EventPublisher will create a root traceparent automatically. """ now = datetime.now(timezone.utc).isoformat() @@ -167,7 +168,6 @@ def _publish_mesh_inbox_message_received_event(self, event_detail): 'recordedtime': now, 'severitynumber': 2, 'severitytext': 'INFO', - 'traceparent': '00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01', 'dataschema': ( 'https://notify.nhs.uk/cloudevents/schemas/digital-letters/' '2025-10-draft/data/digital-letters-mesh-inbox-message-received-data.schema.json' @@ -185,11 +185,13 @@ def _publish_mesh_inbox_message_received_event(self, event_detail): self.__log.info("Published MESHInboxMessageReceived event", mesh_message_id=event_detail["data"]["meshMessageId"], - sender_id=event_detail["data"]["senderId"]) + sender_id=event_detail["data"]["senderId"], + traceparent=cloud_event['traceparent']) def _publish_mesh_inbox_message_invalid_event(self, event_detail): """ Publishes a MESHInboxMessageInvalid event when a message fails validation. + The EventPublisher will create a root traceparent automatically. """ now = datetime.now(timezone.utc).isoformat() @@ -203,7 +205,6 @@ def _publish_mesh_inbox_message_invalid_event(self, event_detail): 'recordedtime': now, 'severitynumber': 3, 'severitytext': 'WARN', - 'traceparent': '00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01', 'dataschema': ( 'https://notify.nhs.uk/cloudevents/schemas/digital-letters/' '2025-10-draft/data/digital-letters-mesh-inbox-message-invalid-data.schema.json' diff --git a/lambdas/pdm-poll-lambda/src/__tests__/apis/sqs-handler.test.ts b/lambdas/pdm-poll-lambda/src/__tests__/apis/sqs-handler.test.ts index f6500aa7..edc6f3f0 100644 --- a/lambdas/pdm-poll-lambda/src/__tests__/apis/sqs-handler.test.ts +++ b/lambdas/pdm-poll-lambda/src/__tests__/apis/sqs-handler.test.ts @@ -20,14 +20,18 @@ jest.mock('node:crypto', () => ({ // eslint-disable-next-line @typescript-eslint/no-require-imports const { randomBytes } = require('node:crypto'); + const mockRandomUUID = randomUUID as jest.MockedFunction; -const mockRandomBytes = randomBytes as jest.MockedFunction; +const mockRandomBytes = randomBytes as jest.MockedFunction< + typeof import('node:crypto').randomBytes +>; const mockDate = jest.spyOn(Date.prototype, 'toISOString'); mockRandomUUID.mockReturnValue('550e8400-e29b-41d4-a716-446655440001'); -mockRandomBytes.mockReturnValue(Buffer.from('aabbccdd11223344', 'hex')); +mockRandomBytes.mockImplementation(() => Buffer.from('aabbccdd11223344', 'hex')); mockDate.mockReturnValue('2023-06-20T12:00:00.250Z'); -const DERIVED_TRACEPARENT = '00-0af7651916cd43dd8448eb211c80319c-aabbccdd11223344-01'; +const DERIVED_TRACEPARENT = + '00-0af7651916cd43dd8448eb211c80319c-aabbccdd11223344-01'; const handler = createHandler({ eventPublisher, diff --git a/lambdas/pdm-poll-lambda/src/apis/sqs-handler.ts b/lambdas/pdm-poll-lambda/src/apis/sqs-handler.ts index fc5181fc..1b7c3de2 100644 --- a/lambdas/pdm-poll-lambda/src/apis/sqs-handler.ts +++ b/lambdas/pdm-poll-lambda/src/apis/sqs-handler.ts @@ -15,7 +15,6 @@ import pdmResourceSubmittedValidator from 'digital-letters-events/PDMResourceSub import pdmResourceUnavailableValidator from 'digital-letters-events/PDMResourceUnavailable.js'; import pdmResourceRetriesExceededValidator from 'digital-letters-events/PDMResourceRetriesExceeded.js'; import { randomUUID } from 'node:crypto'; -import { deriveChildTraceparent } from 'utils/trace-context'; import { EventPublisher, Logger } from 'utils'; export interface HandlerDependencies { @@ -89,7 +88,6 @@ function generateAvailableEvent( id: randomUUID(), time: eventTime, recordedtime: eventTime, - traceparent: deriveChildTraceparent(event.traceparent), dataschema: 'https://notify.nhs.uk/cloudevents/schemas/digital-letters/2025-10-draft/data/digital-letters-pdm-resource-available-data.schema.json', type: 'uk.nhs.notify.digital.letters.pdm.resource.available.v1', @@ -114,7 +112,6 @@ function generateUnavailableEvent( id: randomUUID(), time: eventTime, recordedtime: eventTime, - traceparent: deriveChildTraceparent(event.traceparent), dataschema: 'https://notify.nhs.uk/cloudevents/schemas/digital-letters/2025-10-draft/data/digital-letters-pdm-resource-unavailable-data.schema.json', type: 'uk.nhs.notify.digital.letters.pdm.resource.unavailable.v1', @@ -138,7 +135,6 @@ function generateRetriesExceededEvent( id: randomUUID(), time: eventTime, recordedtime: eventTime, - traceparent: deriveChildTraceparent(event.traceparent), dataschema: 'https://notify.nhs.uk/cloudevents/schemas/digital-letters/2025-10-draft/data/digital-letters-pdm-resource-retries-exceeded-data.schema.json', type: 'uk.nhs.notify.digital.letters.pdm.resource.retries.exceeded.v1', diff --git a/utils/py-utils/dl_utils/event_publisher.py b/utils/py-utils/dl_utils/event_publisher.py index b533ccba..c9d95b0c 100644 --- a/utils/py-utils/dl_utils/event_publisher.py +++ b/utils/py-utils/dl_utils/event_publisher.py @@ -11,6 +11,7 @@ import boto3 from botocore.exceptions import ClientError from pydantic import ValidationError +from .trace_context import create_traceparent, derive_child_traceparent DlqReason = Literal['INVALID_EVENT', 'EVENTBRIDGE_FAILURE'] @@ -220,14 +221,22 @@ def send_events(self, events: List[Dict[str, Any]], """ Send CloudEvents to EventBridge with validation and DLQ support. - 1. Validates events using the specified validator function - 2. Sends valid events to EventBridge - 3. Routes failed events to DLQ + 1. Stamps each event with a traceparent: derives a child if one exists, otherwise creates a new root traceparent. + 2. Validates events using the specified validator function + 3. Sends valid events to EventBridge + 4. Routes failed events to DLQ """ if not events: self.logger.info('No events to send') return [] + for event in events: + incoming = event.get('traceparent') + if incoming: + event['traceparent'] = derive_child_traceparent(incoming) + else: + event['traceparent'] = create_traceparent() + valid_events = [] invalid_events = [] diff --git a/utils/utils/src/event-publisher/event-publisher.ts b/utils/utils/src/event-publisher/event-publisher.ts index e7b6b45f..df853099 100644 --- a/utils/utils/src/event-publisher/event-publisher.ts +++ b/utils/utils/src/event-publisher/event-publisher.ts @@ -5,6 +5,10 @@ import { import { SQSClient, SendMessageBatchCommand } from '@aws-sdk/client-sqs'; import { randomUUID } from 'node:crypto'; import { Logger } from '../logger'; +import { + createTraceparent, + deriveChildTraceparent, +} from '../trace-context/trace-context'; type DlqReason = 'INVALID_EVENT' | 'EVENTBRIDGE_FAILURE'; @@ -197,6 +201,16 @@ export class EventPublisher { return []; } + // Stamp each event with a traceparent: derive child if one exists, else create root + for (const event of events) { + const incoming = (event as any).traceparent as string | undefined; + if (incoming) { + (event as any).traceparent = deriveChildTraceparent(incoming); + } else { + (event as any).traceparent = createTraceparent(); + } + } + const validEvents: T[] = []; const invalidEvents: T[] = []; From d59974b2075271ef183de5b2bfdb9f9576ee4a82 Mon Sep 17 00:00:00 2001 From: lapenna-bjss Date: Thu, 19 Mar 2026 10:48:22 +0000 Subject: [PATCH 3/4] CCM-14320: propagate trace context across GuardDuty via S3 metadata --- lambdas/file-scanner-lambda/src/apis/sqs-handler.ts | 1 + lambdas/file-scanner-lambda/src/app/file-scanner.ts | 2 ++ .../move-scanned-files-lambda/src/app/move-file-handler.ts | 6 ++++++ lambdas/move-scanned-files-lambda/src/domain/mapper.ts | 7 ++++++- 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/lambdas/file-scanner-lambda/src/apis/sqs-handler.ts b/lambdas/file-scanner-lambda/src/apis/sqs-handler.ts index d8b7b706..0acf2934 100644 --- a/lambdas/file-scanner-lambda/src/apis/sqs-handler.ts +++ b/lambdas/file-scanner-lambda/src/apis/sqs-handler.ts @@ -64,6 +64,7 @@ async function processRecord( messageReference: event.data.messageReference, senderId: event.data.senderId, createdAt: event.time, + traceparent: event.traceparent, }); if (result.outcome === 'failed') { diff --git a/lambdas/file-scanner-lambda/src/app/file-scanner.ts b/lambdas/file-scanner-lambda/src/app/file-scanner.ts index 3eabde7a..fb687b0a 100644 --- a/lambdas/file-scanner-lambda/src/app/file-scanner.ts +++ b/lambdas/file-scanner-lambda/src/app/file-scanner.ts @@ -24,6 +24,7 @@ export interface ScanFileMetadata { messageReference: string; senderId: string; createdAt: string; + traceparent: string; } export type ScanFileResult = { @@ -158,6 +159,7 @@ export class FileScanner { messageReference: metadata.messageReference, senderId: metadata.senderId, createdAt: metadata.createdAt, + traceparent: metadata.traceparent, }, }; diff --git a/lambdas/move-scanned-files-lambda/src/app/move-file-handler.ts b/lambdas/move-scanned-files-lambda/src/app/move-file-handler.ts index f8d0bb48..ac5a65c4 100644 --- a/lambdas/move-scanned-files-lambda/src/app/move-file-handler.ts +++ b/lambdas/move-scanned-files-lambda/src/app/move-file-handler.ts @@ -19,6 +19,7 @@ type ObjectMetadata = { senderId: string; messageReference: string; createdAt: string; + traceparent?: string; }; type ObjectsFromEvent = { @@ -34,6 +35,7 @@ const SCAN_RESULT_STATUS_NO_THREATS_FOUND = 'NO_THREATS_FOUND'; const METADATA_CREATED_AT = 'createdat'; const METADATA_MESSAGE_REFERENCE = 'messagereference'; const METADATA_SENDER_ID = 'senderid'; +const METADATA_TRACEPARENT = 'traceparent'; /** * Utility function to extract a subset of the object key for logging purposes. @@ -121,6 +123,7 @@ export class MoveFileHandler { const createdAt = metadataMap.get(METADATA_CREATED_AT); const messageReference = metadataMap.get(METADATA_MESSAGE_REFERENCE); const senderId = metadataMap.get(METADATA_SENDER_ID); + const traceparent = metadataMap.get(METADATA_TRACEPARENT); if (!messageReference || !senderId || !createdAt) { return null; @@ -137,6 +140,7 @@ export class MoveFileHandler { senderId, messageReference, createdAt, + traceparent, }; } @@ -160,6 +164,7 @@ export class MoveFileHandler { metadata.senderId, `s3://${destinationBucket}/${objectKey}`, metadata.createdAt, + metadata.traceparent, ); } else { destinationBucket = this.quarantineBucketName; @@ -176,6 +181,7 @@ export class MoveFileHandler { metadata.senderId, `s3://${destinationBucket}/${objectKey}`, metadata.createdAt, + metadata.traceparent, ); } diff --git a/lambdas/move-scanned-files-lambda/src/domain/mapper.ts b/lambdas/move-scanned-files-lambda/src/domain/mapper.ts index fe0d483a..8c90528a 100644 --- a/lambdas/move-scanned-files-lambda/src/domain/mapper.ts +++ b/lambdas/move-scanned-files-lambda/src/domain/mapper.ts @@ -7,6 +7,7 @@ function createEventWithCommonFields( senderId: string, letterUri: string, createdAt: string, + traceparent?: string, ): FileSafe | FileQuarantined { return { specversion: '1.0', @@ -14,7 +15,7 @@ function createEventWithCommonFields( subject: `customer/${senderId}/recipient/${messageReference}`, source: '/nhs/england/notify/production/primary/data-plane/digitalletters/print', // Note CCM-13892. - traceparent: '00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01', // Note CCM-14255. + traceparent: traceparent ?? '', // EventPublisher will derive child or create root type: isFileSafe ? 'uk.nhs.notify.digital.letters.print.file.safe.v1' : 'uk.nhs.notify.digital.letters.print.file.quarantined.v1', @@ -36,6 +37,7 @@ export function createFileSafeEvent( senderId: string, letterUri: string, createdAt: string, + traceparent?: string, ): FileSafe { return createEventWithCommonFields( true, @@ -43,6 +45,7 @@ export function createFileSafeEvent( senderId, letterUri, createdAt, + traceparent, ) as FileSafe; } @@ -51,6 +54,7 @@ export function createFileQuarantinedEvent( senderId: string, letterUri: string, createdAt: string, + traceparent?: string, ): FileQuarantined { return createEventWithCommonFields( false, @@ -58,5 +62,6 @@ export function createFileQuarantinedEvent( senderId, letterUri, createdAt, + traceparent, ) as FileQuarantined; } From bec1dd1c523ecc892c87ead55981f90bf343fd4b Mon Sep 17 00:00:00 2001 From: lapenna-bjss Date: Fri, 20 Mar 2026 11:16:43 +0000 Subject: [PATCH 4/4] CCM-14320: Update W3C traceparent helpers to use OpenTelemetry SDK --- lambdas/mesh-acknowledge/requirements.txt | 2 + lambdas/mesh-download/requirements.txt | 2 + lambdas/mesh-poll/requirements.txt | 2 + lambdas/report-sender/requirements.txt | 2 + package-lock.json | 98 +++++++++++++++++++ utils/py-utils/dl_utils/trace_context.py | 41 +++++--- utils/py-utils/requirements.txt | 2 + utils/utils/package.json | 2 + .../utils/src/trace-context/trace-context.ts | 48 ++++++--- 9 files changed, 170 insertions(+), 29 deletions(-) diff --git a/lambdas/mesh-acknowledge/requirements.txt b/lambdas/mesh-acknowledge/requirements.txt index 68ecfdcb..3e375cb8 100644 --- a/lambdas/mesh-acknowledge/requirements.txt +++ b/lambdas/mesh-acknowledge/requirements.txt @@ -3,6 +3,8 @@ boto3>=1.28.62 pyopenssl>=24.2.1 pydantic>=2.0.0 structlog>=21.5.0 +opentelemetry-api>=1.25.0 +opentelemetry-sdk>=1.25.0 -e ../../src/digital-letters-events -e ../../utils/py-mock-mesh -e ../../utils/py-utils diff --git a/lambdas/mesh-download/requirements.txt b/lambdas/mesh-download/requirements.txt index b9af3fe0..41615568 100644 --- a/lambdas/mesh-download/requirements.txt +++ b/lambdas/mesh-download/requirements.txt @@ -8,6 +8,8 @@ urllib3>=1.26.19,<2.0.0 idna>=3.7 requests>=2.32.0 pyopenssl>=24.2.1 +opentelemetry-api>=1.25.0 +opentelemetry-sdk>=1.25.0 -e ../../src/digital-letters-events -e ../../utils/py-mock-mesh -e ../../utils/py-utils diff --git a/lambdas/mesh-poll/requirements.txt b/lambdas/mesh-poll/requirements.txt index 5e7f1345..36ed181b 100644 --- a/lambdas/mesh-poll/requirements.txt +++ b/lambdas/mesh-poll/requirements.txt @@ -8,6 +8,8 @@ idna>=3.7 requests>=2.32.0 pyopenssl>=24.2.1 pydantic>=2.0.0 +opentelemetry-api>=1.25.0 +opentelemetry-sdk>=1.25.0 -e ../../src/digital-letters-events -e ../../utils/py-mock-mesh -e ../../utils/py-utils diff --git a/lambdas/report-sender/requirements.txt b/lambdas/report-sender/requirements.txt index 5e7f1345..36ed181b 100644 --- a/lambdas/report-sender/requirements.txt +++ b/lambdas/report-sender/requirements.txt @@ -8,6 +8,8 @@ idna>=3.7 requests>=2.32.0 pyopenssl>=24.2.1 pydantic>=2.0.0 +opentelemetry-api>=1.25.0 +opentelemetry-sdk>=1.25.0 -e ../../src/digital-letters-events -e ../../utils/py-mock-mesh -e ../../utils/py-utils diff --git a/package-lock.json b/package-lock.json index 177d60c7..83738854 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9424,6 +9424,102 @@ "dev": true, "license": "MIT" }, + "node_modules/@opentelemetry/api": { + "version": "1.9.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz", + "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==", + "license": "Apache-2.0", + "peer": true, + "engines": { + "node": ">=8.0.0" + } + }, + "node_modules/@opentelemetry/context-async-hooks": { + "version": "2.6.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/context-async-hooks/-/context-async-hooks-2.6.0.tgz", + "integrity": "sha512-L8UyDwqpTcbkIK5cgwDRDYDoEhQoj8wp8BwsO19w3LB1Z41yEQm2VJyNfAi9DrLP/YTqXqWpKHyZfR9/tFYo1Q==", + "license": "Apache-2.0", + "engines": { + "node": "^18.19.0 || >=20.6.0" + }, + "peerDependencies": { + "@opentelemetry/api": ">=1.0.0 <1.10.0" + } + }, + "node_modules/@opentelemetry/core": { + "version": "2.6.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-2.6.0.tgz", + "integrity": "sha512-HLM1v2cbZ4TgYN6KEOj+Bbj8rAKriOdkF9Ed3tG25FoprSiQl7kYc+RRT6fUZGOvx0oMi5U67GoFdT+XUn8zEg==", + "license": "Apache-2.0", + "dependencies": { + "@opentelemetry/semantic-conventions": "^1.29.0" + }, + "engines": { + "node": "^18.19.0 || >=20.6.0" + }, + "peerDependencies": { + "@opentelemetry/api": ">=1.0.0 <1.10.0" + } + }, + "node_modules/@opentelemetry/resources": { + "version": "2.6.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-2.6.0.tgz", + "integrity": "sha512-D4y/+OGe3JSuYUCBxtH5T9DSAWNcvCb/nQWIga8HNtXTVPQn59j0nTBAgaAXxUVBDl40mG3Tc76b46wPlZaiJQ==", + "license": "Apache-2.0", + "dependencies": { + "@opentelemetry/core": "2.6.0", + "@opentelemetry/semantic-conventions": "^1.29.0" + }, + "engines": { + "node": "^18.19.0 || >=20.6.0" + }, + "peerDependencies": { + "@opentelemetry/api": ">=1.3.0 <1.10.0" + } + }, + "node_modules/@opentelemetry/sdk-trace-base": { + "version": "2.6.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-2.6.0.tgz", + "integrity": "sha512-g/OZVkqlxllgFM7qMKqbPV9c1DUPhQ7d4n3pgZFcrnrNft9eJXZM2TNHTPYREJBrtNdRytYyvwjgL5geDKl3EQ==", + "license": "Apache-2.0", + "dependencies": { + "@opentelemetry/core": "2.6.0", + "@opentelemetry/resources": "2.6.0", + "@opentelemetry/semantic-conventions": "^1.29.0" + }, + "engines": { + "node": "^18.19.0 || >=20.6.0" + }, + "peerDependencies": { + "@opentelemetry/api": ">=1.3.0 <1.10.0" + } + }, + "node_modules/@opentelemetry/sdk-trace-node": { + "version": "2.6.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-node/-/sdk-trace-node-2.6.0.tgz", + "integrity": "sha512-YhswtasmsbIGEFvLGvR9p/y3PVRTfFf+mgY8van4Ygpnv4sA3vooAjvh+qAn9PNWxs4/IwGGqiQS0PPsaRJ0vQ==", + "license": "Apache-2.0", + "dependencies": { + "@opentelemetry/context-async-hooks": "2.6.0", + "@opentelemetry/core": "2.6.0", + "@opentelemetry/sdk-trace-base": "2.6.0" + }, + "engines": { + "node": "^18.19.0 || >=20.6.0" + }, + "peerDependencies": { + "@opentelemetry/api": ">=1.0.0 <1.10.0" + } + }, + "node_modules/@opentelemetry/semantic-conventions": { + "version": "1.40.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/semantic-conventions/-/semantic-conventions-1.40.0.tgz", + "integrity": "sha512-cifvXDhcqMwwTlTK04GBNeIe7yyo28Mfby85QXFe1Yk8nmi36Ab/5UQwptOx84SsoGNRg+EVSjwzfSZMy6pmlw==", + "license": "Apache-2.0", + "engines": { + "node": ">=14" + } + }, "node_modules/@pdf-lib/standard-fonts": { "version": "1.0.0", "license": "MIT", @@ -22977,6 +23073,8 @@ "@aws-sdk/client-ssm": "^3.984.0", "@aws-sdk/lib-dynamodb": "^3.984.0", "@aws-sdk/lib-storage": "^3.984.0", + "@opentelemetry/api": "^1.9.0", + "@opentelemetry/sdk-trace-node": "^2.6.0", "async-mutex": "^0.4.0", "axios": "^1.13.5", "date-fns": "^4.1.0", diff --git a/utils/py-utils/dl_utils/trace_context.py b/utils/py-utils/dl_utils/trace_context.py index 2ee956f2..6d92b880 100644 --- a/utils/py-utils/dl_utils/trace_context.py +++ b/utils/py-utils/dl_utils/trace_context.py @@ -1,23 +1,38 @@ -"""W3C TraceContext helpers for Digital Letters +"""W3C TraceContext helpers for Digital Letters using OpenTelemetry. -Format: 00--- +Uses the OpenTelemetry API for context propagation. This means: +- Trace IDs and span IDs are generated by the OTel SDK +- traceparent strings are standard W3C format: 00--- +- When an ADOT exporter is later configured, these traces will automatically flow into X-Ray / CloudWatch Application Signals with no code changes needed """ -import re -import secrets +from opentelemetry import trace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator -_TRACEPARENT_RE = re.compile(r'^00-([0-9a-f]{32})-([0-9a-f]{16})-([0-9a-f]{2})$') +trace.set_tracer_provider(TracerProvider()) +_propagator = TraceContextTextMapPropagator() +_tracer = trace.get_tracer(__name__) def create_traceparent() -> str: - """Return a new root W3C traceparent (sampled).""" - return f'00-{secrets.token_hex(16)}-{secrets.token_hex(8)}-01' + """Create a new root W3C traceparent via the OTel tracer.""" + span = _tracer.start_span("root") + ctx = span.get_span_context() + span.end() + trace_id = format(ctx.trace_id, '032x') + span_id = format(ctx.span_id, '016x') + return f'00-{trace_id}-{span_id}-01' def derive_child_traceparent(incoming: str) -> str: - """Return a child traceparent that shares the incoming trace-id.""" - match = _TRACEPARENT_RE.match(incoming) - if not match: - raise ValueError(f'Invalid traceparent: {incoming!r}') - trace_id, flags = match.group(1), match.group(3) - return f'00-{trace_id}-{secrets.token_hex(8)}-{flags}' + """Return a child traceparent that shares the incoming trace-id, via OTel context.""" + carrier = {'traceparent': incoming} + parent_ctx = _propagator.extract(carrier=carrier) + span = _tracer.start_span("child", context=parent_ctx) + ctx = span.get_span_context() + span.end() + trace_id = format(ctx.trace_id, '032x') + span_id = format(ctx.span_id, '016x') + flags = format(ctx.trace_flags, '02x') + return f'00-{trace_id}-{span_id}-{flags}' diff --git a/utils/py-utils/requirements.txt b/utils/py-utils/requirements.txt index 3ae7ecbd..4824dadb 100644 --- a/utils/py-utils/requirements.txt +++ b/utils/py-utils/requirements.txt @@ -3,4 +3,6 @@ pydantic>=2.0.0 structlog>=21.5.0 mesh-client>=3.2.3 pyopenssl>=24.0.0 +opentelemetry-api>=1.25.0 +opentelemetry-sdk>=1.25.0 -e ../py-mock-mesh diff --git a/utils/utils/package.json b/utils/utils/package.json index ba8dfb8a..723dd324 100644 --- a/utils/utils/package.json +++ b/utils/utils/package.json @@ -9,6 +9,8 @@ "@aws-sdk/client-ssm": "^3.984.0", "@aws-sdk/lib-dynamodb": "^3.984.0", "@aws-sdk/lib-storage": "^3.984.0", + "@opentelemetry/api": "^1.9.0", + "@opentelemetry/sdk-trace-node": "^2.6.0", "async-mutex": "^0.4.0", "axios": "^1.13.5", "date-fns": "^4.1.0", diff --git a/utils/utils/src/trace-context/trace-context.ts b/utils/utils/src/trace-context/trace-context.ts index ea40ccbd..dd1fb385 100644 --- a/utils/utils/src/trace-context/trace-context.ts +++ b/utils/utils/src/trace-context/trace-context.ts @@ -1,28 +1,44 @@ /** - * W3C TraceContext helpers for Digital Letters + * W3C TraceContext helpers for Digital Letters using OpenTelemetry. * - * Format: 00--- + * Uses the OpenTelemetry API for context propagation. This means: + * - Trace IDs and span IDs are generated by the OTel SDK + * - traceparent strings are standard W3C format: 00--- + * - When an ADOT exporter is later configured, these traces will automatically + * flow into X-Ray / CloudWatch Application Signals with no code changes needed here */ -import { randomBytes } from 'node:crypto'; +import { + context, + propagation, + trace, + TraceFlags, + ROOT_CONTEXT, +} from '@opentelemetry/api'; +import { NodeTracerProvider } from '@opentelemetry/sdk-trace-node'; -const TRACEPARENT_REGEX = /^00-([0-9a-f]{32})-([0-9a-f]{16})-([0-9a-f]{2})$/; +const provider = new NodeTracerProvider(); +provider.register(); -function randomHex(bytes: number): string { - return randomBytes(bytes).toString('hex'); -} +const tracer = trace.getTracer('dl-trace-context'); -/** Create a new root W3C traceparent */ +/** Create a new root W3C traceparent via the OTel tracer */ export function createTraceparent(): string { - return `00-${randomHex(16)}-${randomHex(8)}-01`; + const span = tracer.startSpan('root', undefined, ROOT_CONTEXT); + const ctx = span.spanContext(); + span.end(); + const traceId = ctx.traceId; + const spanId = ctx.spanId; + return `00-${traceId}-${spanId}-01`; // Formats into W3C traceparent string } -/** Return a child traceparent that shares the incoming trace-id */ +/** Return a child traceparent that shares the incoming trace-id, via OTel context */ export function deriveChildTraceparent(incoming: string): string { - const match = TRACEPARENT_REGEX.exec(incoming); - if (!match) { - throw new Error(`Invalid traceparent: "${incoming}"`); - } - const [, traceId, , flags] = match; - return `00-${traceId}-${randomHex(8)}-${flags}`; + const carrier: Record = { traceparent: incoming }; + const parentCtx = propagation.extract(ROOT_CONTEXT, carrier); + const span = tracer.startSpan('child', undefined, parentCtx); + const ctx = span.spanContext(); + span.end(); + const flags = (ctx.traceFlags & TraceFlags.SAMPLED) ? '01' : '00'; + return `00-${ctx.traceId}-${ctx.spanId}-${flags}`; }