From 04f3e715fb6e1c1bb9d3f19480296859ac195cfa Mon Sep 17 00:00:00 2001 From: Jack Misner Date: Fri, 13 Feb 2026 19:07:56 +0000 Subject: [PATCH 1/2] feat: add value sanitization for UTM parameters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a sanitization module that strips dangerous characters (HTML tags, control chars, custom patterns) from UTM parameter values at capture time to prevent XSS when values are rendered in HTML or used in URLs. - New SanitizeConfig type with enabled, stripHtml, stripControlChars, maxLength, and optional customPattern fields - sanitizeValue() and sanitizeParams() in src/core/sanitizer.ts - Integration into captureUtmParameters() pipeline (extract → filter → sanitize → convert key format) - Config system support: DEFAULT_SANITIZE_CONFIG, merge, validation - React hook forwards sanitize config to capture - Disabled by default with safe defaults when enabled - 266 tests passing (45 new) 🤖 Generated with [Nori](https://nori.ai) Co-Authored-By: Nori --- .gitignore | 1 + README.md | 44 ++++++ __tests__/config/loader.test.ts | 96 ++++++++++++ __tests__/core/capture.test.ts | 47 ++++++ __tests__/core/sanitizer.test.ts | 188 ++++++++++++++++++++++++ __tests__/docs.md | 4 +- __tests__/react/useUtmTracking.test.tsx | 25 ++++ src/config/defaults.ts | 17 ++- src/config/docs.md | 5 +- src/config/index.ts | 7 +- src/config/loader.ts | 55 ++++++- src/core/capture.ts | 19 ++- src/core/docs.md | 7 +- src/core/index.ts | 3 + src/core/sanitizer.ts | 67 +++++++++ src/docs.md | 4 +- src/index.ts | 6 + src/react/useUtmTracking.ts | 1 + src/types/docs.md | 2 + src/types/index.ts | 24 +++ vitest.config.ts | 4 +- 21 files changed, 609 insertions(+), 17 deletions(-) create mode 100644 __tests__/core/sanitizer.test.ts create mode 100644 src/core/sanitizer.ts diff --git a/.gitignore b/.gitignore index c05ae6a..e2a2d4b 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ dist/ .vscode/ *.swp *.swo +temp/* # OS 
.DS_Store diff --git a/README.md b/README.md index 700bec2..00443bb 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ A comprehensive TypeScript library for capturing, storing, and appending UTM tra ## Features - **Capture** UTM parameters from URLs +- **Sanitize** parameter values to prevent XSS and injection - **Store** in sessionStorage for the browser session - **Append** UTM parameters to share URLs - **Configurable** key format (snake_case or camelCase) @@ -110,6 +111,16 @@ const params = captureUtmParameters(url, { keyFormat: 'camelCase', // 'snake_case' (default) or 'camelCase' allowedParameters: ['utm_source', 'utm_campaign'], // Filter to specific params }); + +// With sanitization (strips HTML, control chars) +const params = captureUtmParameters(url, { + sanitize: { + enabled: true, + stripHtml: true, // Remove < > " ' ` (default: true) + stripControlChars: true, // Remove control characters (default: true) + maxLength: 200, // Truncate values (default: 200) + }, +}); ``` #### `storeUtmParameters(params, options?)` @@ -206,6 +217,37 @@ validateAndNormalize('example.com'); // { valid: true, normalizedUrl: 'https://example.com' } ``` +### Value Sanitization + +Sanitize UTM parameter values to prevent XSS when rendering in HTML or constructing URLs. Sanitization is disabled by default and runs at capture time only. 
+ +```typescript +import { captureUtmParameters, sanitizeValue, sanitizeParams } from '@jackmisner/utm-toolkit'; + +// Enable sanitization during capture +const params = captureUtmParameters('https://example.com?utm_source=<script>bad</script>', { + sanitize: { enabled: true }, +}); +// { utm_source: 'scriptbad/script' } + +// Use standalone sanitization functions +sanitizeValue('<b>bold</b>', { + enabled: true, + stripHtml: true, + stripControlChars: true, + maxLength: 200, +}); +// 'bbold/b' + +// With a custom pattern +const params = captureUtmParameters(url, { + sanitize: { + enabled: true, + customPattern: /[!@#$%^&*]/g, // Strip additional characters + }, +}); +``` + ### Configuration ```typescript @@ -289,6 +331,7 @@ installDebugHelpers(); | `defaultParams` | `object` | `{}` | Fallback params when none captured | | `shareContextParams` | `object` | `{}` | Platform-specific params | | `excludeFromShares` | `string[]` | `[]` | Params to exclude from shares | +| `sanitize` | `SanitizeConfig` | `{ enabled: false }` | Value sanitization settings | ## TypeScript Types @@ -296,6 +339,7 @@ installDebugHelpers(); import type { UtmParameters, UtmConfig, + SanitizeConfig, SharePlatform, UseUtmTrackingReturn, } from '@jackmisner/utm-toolkit'; diff --git a/__tests__/config/loader.test.ts b/__tests__/config/loader.test.ts index c4907f6..743edac 100644 --- a/__tests__/config/loader.test.ts +++ b/__tests__/config/loader.test.ts @@ -238,6 +238,102 @@ describe('validateConfig', () => { }) }) +describe('sanitize config', () => { + it('createConfig includes sanitize defaults when not provided', () => { + const config = createConfig() + expect(config.sanitize).toEqual({ + enabled: false, + stripHtml: true, + stripControlChars: true, + maxLength: 200, + }) + }) + + it('createConfig merges partial sanitize config with defaults', () => { + const config = createConfig({ + sanitize: { enabled: true, maxLength: 100 }, + }) + expect(config.sanitize.enabled).toBe(true) + 
expect(config.sanitize.maxLength).toBe(100) + expect(config.sanitize.stripHtml).toBe(true) // default preserved + expect(config.sanitize.stripControlChars).toBe(true) // default preserved + }) + + it('createConfig preserves customPattern when provided', () => { + const pattern = /[!@#]/g + const config = createConfig({ + sanitize: { enabled: true, customPattern: pattern }, + }) + expect(config.sanitize.customPattern).toBe(pattern) + }) + + it('mergeConfig merges sanitize overrides', () => { + const base = getDefaultConfig() + const merged = mergeConfig(base, { + sanitize: { enabled: true }, + }) + expect(merged.sanitize.enabled).toBe(true) + expect(merged.sanitize.stripHtml).toBe(true) // preserved from base + }) + + it('validateConfig validates sanitize.enabled is boolean', () => { + const errors = validateConfig({ sanitize: { enabled: 'yes' } }) + expect(errors).toContain('sanitize.enabled must be a boolean') + }) + + it('validateConfig validates sanitize.stripHtml is boolean', () => { + const errors = validateConfig({ sanitize: { stripHtml: 123 } }) + expect(errors).toContain('sanitize.stripHtml must be a boolean') + }) + + it('validateConfig validates sanitize.stripControlChars is boolean', () => { + const errors = validateConfig({ sanitize: { stripControlChars: null } }) + expect(errors).toContain('sanitize.stripControlChars must be a boolean') + }) + + it('validateConfig validates sanitize.maxLength is number', () => { + const errors = validateConfig({ sanitize: { maxLength: 'big' } }) + expect(errors).toContain('sanitize.maxLength must be a positive finite number') + }) + + it('validateConfig validates sanitize.maxLength is positive', () => { + const errors = validateConfig({ sanitize: { maxLength: -1 } }) + expect(errors).toContain('sanitize.maxLength must be a positive finite number') + }) + + it('validateConfig rejects NaN as sanitize.maxLength', () => { + const errors = validateConfig({ sanitize: { maxLength: NaN } }) + 
expect(errors).toContain('sanitize.maxLength must be a positive finite number') + }) + + it('validateConfig rejects Infinity as sanitize.maxLength', () => { + const errors = validateConfig({ sanitize: { maxLength: Infinity } }) + expect(errors).toContain('sanitize.maxLength must be a positive finite number') + }) + + it('validateConfig validates sanitize.customPattern is a RegExp', () => { + const errors = validateConfig({ sanitize: { customPattern: 'not a regex' } }) + expect(errors).toContain('sanitize.customPattern must be a RegExp') + }) + + it('validateConfig accepts valid customPattern RegExp', () => { + const errors = validateConfig({ sanitize: { customPattern: /[!@#]/g } }) + expect(errors).toEqual([]) + }) + + it('validateConfig validates sanitize is an object', () => { + const errors = validateConfig({ sanitize: 'not an object' }) + expect(errors).toContain('sanitize must be an object') + }) + + it('validateConfig accepts valid sanitize config', () => { + const errors = validateConfig({ + sanitize: { enabled: true, stripHtml: true, stripControlChars: true, maxLength: 100 }, + }) + expect(errors).toEqual([]) + }) +}) + describe('getDefaultConfig', () => { it('returns a copy of default config', () => { const config1 = getDefaultConfig() diff --git a/__tests__/core/capture.test.ts b/__tests__/core/capture.test.ts index e228814..e7ef619 100644 --- a/__tests__/core/capture.test.ts +++ b/__tests__/core/capture.test.ts @@ -183,6 +183,53 @@ describe('hasUtmParameters', () => { }) }) +describe('sanitization integration', () => { + const sanitizeConfig = { + enabled: true, + stripHtml: true, + stripControlChars: true, + maxLength: 200, + } + + it('sanitizes values when sanitize config is enabled', () => { + const result = captureUtmParameters( + 'https://example.com?utm_source=&utm_medium=email', + { sanitize: sanitizeConfig }, + ) + expect(result.utm_source).toBe('scriptbad/script') + expect(result.utm_medium).toBe('email') + }) + + it('does not sanitize when 
sanitize is not provided', () => { + const result = captureUtmParameters('https://example.com?utm_source=') + expect(result.utm_source).toBe('') + }) + + it('does not sanitize when sanitize.enabled is false', () => { + const result = captureUtmParameters('https://example.com?utm_source=', { + sanitize: { ...sanitizeConfig, enabled: false }, + }) + expect(result.utm_source).toBe('') + }) + + it('sanitizes with camelCase key format', () => { + const result = captureUtmParameters('https://example.com?utm_source=bold', { + keyFormat: 'camelCase', + sanitize: sanitizeConfig, + }) + expect(result.utmSource).toBe('bbold/b') + }) + + it('sanitizes after allowed parameter filtering', () => { + const result = captureUtmParameters( + 'https://example.com?utm_source=bold&utm_campaign=test', + { allowedParameters: ['utm_source'], sanitize: sanitizeConfig }, + ) + expect(result.utm_source).toBe('bbold/b') + expect(result).not.toHaveProperty('utm_campaign') + }) +}) + describe('captureFromCurrentUrl', () => { beforeEach(() => { vi.stubGlobal('location', { diff --git a/__tests__/core/sanitizer.test.ts b/__tests__/core/sanitizer.test.ts new file mode 100644 index 0000000..0f1a408 --- /dev/null +++ b/__tests__/core/sanitizer.test.ts @@ -0,0 +1,188 @@ +import { describe, it, expect } from 'vitest' +import { sanitizeValue, sanitizeParams } from '../../src/core/sanitizer' +import type { SanitizeConfig } from '../../src/types' + +const defaultConfig: SanitizeConfig = { + enabled: true, + stripHtml: true, + stripControlChars: true, + maxLength: 200, +} + +describe('sanitizeValue', () => { + describe('stripHtml', () => { + it('strips HTML-significant characters', () => { + const result = sanitizeValue('', defaultConfig) + expect(result).toBe('scriptalert(xss)/script') + }) + + it('strips backticks', () => { + const result = sanitizeValue('value`with`backticks', defaultConfig) + expect(result).toBe('valuewithbackticks') + }) + + it('strips single quotes', () => { + const result = 
sanitizeValue("it's a test", defaultConfig) + expect(result).toBe('its a test') + }) + + it('does not strip HTML characters when stripHtml is false', () => { + const config: SanitizeConfig = { ...defaultConfig, stripHtml: false } + const result = sanitizeValue('bold', config) + expect(result).toBe('bold') + }) + }) + + describe('stripControlChars', () => { + it('strips null bytes and control characters', () => { + const result = sanitizeValue('hello\x00world\x01test', defaultConfig) + expect(result).toBe('helloworldtest') + }) + + it('preserves tab, newline, and carriage return in middle of string', () => { + const result = sanitizeValue('hello\tworld\ntest\rend', defaultConfig) + expect(result).toBe('hello\tworld\ntest\rend') + }) + + it('does not strip control characters when stripControlChars is false', () => { + const config: SanitizeConfig = { ...defaultConfig, stripControlChars: false } + const result = sanitizeValue('hello\x00world', config) + expect(result).toBe('hello\x00world') + }) + }) + + describe('maxLength', () => { + it('truncates values exceeding maxLength', () => { + const config: SanitizeConfig = { ...defaultConfig, maxLength: 10 } + const result = sanitizeValue('this is a long value', config) + expect(result).toBe('this is a ') + }) + + it('does not truncate values within maxLength', () => { + const config: SanitizeConfig = { ...defaultConfig, maxLength: 50 } + const result = sanitizeValue('short', config) + expect(result).toBe('short') + }) + + it('truncates after stripping (not before)', () => { + const config: SanitizeConfig = { ...defaultConfig, maxLength: 5 } + // After stripping < and >, "ab" remains, which is within limit + const result = sanitizeValue('<>ab', config) + expect(result).toBe('ab') + }) + }) + + describe('customPattern', () => { + it('strips characters matching custom pattern', () => { + const config: SanitizeConfig = { + ...defaultConfig, + customPattern: /[!@#$%^&*]/g, + } + const result = sanitizeValue('hello!@#world', 
config) + expect(result).toBe('helloworld') + }) + + it('applies custom pattern after built-in stripping', () => { + const config: SanitizeConfig = { + ...defaultConfig, + customPattern: /\d+/g, + } + const result = sanitizeValue('abc123', config) + expect(result).toBe('tagabc/tag') + }) + }) + + describe('whitespace trimming', () => { + it('trims leading and trailing whitespace', () => { + const result = sanitizeValue(' hello world ', defaultConfig) + expect(result).toBe('hello world') + }) + + it('trims whitespace left after stripping', () => { + const result = sanitizeValue(' <> ', defaultConfig) + expect(result).toBe('') + }) + }) + + describe('edge cases', () => { + it('returns empty string when everything is stripped', () => { + const result = sanitizeValue('<>"\'`', defaultConfig) + expect(result).toBe('') + }) + + it('returns empty string for empty input', () => { + const result = sanitizeValue('', defaultConfig) + expect(result).toBe('') + }) + + it('is idempotent', () => { + const input = '' + const once = sanitizeValue(input, defaultConfig) + const twice = sanitizeValue(once, defaultConfig) + expect(once).toBe(twice) + }) + + it('handles normal UTM values without modification', () => { + const result = sanitizeValue('linkedin', defaultConfig) + expect(result).toBe('linkedin') + }) + + it('handles typical campaign names', () => { + const result = sanitizeValue('spring-2025_campaign', defaultConfig) + expect(result).toBe('spring-2025_campaign') + }) + }) +}) + +describe('sanitizeParams', () => { + it('sanitizes all values in a params object', () => { + const params = { + utm_source: '', + utm_medium: 'email', + } + const result = sanitizeParams(params, defaultConfig) + expect(result).toEqual({ + utm_source: 'scriptbad/script', + utm_medium: 'email', + }) + }) + + it('preserves keys unchanged', () => { + const params = { + utm_source: 'test<>value', + utm_campaign: 'normal', + } + const result = sanitizeParams(params, defaultConfig) + 
expect(result).toHaveProperty('utm_source') + expect(result).toHaveProperty('utm_campaign') + }) + + it('skips undefined values', () => { + const params = { + utm_source: 'linkedin', + utm_medium: undefined, + } + const result = sanitizeParams(params, defaultConfig) + expect(result).toEqual({ + utm_source: 'linkedin', + utm_medium: undefined, + }) + }) + + it('returns empty object for empty input', () => { + const result = sanitizeParams({}, defaultConfig) + expect(result).toEqual({}) + }) + + it('works with camelCase keys', () => { + const params = { + utmSource: 'bold', + utmMedium: 'cpc', + } + const result = sanitizeParams(params, defaultConfig) + expect(result).toEqual({ + utmSource: 'bbold/b', + utmMedium: 'cpc', + }) + }) +}) diff --git a/__tests__/docs.md b/__tests__/docs.md index 7d7258f..9758471 100644 --- a/__tests__/docs.md +++ b/__tests__/docs.md @@ -18,8 +18,8 @@ Path: @/__tests__ ### Core Implementation - **`setup.ts`**: Creates a fresh sessionStorage mock and location mock in `beforeEach`, ensuring tests are isolated. The storage mock implements `getItem`, `setItem`, `removeItem`, `clear`, `length`, and `key`. Location is stubbed with `href`, `search`, `hash`, `pathname`, `protocol`, `host`, and `hostname`. -- **`core/` tests**: Cover capture (URL parsing, allowed parameters, key format conversion, SSR fallback), storage (write/read/clear, format conversion, validation of stored data, silent failure), appender (query/fragment placement, preserveExisting, remove, extract), keys (bidirectional conversion, standard and custom keys, detection, validation), and validator (protocol, domain, normalization, mutable default protocol). -- **`config/` tests**: Cover `createConfig` merging semantics (nullish coalescing, array replacement, object merge), `validateConfig` error messages, and `loadConfigFromJson` fallback behavior. 
+- **`core/` tests**: Cover capture (URL parsing, allowed parameters, key format conversion, SSR fallback, sanitization integration), sanitizer (HTML stripping, control character removal, custom patterns, truncation, combined rules), storage (write/read/clear, format conversion, validation of stored data, silent failure), appender (query/fragment placement, preserveExisting, remove, extract), keys (bidirectional conversion, standard and custom keys, detection, validation), and validator (protocol, domain, normalization, mutable default protocol). +- **`config/` tests**: Cover `createConfig` merging semantics (nullish coalescing, array replacement, object merge), `validateConfig` error messages, `loadConfigFromJson` fallback behavior, and sanitize config handling (default inclusion, partial merge, custom pattern preservation, validation of each sanitize field). - **`react/` tests**: Use `@testing-library/react` `renderHook` and `render` to test `useUtmTracking` (auto-capture, manual capture, clear, appendToUrl with share context and exclusions) and `UtmProvider`/`useUtmContext` (context propagation, error on missing provider). 
### Things to Know diff --git a/__tests__/react/useUtmTracking.test.tsx b/__tests__/react/useUtmTracking.test.tsx index ab4229b..9ca004a 100644 --- a/__tests__/react/useUtmTracking.test.tsx +++ b/__tests__/react/useUtmTracking.test.tsx @@ -302,6 +302,31 @@ describe('useUtmTracking', () => { }) }) + describe('sanitization', () => { + it('sanitizes captured values when sanitize is enabled', () => { + vi.stubGlobal('location', { + href: 'https://example.com?utm_source=&utm_medium=email', + search: '?utm_source=&utm_medium=email', + }) + + const { result } = renderHook(() => + useUtmTracking({ + config: { + captureOnMount: false, + sanitize: { enabled: true }, + }, + }), + ) + + act(() => { + result.current.capture() + }) + + expect(result.current.utmParameters?.utm_source).toBe('scriptbad/script') + expect(result.current.utmParameters?.utm_medium).toBe('email') + }) + }) + describe('key format', () => { it('uses snake_case by default', () => { vi.stubGlobal('location', { diff --git a/src/config/defaults.ts b/src/config/defaults.ts index 76875fc..bfea562 100644 --- a/src/config/defaults.ts +++ b/src/config/defaults.ts @@ -4,7 +4,18 @@ * Provides sensible defaults for UTM toolkit configuration. 
*/ -import type { ResolvedUtmConfig } from '../types' +import type { ResolvedUtmConfig, SanitizeConfig } from '../types' + +/** + * Default sanitization configuration + * Sanitization is disabled by default but has safe defaults when enabled + */ +export const DEFAULT_SANITIZE_CONFIG: SanitizeConfig = { + enabled: false, + stripHtml: true, + stripControlChars: true, + maxLength: 200, +} /** * Standard UTM parameters (snake_case format for URLs) @@ -49,6 +60,9 @@ export const DEFAULT_CONFIG: ResolvedUtmConfig = { /** No parameters excluded from shares by default */ excludeFromShares: [], + + /** Sanitization disabled by default */ + sanitize: { ...DEFAULT_SANITIZE_CONFIG }, } /** @@ -62,5 +76,6 @@ export function getDefaultConfig(): ResolvedUtmConfig { defaultParams: { ...DEFAULT_CONFIG.defaultParams }, shareContextParams: { ...DEFAULT_CONFIG.shareContextParams }, excludeFromShares: [...DEFAULT_CONFIG.excludeFromShares], + sanitize: { ...DEFAULT_CONFIG.sanitize }, } } diff --git a/src/config/docs.md b/src/config/docs.md index 4ce82d9..392aa55 100644 --- a/src/config/docs.md +++ b/src/config/docs.md @@ -13,14 +13,15 @@ Path: @/src/config - `createConfig()` is the primary entry point, called by `useUtmTracking` in `@/src/react` to resolve user-provided partial config into a complete `ResolvedUtmConfig`. - `@/src/debug` imports `getDefaultConfig()` from here as a fallback when no config is provided to diagnostic functions. - `DEFAULT_CONFIG` and `STANDARD_UTM_PARAMETERS` are the canonical definitions of default behavior (enabled, snake_case, sessionStorage key `utm_parameters`, auto-capture on mount, append to shares, the 6 standard UTM params). +- `DEFAULT_SANITIZE_CONFIG` defines the sanitization defaults: disabled by default, but with safe-by-default values when enabled (`stripHtml: true`, `stripControlChars: true`, `maxLength: 200`). It is exported as a public constant and spread into `DEFAULT_CONFIG.sanitize`. 
- The config system does not perform side effects -- it is pure data transformation. ### Core Implementation -- `createConfig()` merges a partial user config with defaults using nullish coalescing (`??`) for scalar fields. Array fields (`allowedParameters`, `excludeFromShares`) are replaced wholesale when provided by the user, not merged. Object fields (`defaultParams`, `shareContextParams`) are shallow-merged. +- `createConfig()` merges a partial user config with defaults using nullish coalescing (`??`) for scalar fields. Array fields (`allowedParameters`, `excludeFromShares`) are replaced wholesale when provided by the user, not merged. Object fields (`defaultParams`, `shareContextParams`) are shallow-merged. The `sanitize` field is merged via `mergeSanitizeConfig()`, which uses nullish coalescing per-field so partial overrides preserve unspecified defaults. - `mergeConfig()` follows the same semantics but takes a `ResolvedUtmConfig` as the base instead of implicitly using defaults -- useful for layering configurations. - `loadConfigFromJson()` accepts `unknown` input, validates it is a non-null non-array object, then delegates to `createConfig()`. Invalid input falls back to defaults with a `console.warn`. -- `validateConfig()` performs runtime type checking on each config field and returns an array of error message strings (empty array means valid). +- `validateConfig()` performs runtime type checking on each config field and returns an array of error message strings (empty array means valid). Sanitize validation checks that `sanitize` is a non-null, non-array object, that `enabled`/`stripHtml`/`stripControlChars` are booleans, that `maxLength` is a positive finite number, and that `customPattern`, when provided, is a `RegExp`. - `getDefaultConfig()` returns a shallow copy of `DEFAULT_CONFIG` with cloned arrays and objects to prevent mutation of the shared constant. 
### Things to Know diff --git a/src/config/index.ts b/src/config/index.ts index c636fa1..c3fea51 100644 --- a/src/config/index.ts +++ b/src/config/index.ts @@ -2,6 +2,11 @@ * Configuration exports */ -export { DEFAULT_CONFIG, STANDARD_UTM_PARAMETERS, getDefaultConfig } from './defaults' +export { + DEFAULT_CONFIG, + DEFAULT_SANITIZE_CONFIG, + STANDARD_UTM_PARAMETERS, + getDefaultConfig, +} from './defaults' export { createConfig, mergeConfig, loadConfigFromJson, validateConfig } from './loader' diff --git a/src/config/loader.ts b/src/config/loader.ts index e4b6c3f..575c2b9 100644 --- a/src/config/loader.ts +++ b/src/config/loader.ts @@ -4,7 +4,13 @@ * Provides utilities for loading and merging UTM toolkit configuration. */ -import type { UtmConfig, ResolvedUtmConfig, ShareContextParams, UtmParameters } from '../types' +import type { + UtmConfig, + ResolvedUtmConfig, + SanitizeConfig, + ShareContextParams, + UtmParameters, +} from '../types' import { DEFAULT_CONFIG, getDefaultConfig } from './defaults' /** @@ -33,6 +39,25 @@ function mergeShareContextParams( return result } +/** + * Merge sanitize config with defaults + */ +function mergeSanitizeConfig( + base: SanitizeConfig, + override: Partial | undefined, +): SanitizeConfig { + if (!override) { + return { ...base } + } + return { + enabled: override.enabled ?? base.enabled, + stripHtml: override.stripHtml ?? base.stripHtml, + stripControlChars: override.stripControlChars ?? base.stripControlChars, + maxLength: override.maxLength ?? base.maxLength, + customPattern: override.customPattern ?? base.customPattern, + } +} + /** * Merge two UTM parameter objects */ @@ -82,6 +107,7 @@ export function createConfig(userConfig?: Partial): ResolvedUtmConfig excludeFromShares: userConfig.excludeFromShares ? 
[...userConfig.excludeFromShares] : defaults.excludeFromShares, + sanitize: mergeSanitizeConfig(defaults.sanitize, userConfig.sanitize), } } @@ -113,6 +139,7 @@ export function mergeConfig( excludeFromShares: override.excludeFromShares ? [...override.excludeFromShares] : [...base.excludeFromShares], + sanitize: mergeSanitizeConfig(base.sanitize, override.sanitize), } } @@ -218,6 +245,32 @@ export function validateConfig(config: unknown): string[] { errors.push('shareContextParams must be an object') } + if (c.sanitize !== undefined) { + if (typeof c.sanitize !== 'object' || c.sanitize === null || Array.isArray(c.sanitize)) { + errors.push('sanitize must be an object') + } else { + const s = c.sanitize as Record + if (s.enabled !== undefined && typeof s.enabled !== 'boolean') { + errors.push('sanitize.enabled must be a boolean') + } + if (s.stripHtml !== undefined && typeof s.stripHtml !== 'boolean') { + errors.push('sanitize.stripHtml must be a boolean') + } + if (s.stripControlChars !== undefined && typeof s.stripControlChars !== 'boolean') { + errors.push('sanitize.stripControlChars must be a boolean') + } + if ( + s.maxLength !== undefined && + (typeof s.maxLength !== 'number' || !Number.isFinite(s.maxLength) || s.maxLength <= 0) + ) { + errors.push('sanitize.maxLength must be a positive finite number') + } + if (s.customPattern !== undefined && !(s.customPattern instanceof RegExp)) { + errors.push('sanitize.customPattern must be a RegExp') + } + } + } + return errors } diff --git a/src/core/capture.ts b/src/core/capture.ts index be3eec5..fa1877a 100644 --- a/src/core/capture.ts +++ b/src/core/capture.ts @@ -5,8 +5,10 @@ * Supports standard UTM parameters and custom utm_ prefixed parameters. 
*/ -import type { KeyFormat, UtmParameters } from '../types' +import type { KeyFormat, SanitizeConfig, UtmParameters } from '../types' +import { DEFAULT_SANITIZE_CONFIG } from '../config/defaults' import { convertParams, isSnakeCaseUtmKey } from './keys' +import { sanitizeParams } from './sanitizer' /** * Options for capturing UTM parameters @@ -17,6 +19,9 @@ export interface CaptureOptions { /** Allowlist of parameters to capture (snake_case format, e.g., ['utm_source', 'utm_campaign']) */ allowedParameters?: string[] + + /** Sanitization configuration — when enabled, strips dangerous characters from values */ + sanitize?: Partial } /** @@ -59,7 +64,7 @@ function isBrowser(): boolean { * ``` */ export function captureUtmParameters(url?: string, options: CaptureOptions = {}): UtmParameters { - const { keyFormat = 'snake_case', allowedParameters } = options + const { keyFormat = 'snake_case', allowedParameters, sanitize } = options // Get URL, defaulting to current page URL in browser const urlString = url ?? (isBrowser() ? window.location.href : '') @@ -89,12 +94,18 @@ export function captureUtmParameters(url?: string, options: CaptureOptions = {}) } } + // Apply sanitization if configured and enabled + const resolvedSanitize: SanitizeConfig = { ...DEFAULT_SANITIZE_CONFIG, ...sanitize } + const captured: UtmParameters = resolvedSanitize.enabled + ? 
sanitizeParams(params as UtmParameters, resolvedSanitize) + : (params as UtmParameters) + // Convert to target format if needed if (keyFormat === 'camelCase') { - return convertParams(params as UtmParameters, 'camelCase') + return convertParams(captured, 'camelCase') } - return params as UtmParameters + return captured } catch (error) { // If URL parsing fails, return empty object // This ensures the function is robust and doesn't break the app diff --git a/src/core/docs.md b/src/core/docs.md index 91f9855..f7829cc 100644 --- a/src/core/docs.md +++ b/src/core/docs.md @@ -23,7 +23,7 @@ The data flow through the core modules follows this path: URL string | v -[capture.ts] -- parses URL, filters to utm_* keys, applies allowedParameters, converts key format +[capture.ts] -- parses URL, filters to utm_* keys, applies allowedParameters, sanitizes values, converts key format | v UtmParameters object @@ -40,7 +40,9 @@ URL string with UTM params - **keys.ts**: Bidirectional key conversion between `snake_case` and `camelCase`. Uses lookup tables (`SNAKE_TO_CAMEL`, `CAMEL_TO_SNAKE`) for the 6 standard keys and regex-based conversion for custom keys. `isSnakeCaseUtmKey` checks for `utm_` prefix; `isCamelCaseUtmKey` checks for `utm` followed by an uppercase letter. `detectKeyFormat` scans keys and returns the first format found, defaulting to `snake_case` for empty objects. -- **capture.ts**: `captureUtmParameters()` takes a URL string (defaulting to `window.location.href`), parses it via `new URL()`, iterates `searchParams`, and filters to keys passing `isSnakeCaseUtmKey`. Optionally filters by an `allowedParameters` set and converts output via `convertParams`. +- **capture.ts**: `captureUtmParameters()` takes a URL string (defaulting to `window.location.href`), parses it via `new URL()`, iterates `searchParams`, and filters to keys passing `isSnakeCaseUtmKey`. 
Optionally filters by an `allowedParameters` set, applies value sanitization when `sanitize.enabled` is true, then converts output via `convertParams`. The pipeline order is: extract params --> filter by allowlist --> sanitize --> convert key format. + +- **sanitizer.ts**: `sanitizeValue()` strips dangerous characters from a single string value. Rules apply in order: HTML-significant characters (`< > " ' \``) --> control characters (\x00-\x1F except tab/newline/CR) --> optional custom regex pattern --> trim --> truncate to `maxLength`. `sanitizeParams()` applies `sanitizeValue()` to every non-undefined value in a `UtmParameters` object, returning a new object with keys preserved unchanged. Both functions are pure and stateless; all behavior is driven by the `SanitizeConfig` argument. - **storage.ts**: Uses sessionStorage with a configurable key (default: `utm_parameters`). Write operations skip empty param objects and fail silently with `console.warn`. Read operations validate parsed JSON with `isValidStoredData()`, which checks that all keys pass `isUtmKey` and all values are strings or undefined. @@ -55,5 +57,6 @@ URL string with UTM params - **Silent failure**: Storage and capture operations never throw. Errors produce `console.warn` messages and return fallback values. The appender returns the original URL unchanged on failure. - **validator.ts mutable state**: `defaultProtocol` is module-level mutable state modified via `setDefaultProtocol()`. This is global -- all callers share the same default protocol. Tests that call `setDefaultProtocol()` should restore the original value. - **Fragment parameter handling in appender**: When appending to query, conflicting UTM params are removed from the fragment. When appending to fragment, conflicting UTM params are removed from the query. Only fragments that contain `=` are treated as parameter-bearing; plain anchors like `#section` are left alone. 
+- **Sanitization is capture-time only**: Sanitization runs during `captureUtmParameters()` before values enter the system. It does not run at storage time, append time, or on read. This means values stored in sessionStorage are already sanitized if sanitization was enabled at capture. Created and maintained by Nori. diff --git a/src/core/index.ts b/src/core/index.ts index 6a54c78..67d7114 100644 --- a/src/core/index.ts +++ b/src/core/index.ts @@ -48,6 +48,9 @@ export { STANDARD_CAMEL_KEYS, } from './keys' +// Sanitizer utilities +export { sanitizeValue, sanitizeParams } from './sanitizer' + // Validator utilities export { validateUrl, diff --git a/src/core/sanitizer.ts b/src/core/sanitizer.ts new file mode 100644 index 0000000..1c5c55a --- /dev/null +++ b/src/core/sanitizer.ts @@ -0,0 +1,67 @@ +/** + * UTM Parameter Value Sanitizer + * + * Strips dangerous characters from UTM parameter values to prevent XSS + * when values are rendered in HTML or used in URL construction. + */ + +import type { SanitizeConfig, UtmParameters } from '../types' + +/** + * Sanitize a single UTM parameter value + * + * Applies stripping rules in order: HTML chars → control chars → custom pattern → trim → truncate. 
+ * + * @param value - The raw parameter value + * @param config - Sanitization configuration + * @returns Sanitized value + */ +export function sanitizeValue(value: string, config: SanitizeConfig): string { + let result = value + + if (config.stripHtml) { + result = result.replace(/[<>"'`]/g, '') + } + + if (config.stripControlChars) { + // Strip control characters \x00-\x1F except tab (\x09), newline (\x0A), carriage return (\x0D) + // oxlint-disable-next-line no-control-regex + result = result.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, '') + } + + if (config.customPattern) { + config.customPattern.lastIndex = 0 + result = result.replace(config.customPattern, '') + } + + result = result.trim() + + if (result.length > config.maxLength) { + result = result.slice(0, config.maxLength) + } + + return result +} + +/** + * Sanitize all values in a UTM parameters object + * + * Applies sanitization to every non-undefined value. Keys are preserved unchanged. + * + * @param params - UTM parameters object + * @param config - Sanitization configuration + * @returns New object with sanitized values + */ +export function sanitizeParams(params: UtmParameters, config: SanitizeConfig): UtmParameters { + const result: Record = {} + + for (const [key, value] of Object.entries(params)) { + if (value === undefined) { + result[key] = undefined + } else { + result[key] = sanitizeValue(value, config) + } + } + + return result as UtmParameters +} diff --git a/src/docs.md b/src/docs.md index 5a50332..6854fea 100644 --- a/src/docs.md +++ b/src/docs.md @@ -32,11 +32,11 @@ Consumer API - **types/** (`@/src/types`): Shared type definitions consumed by all other modules. Defines the dual key format system (snake_case/camelCase) and configuration interfaces. - **config/** (`@/src/config`): Pure configuration creation and validation. Merges partial user config with defaults to produce `ResolvedUtmConfig`. 
-- **core/** (`@/src/core`): Framework-agnostic UTM operations -- capture from URLs, persist in sessionStorage, append to outbound URLs, convert key formats, validate URLs. All SSR-safe. +- **core/** (`@/src/core`): Framework-agnostic UTM operations -- capture from URLs, sanitize parameter values, persist in sessionStorage, append to outbound URLs, convert key formats, validate URLs. All SSR-safe. - **debug/** (`@/src/debug`): Development-time diagnostics. Assembles state snapshots and provides formatted console output and optional `window.utmDebug` helpers. - **react/** (`@/src/react`): React hook and context provider that orchestrate the core modules into stateful React APIs with auto-capture-on-mount behavior. -**Key data flow**: URL with UTM params --> `capture` --> `store` in sessionStorage --> `appendToUrl` for outbound link generation. +**Key data flow**: URL with UTM params --> `capture` (with optional sanitization) --> `store` in sessionStorage --> `appendToUrl` for outbound link generation. 
### Things to Know diff --git a/src/index.ts b/src/index.ts index e923096..b100e14 100644 --- a/src/index.ts +++ b/src/index.ts @@ -48,6 +48,10 @@ export { STANDARD_SNAKE_KEYS, STANDARD_CAMEL_KEYS, + // Sanitizer + sanitizeValue, + sanitizeParams, + // Validator validateUrl, normalizeUrl, @@ -64,6 +68,7 @@ export { // Configuration export { DEFAULT_CONFIG, + DEFAULT_SANITIZE_CONFIG, STANDARD_UTM_PARAMETERS, getDefaultConfig, createConfig, @@ -94,4 +99,5 @@ export type { UseUtmTrackingReturn, UtmProviderProps, DiagnosticInfo, + SanitizeConfig, } from './types' diff --git a/src/react/useUtmTracking.ts b/src/react/useUtmTracking.ts index 7f35bd3..5327dfe 100644 --- a/src/react/useUtmTracking.ts +++ b/src/react/useUtmTracking.ts @@ -121,6 +121,7 @@ export function useUtmTracking(options: UseUtmTrackingOptions = {}): UseUtmTrack const params = captureUtmParameters(window.location.href, { keyFormat: config.keyFormat, allowedParameters: config.allowedParameters, + sanitize: config.sanitize, }) // Only store if we found some parameters diff --git a/src/types/docs.md b/src/types/docs.md index 524317a..6c0528c 100644 --- a/src/types/docs.md +++ b/src/types/docs.md @@ -23,11 +23,13 @@ Path: @/src/types - `ResolvedUtmConfig` mirrors `UtmConfig` but with all fields required -- it represents the result of merging user-provided partial config with defaults. - `ShareContextParams` uses `Partial>` with a `default` key for base params and platform-specific overrides, enabling a layered merge strategy in `useUtmTracking`'s `appendToUrl` callback. - `AppendOptions` controls whether UTM params go into query string or fragment, and whether existing UTM params on the target URL are preserved. +- `SanitizeConfig` defines value sanitization behavior with fields for `enabled`, `stripHtml`, `stripControlChars`, `maxLength`, and an optional `customPattern` (RegExp). It appears as `Partial<SanitizeConfig>` on `UtmConfig` (user input) and as a required `SanitizeConfig` on `ResolvedUtmConfig` (resolved output). 
This follows the same partial-in/resolved-out pattern used by the rest of the config system. ### Things to Know - `UtmParameters` is a union, not an intersection. Code that receives it must handle either format, typically by detecting the format or converting via `@/src/core/keys.ts`. - `SharePlatform` is `'linkedin' | 'twitter' | 'facebook' | 'copy' | string` -- the named platforms are documentation aids, but any string is accepted. - `DiagnosticInfo` is only used by `@/src/debug` and is meant for development-time inspection, not production data flow. +- New features use a nested config object pattern (e.g., `sanitize: SanitizeConfig`) rather than adding flat fields to `UtmConfig`. Existing flat fields remain unchanged for backward compatibility. Created and maintained by Nori. diff --git a/src/types/index.ts b/src/types/index.ts index 1873388..b412654 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -104,6 +104,26 @@ export interface ValidationResult { */ export type ValidationError = 'invalid_protocol' | 'invalid_domain' | 'malformed_url' | 'empty_url' +/** + * Configuration for value sanitization + */ +export interface SanitizeConfig { + /** Enable sanitization (default: false) */ + enabled: boolean + + /** Strip HTML-significant characters: < > " ' ` (default: true) */ + stripHtml: boolean + + /** Strip control characters \x00-\x1F except \t \n \r (default: true) */ + stripControlChars: boolean + + /** Maximum allowed length for parameter values (default: 200) */ + maxLength: number + + /** Optional additional regex pattern to strip from values */ + customPattern?: RegExp +} + /** * Main configuration interface for UTM toolkit */ @@ -137,6 +157,9 @@ export interface UtmConfig { /** Parameters to exclude when appending to share URLs (e.g., ['utm_team_id']) */ excludeFromShares?: string[] + + /** Value sanitization configuration */ + sanitize?: Partial } /** @@ -152,6 +175,7 @@ export interface ResolvedUtmConfig { defaultParams: UtmParameters 
shareContextParams: ShareContextParams excludeFromShares: string[] + sanitize: SanitizeConfig } /** diff --git a/vitest.config.ts b/vitest.config.ts index 2f1f989..7ea92ab 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -1,4 +1,4 @@ -import { defineConfig } from 'vitest/config'; +import { defineConfig } from 'vitest/config' export default defineConfig({ test: { @@ -19,4 +19,4 @@ export default defineConfig({ }, setupFiles: ['__tests__/setup.ts'], }, -}); +}) From 57536dbefb0c58795fa2d32c90282644c172fadc Mon Sep 17 00:00:00 2001 From: Jack Misner Date: Fri, 13 Feb 2026 19:18:12 +0000 Subject: [PATCH 2/2] fix: respect enabled flag in sanitizeValue and sanitizeParams MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both functions are public API exports and should no-op when config.enabled is false, rather than relying on callers to check. 🤖 Generated with [Nori](https://nori.ai) Co-Authored-By: Nori --- __tests__/core/sanitizer.test.ts | 21 +++++++++++++++++++++ src/core/sanitizer.ts | 8 ++++++++ 2 files changed, 29 insertions(+) diff --git a/__tests__/core/sanitizer.test.ts b/__tests__/core/sanitizer.test.ts index 0f1a408..20a7184 100644 --- a/__tests__/core/sanitizer.test.ts +++ b/__tests__/core/sanitizer.test.ts @@ -104,6 +104,14 @@ describe('sanitizeValue', () => { }) }) + describe('enabled flag', () => { + it('returns value unchanged when enabled is false', () => { + const config: SanitizeConfig = { ...defaultConfig, enabled: false } + const result = sanitizeValue('', config) + expect(result).toBe('') + }) + }) + describe('edge cases', () => { it('returns empty string when everything is stripped', () => { const result = sanitizeValue('<>"\'`', defaultConfig) @@ -174,6 +182,19 @@ describe('sanitizeParams', () => { expect(result).toEqual({}) }) + it('returns params unchanged when enabled is false', () => { + const config: SanitizeConfig = { ...defaultConfig, enabled: false } + const params = { + utm_source: '', + 
utm_medium: 'email', + } + const result = sanitizeParams(params, config) + expect(result).toEqual({ + utm_source: '', + utm_medium: 'email', + }) + }) + it('works with camelCase keys', () => { const params = { utmSource: 'bold', diff --git a/src/core/sanitizer.ts b/src/core/sanitizer.ts index 1c5c55a..318d224 100644 --- a/src/core/sanitizer.ts +++ b/src/core/sanitizer.ts @@ -17,6 +17,10 @@ import type { SanitizeConfig, UtmParameters } from '../types' * @returns Sanitized value */ export function sanitizeValue(value: string, config: SanitizeConfig): string { + if (!config.enabled) { + return value + } + let result = value if (config.stripHtml) { @@ -53,6 +57,10 @@ export function sanitizeValue(value: string, config: SanitizeConfig): string { * @returns New object with sanitized values */ export function sanitizeParams(params: UtmParameters, config: SanitizeConfig): UtmParameters { + if (!config.enabled) { + return { ...params } + } + const result: Record = {} for (const [key, value] of Object.entries(params)) {