-
Notifications
You must be signed in to change notification settings - Fork 0
Open
Description
Issue: Entry Validation and Sanitization API
Summary
The CLI implements various validation functions (slugs, filenames, metadata). These should be consolidated into a comprehensive validation API.
Current State
CLI Implementation (render_engine_cli.utils)
def validate_file_name_or_slug(ctx, param, value: str) -> str | None:
"""Validate the filename and slug options"""
if " " in value:
raise BadParameter(f"Spaces are not allowed in {param.name}.")
# Auto-generates from title if not provided
def split_args(args: list[str]) -> dict[str, str]:
"""Parse key=value or key:value arguments."""
# Validates format and checks for duplicates
Date Parsing
# In cli.py
if date := parsed_args.pop("date", None):
try:
date = dateparser.parse(date)
except ParserError:
raise ValueError(f"Invalid date: {repr(date)}.") from None
Proposed API
Create render_engine_api.validation with:
from typing import Dict, Any, Optional, List
from dataclasses import dataclass
from datetime import datetime
@dataclass
class ValidationResult:
"""Result of validation check."""
valid: bool
value: Any # Sanitized/normalized value
errors: List[str] # Error messages if invalid
warnings: List[str] # Non-fatal warnings
class EntryValidator:
"""Validates and sanitizes collection entry data."""
def __init__(self, collection: Optional[Collection] = None):
"""Initialize with optional Collection for schema validation."""
# Slug validation
def validate_slug(self, slug: str) -> ValidationResult:
"""Validate and sanitize slug."""
def generate_slug(self, text: str) -> str:
"""Generate valid slug from arbitrary text."""
# Filename validation
def validate_filename(
self,
filename: str,
extension: str = ".md"
) -> ValidationResult:
"""Validate and sanitize filename."""
# Metadata validation
def validate_metadata(
self,
metadata: Dict[str, Any],
required_fields: Optional[List[str]] = None
) -> ValidationResult:
"""Validate metadata dictionary."""
def validate_date(
self,
date_value: Any
) -> ValidationResult:
"""Parse and validate date from various formats."""
# Content validation
def validate_content(
self,
content: str,
max_length: Optional[int] = None
) -> ValidationResult:
"""Validate content string."""
# Frontmatter validation
def validate_frontmatter(
self,
frontmatter_str: str
) -> ValidationResult:
"""Validate YAML frontmatter."""
# Combined validation
def validate_entry(
self,
slug: Optional[str],
title: Optional[str],
content: str,
metadata: Dict[str, Any]
) -> ValidationResult:
"""Validate complete entry."""
class ArgumentParser:
"""Parse and validate CLI-style arguments."""
@staticmethod
def parse_key_value_args(
args: List[str],
separator: str = "="
) -> ValidationResult:
"""Parse key=value or key:value arguments."""
@staticmethod
def parse_date_arg(date_str: str) -> ValidationResult:
"""Parse date from string."""
Validation Rules
Slug Validation
validator = EntryValidator()
# Valid slugs
result = validator.validate_slug("my-post")
assert result.valid and result.value == "my-post"
# Auto-sanitize
result = validator.validate_slug("My Post!")
assert result.valid and result.value == "my-post"
# Invalid
result = validator.validate_slug("../../etc/passwd")
assert not result.valid
assert "Invalid characters" in result.errors[0]
Filename Validation
# Valid filename
result = validator.validate_filename("my-post.md")
assert result.valid
# Add extension automatically
result = validator.validate_filename("my-post")
assert result.value == "my-post.md"
# Reject invalid
result = validator.validate_filename("../../../etc/passwd")
assert not result.valid
Metadata Validation
# Required fields
validator = EntryValidator()
result = validator.validate_metadata(
{"title": "My Post"},
required_fields=["title", "date"]
)
assert not result.valid
assert "Missing required field: date" in result.errors
# Type validation
result = validator.validate_metadata({
"title": "My Post",
"date": "2025-01-30", # String will be parsed
"tags": ["python", "django"]
})
assert result.valid
Date Validation
# Various formats
validator = EntryValidator()
# ISO format
result = validator.validate_date("2025-01-30")
assert result.valid and isinstance(result.value, datetime)
# Natural language (requires dateparser)
result = validator.validate_date("yesterday")
assert result.valid
# Invalid
result = validator.validate_date("not-a-date")
assert not result.valid
Benefits
- Safety: Prevent malicious input (path traversal, XSS, etc.)
- Consistency: Same validation across CLI, TUI, API
- User Experience: Clear error messages
- Auto-correction: Automatically fix common issues
- Extensibility: Easy to add new validation rules
- Testing: Isolated validation logic
Security Considerations
Path Traversal Prevention
def validate_slug(self, slug: str) -> ValidationResult:
# Reject path components
if ".." in slug or "/" in slug or "\\" in slug:
return ValidationResult(
valid=False,
value=None,
errors=["Slug cannot contain path separators"],
warnings=[]
)
XSS Prevention
def validate_content(self, content: str) -> ValidationResult:
# Warn about potentially dangerous content
warnings = []
if "<script>" in content.lower():
warnings.append("Content contains script tags")
return ValidationResult(
valid=True,
value=content,
errors=[],
warnings=warnings
)
Injection Prevention
def validate_metadata(self, metadata: Dict[str, Any]) -> ValidationResult:
# Prevent SQL injection in metadata values
dangerous_patterns = ["--", ";", "DROP", "DELETE"]
warnings = []
for key, value in metadata.items():
str_value = str(value).upper()
if any(pattern in str_value for pattern in dangerous_patterns):
warnings.append(f"Suspicious pattern in {key}")
Migration Path
- Create render_engine_api.validation module
- Implement EntryValidator with basic rules
- Migrate CLI validation functions
- Add security checks
- Update CLI to use EntryValidator
- Update TUI to use EntryValidator
- Add comprehensive tests with edge cases
- Document validation rules
Example Usage
CLI
from render_engine_api.validation import EntryValidator, ArgumentParser
validator = EntryValidator()
# Validate slug
slug_result = validator.validate_slug(args.slug)
if not slug_result.valid:
click.echo("\n".join(slug_result.errors), err=True)
return
# Parse arguments
args_result = ArgumentParser.parse_key_value_args(args.args)
if not args_result.valid:
click.echo("\n".join(args_result.errors), err=True)
return
# Validate complete entry
entry_result = validator.validate_entry(
slug=slug_result.value,
title=args.title,
content=args.content,
metadata=args_result.value
)
if entry_result.warnings:
click.echo("Warnings:", err=True)
for warning in entry_result.warnings:
click.echo(f" - {warning}", err=True)
TUI
from render_engine_api.validation import EntryValidator
validator = EntryValidator()
# Validate on form submit
result = validator.validate_entry(
slug=form.slug.value,
title=form.title.value,
content=form.content.value,
metadata={"date": form.date.value}
)
if not result.valid:
self.notify("\n".join(result.errors), severity="error")
return
if result.warnings:
self.notify("\n".join(result.warnings), severity="warning")
Dependencies
- python-slugify for slug generation
- dateutil or dateparser for flexible date parsing
- No external dependencies for basic validation
Related Issues
- CREATE: Collection Adapter #3: Collection Operations API
- #008: Result Formatting API
Metadata
Metadata
Assignees
Labels
No labels