Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 67 additions & 2 deletions src/Converter/HtmlToDjot.php
Original file line number Diff line number Diff line change
Expand Up @@ -970,6 +970,22 @@ protected function processLink(DOMElement $node): string
// Skip href, title, and data-djot-ref since they're in the reference syntax
$attrs = $this->formatInlineAttributes($node, ['href', 'title', 'data-djot-ref']);

if ($refLabel === '' && !$this->isSafeReferenceLabel($text)) {
if ($title !== '') {
return '[' . $text . '](' . $href . ' ' . $this->quoteLinkTitle($title) . ')' . $attrs;
}

return '[' . $text . '](' . $href . ')' . $attrs;
}

if ($refLabel !== '' && !$this->isSafeReferenceLabel($refLabel)) {
if ($title !== '') {
return '[' . $text . '](' . $href . ' ' . $this->quoteLinkTitle($title) . ')' . $attrs;
}

return '[' . $text . '](' . $href . ')' . $attrs;
}

// Collect reference definition
// For collapsed reference (empty label), use the link text as label
$defLabel = $refLabel === '' ? $text : $refLabel;
Expand Down Expand Up @@ -1014,6 +1030,22 @@ protected function processImage(DOMElement $node): string
// Skip src, alt, title, and data-djot-ref since they're in the reference syntax
$attrs = $this->formatInlineAttributes($node, ['src', 'alt', 'title', 'data-djot-ref']);

if ($refLabel === '' && !$this->isSafeReferenceLabel($alt)) {
if ($title !== '') {
return '![' . $alt . '](' . $src . ' ' . $this->quoteLinkTitle($title) . ')' . $attrs;
}

return '![' . $alt . '](' . $src . ')' . $attrs;
}

if ($refLabel !== '' && !$this->isSafeReferenceLabel($refLabel)) {
if ($title !== '') {
return '![' . $alt . '](' . $src . ' ' . $this->quoteLinkTitle($title) . ')' . $attrs;
}

return '![' . $alt . '](' . $src . ')' . $attrs;
}

// Collect reference definition
// For collapsed reference (empty label), use the alt text as label
$defLabel = $refLabel === '' ? $alt : $refLabel;
Expand Down Expand Up @@ -1551,7 +1583,9 @@ protected function serializeTableCellContent(DOMElement $cell): string
$content = $hasBlockChildren ? $this->processBlock($cell) : $this->processChildren($cell);
$content = trim($content);

return preg_replace('/\s+/', ' ', $content) ?? $content;
$content = preg_replace('/\s+/', ' ', $content) ?? $content;

return str_replace('|', '\|', $content);
}

protected function findFirstDirectChildByTagName(DOMElement $node, string $tagName): ?DOMElement
Expand Down Expand Up @@ -1729,7 +1763,12 @@ protected function processRawInline(DOMElement $node): string

protected function processRawHtmlInlineElement(DOMElement $node): string
{
$html = $node->ownerDocument?->saveHTML($node);
$clone = $node->cloneNode(true);
if ($clone instanceof DOMElement) {
$this->stripDjotDataAttributes($clone);
}

$html = $clone instanceof DOMElement ? $clone->ownerDocument?->saveHTML($clone) : null;
if (!is_string($html)) {
$html = '';
}
Expand All @@ -1754,6 +1793,32 @@ protected function linkRequiresRawHtmlFallback(DOMElement $node): bool
return false;
}

/**
 * Tell whether a label is free of the characters `[`, `]` and `\`.
 *
 * @param string $label Candidate reference label.
 *
 * @return bool True when none of the three characters occurs in the label.
 */
protected function isSafeReferenceLabel(string $label): bool
{
    foreach (['[', ']', '\\'] as $forbidden) {
        if (str_contains($label, $forbidden)) {
            return false;
        }
    }

    return true;
}

/**
 * Remove every attribute whose name starts with `data-djot-` from the given
 * element and, recursively, from all of its descendant elements.
 *
 * The element is modified in place.
 *
 * @param \DOMElement $node Root element of the subtree to clean.
 */
protected function stripDjotDataAttributes(DOMElement $node): void
{
    // Gather the names first and remove afterwards, so the attribute map is
    // not modified while it is being iterated.
    $djotAttributeNames = [];
    /** @var \DOMAttr $attribute */
    foreach ($node->attributes as $attribute) {
        if (!str_starts_with($attribute->name, 'data-djot-')) {
            continue;
        }

        $djotAttributeNames[] = $attribute->name;
    }

    foreach ($djotAttributeNames as $attributeName) {
        $node->removeAttribute($attributeName);
    }

    // Walk the direct children; only element nodes can carry attributes.
    for ($child = $node->firstChild; $child !== null; $child = $child->nextSibling) {
        if ($child instanceof DOMElement) {
            $this->stripDjotDataAttributes($child);
        }
    }
}

/**
* Process semantic HTML elements to Djot span syntax
*
Expand Down
50 changes: 48 additions & 2 deletions tests/TestCase/Converter/HtmlToDjotTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,23 @@ public function testLinkEscapesBackslashInLabel(): void
$this->assertStringContainsString('<a href="https://example.com">a \ b</a>', $htmlBack);
}

/**
 * A link marked as a collapsed reference (empty `data-djot-ref`) whose text
 * contains `]` must come out as an inline link, with no reference
 * definition line for that text.
 */
public function testCollapsedReferenceLinkWithUnsafeLabelFallsBackToInlineLink(): void
{
    $html = '<a href="https://example.com" data-djot-ref="">a ] b</a>';

    $djot = $this->converter->convert($html);

    $this->assertSame("[a \\] b](https://example.com)\n", $djot);
    $this->assertStringNotContainsString("\n[a \\] b]:", $djot);
}

/**
 * A link whose explicit `data-djot-ref` label contains `]` must come out as
 * an inline link: no `[txt][...]` reference syntax and no definition line
 * for the unsafe label.
 */
public function testReferenceLinkWithUnsafeReferenceLabelFallsBackToInlineLink(): void
{
    $html = '<a href="https://example.com" data-djot-ref="ref]x">txt</a>';

    $djot = $this->converter->convert($html);

    $this->assertSame("[txt](https://example.com)\n", $djot);
    $this->assertStringNotContainsString('[txt][', $djot);
    $this->assertStringNotContainsString("\n[ref]x]:", $djot);
}

// ==================== Images ====================

public function testImage(): void
Expand Down Expand Up @@ -195,6 +212,14 @@ public function testLinkWrappingProblematicImageFallsBackToRawHtml(): void
$this->assertStringContainsString('<a href="https://example.com"><img src="img.png" alt="a [ b"></a>', $htmlBack);
}

/**
 * An image whose alt text contains `[` is expected to be emitted as a raw
 * inline HTML span, and the `data-djot-ref` attribute must not appear in
 * that output.
 */
public function testRawImageFallbackStripsDjotMetadata(): void
{
    $html = '<img src="img.png" alt="a [ b" data-djot-ref="">';

    $djot = $this->converter->convert($html);

    $this->assertSame("`<img src=\"img.png\" alt=\"a [ b\">`{=html}\n", $djot);
    $this->assertStringNotContainsString('data-djot-ref', $djot);
}

// ==================== Code ====================

public function testInlineCode(): void
Expand Down Expand Up @@ -342,8 +367,8 @@ public function testNestedTableDoesNotLeakInnerRowsIntoOuterTable(): void

$result = $this->converter->convert($html);

$this->assertStringContainsString('| outer', $result);
$this->assertSame(1, substr_count($result, '| inner |'));
$this->assertSame("| outer \\| inner \\| |\n", $result);
$this->assertStringNotContainsString("\n| inner |", $result);
}

public function testDivWithoutClassPreservesAttributes(): void
Expand Down Expand Up @@ -543,6 +568,27 @@ public function testTableCellWithNestedListFallsBackToSingleLineCellText(): void
$this->assertStringContainsString('<td>- Item</td>', $htmlBack);
}

/**
 * A literal `|` inside a table cell must be backslash-escaped in the Djot
 * output, and converting that output back with DjotConverter must yield a
 * single cell containing `A | B` rather than a cell split at the pipe.
 */
public function testTableCellEscapesLiteralPipeCharacters(): void
{
    $djot = $this->converter->convert('<table><tr><td>A | B</td></tr></table>');

    $this->assertSame("| A \\| B |\n", $djot);

    $roundTripped = (new DjotConverter())->convert($djot);
    $this->assertStringContainsString('<td>A | B</td>', $roundTripped);
    $this->assertStringNotContainsString('<td>A</td>', $roundTripped);
}

/**
 * When a cell holds block content (two paragraphs here), the expected
 * output is a single-line cell with any `|` still escaped; converting it
 * back with DjotConverter must yield one cell reading `A | B C`.
 */
public function testTableCellEscapesPipeCharactersAfterBlockDegradation(): void
{
    $djot = $this->converter->convert('<table><tr><td><p>A | B</p><p>C</p></td></tr></table>');

    $this->assertSame("| A \\| B C |\n", $djot);

    $roundTripped = (new DjotConverter())->convert($djot);
    $this->assertStringContainsString('<td>A | B C</td>', $roundTripped);
}

public function testTableWithMultilineCaptionKeepsAllCaptionTextInsideCaption(): void
{
$html = '<table><caption><p>cap one</p><p>cap two</p></caption><tr><td>x</td></tr></table>';
Expand Down
Loading