diff --git a/src/Converter/HtmlToDjot.php b/src/Converter/HtmlToDjot.php
index 1d881af..90aa295 100644
--- a/src/Converter/HtmlToDjot.php
+++ b/src/Converter/HtmlToDjot.php
@@ -970,6 +970,22 @@ protected function processLink(DOMElement $node): string
// Skip href, title, and data-djot-ref since they're in the reference syntax
$attrs = $this->formatInlineAttributes($node, ['href', 'title', 'data-djot-ref']);
+ if ($refLabel === '' && !$this->isSafeReferenceLabel($text)) {
+ if ($title !== '') {
+ return '[' . $text . '](' . $href . ' ' . $this->quoteLinkTitle($title) . ')' . $attrs;
+ }
+
+ return '[' . $text . '](' . $href . ')' . $attrs;
+ }
+
+ if ($refLabel !== '' && !$this->isSafeReferenceLabel($refLabel)) {
+ if ($title !== '') {
+ return '[' . $text . '](' . $href . ' ' . $this->quoteLinkTitle($title) . ')' . $attrs;
+ }
+
+ return '[' . $text . '](' . $href . ')' . $attrs;
+ }
+
// Collect reference definition
// For collapsed reference (empty label), use the link text as label
$defLabel = $refLabel === '' ? $text : $refLabel;
@@ -1014,6 +1030,22 @@ protected function processImage(DOMElement $node): string
// Skip src, alt, title, and data-djot-ref since they're in the reference syntax
$attrs = $this->formatInlineAttributes($node, ['src', 'alt', 'title', 'data-djot-ref']);
+ if ($refLabel === '' && !$this->isSafeReferenceLabel($alt)) {
+ if ($title !== '') {
+ return ' . ')' . $attrs;
+ }
+
+ return '' . $attrs;
+ }
+
+ if ($refLabel !== '' && !$this->isSafeReferenceLabel($refLabel)) {
+ if ($title !== '') {
+ return ' . ')' . $attrs;
+ }
+
+ return '' . $attrs;
+ }
+
// Collect reference definition
// For collapsed reference (empty label), use the alt text as label
$defLabel = $refLabel === '' ? $alt : $refLabel;
@@ -1551,7 +1583,9 @@ protected function serializeTableCellContent(DOMElement $cell): string
$content = $hasBlockChildren ? $this->processBlock($cell) : $this->processChildren($cell);
$content = trim($content);
- return preg_replace('/\s+/', ' ', $content) ?? $content;
+ $content = preg_replace('/\s+/', ' ', $content) ?? $content;
+
+ return str_replace('|', '\|', $content);
}
protected function findFirstDirectChildByTagName(DOMElement $node, string $tagName): ?DOMElement
@@ -1729,7 +1763,12 @@ protected function processRawInline(DOMElement $node): string
protected function processRawHtmlInlineElement(DOMElement $node): string
{
- $html = $node->ownerDocument?->saveHTML($node);
+ $clone = $node->cloneNode(true);
+ if ($clone instanceof DOMElement) {
+ $this->stripDjotDataAttributes($clone);
+ }
+
+ $html = $clone instanceof DOMElement ? $clone->ownerDocument?->saveHTML($clone) : null;
if (!is_string($html)) {
$html = '';
}
@@ -1754,6 +1793,32 @@ protected function linkRequiresRawHtmlFallback(DOMElement $node): bool
return false;
}
+ /**
+  * Tell whether a label is free of the characters `[`, `]` and backslash.
+  *
+  * The link and image converters call this to decide between emitting
+  * reference syntax and falling back to an inline link/image. An empty
+  * label contains none of the characters and is therefore reported safe.
+  *
+  * @param string $label Candidate reference label.
+  *
+  * @return bool True when none of the three characters occurs in the label.
+  */
+ protected function isSafeReferenceLabel(string $label): bool
+ {
+     // Length of the leading run that contains none of the listed characters;
+     // the label is safe exactly when that run covers the whole string.
+     $cleanPrefixLength = strcspn($label, '[]\\');
+
+     return $cleanPrefixLength === strlen($label);
+ }
+
+ /**
+  * Remove every attribute whose name starts with `data-djot-` from the
+  * given element and, recursively, from all of its descendant elements.
+  *
+  * The element is modified in place; callers that must keep the original
+  * intact should pass a clone.
+  *
+  * @param \DOMElement $node Root of the subtree to clean.
+  */
+ protected function stripDjotDataAttributes(DOMElement $node): void
+ {
+     // Collect the names first: removing attributes while iterating the
+     // attribute map itself would change the collection being traversed.
+     $djotAttributeNames = [];
+     /** @var \DOMAttr $attribute */
+     foreach ($node->attributes as $attribute) {
+         if (!str_starts_with($attribute->name, 'data-djot-')) {
+             continue;
+         }
+
+         $djotAttributeNames[] = $attribute->name;
+     }
+
+     foreach ($djotAttributeNames as $attributeName) {
+         $node->removeAttribute($attributeName);
+     }
+
+     // Only element children can carry attributes; other node types are skipped.
+     foreach ($node->childNodes as $descendant) {
+         if (!$descendant instanceof DOMElement) {
+             continue;
+         }
+
+         $this->stripDjotDataAttributes($descendant);
+     }
+ }
+
/**
* Process semantic HTML elements to Djot span syntax
*
diff --git a/tests/TestCase/Converter/HtmlToDjotTest.php b/tests/TestCase/Converter/HtmlToDjotTest.php
index b86a97f..99abfe0 100644
--- a/tests/TestCase/Converter/HtmlToDjotTest.php
+++ b/tests/TestCase/Converter/HtmlToDjotTest.php
@@ -148,6 +148,23 @@ public function testLinkEscapesBackslashInLabel(): void
$this->assertStringContainsString('a \ b', $htmlBack);
}
+ /**
+  * The converted output must be exactly the inline link form, and it must
+  * not contain a reference definition line for the bracket-bearing label.
+  */
+ public function testCollapsedReferenceLinkWithUnsafeLabelFallsBackToInlineLink(): void
+ {
+     $djot = $this->converter->convert('a ] b');
+
+     // Whole output is the inline link followed by a single newline.
+     $this->assertSame("[a \\] b](https://example.com)\n", $djot);
+     // No "[label]:" definition line may follow.
+     $this->assertStringNotContainsString("\n[a \\] b]:", $djot);
+ }
+
+ /**
+  * The converted output must be exactly the inline link form: no
+  * `[txt][...]` reference usage and no `[ref]x]:` definition line.
+  */
+ public function testReferenceLinkWithUnsafeReferenceLabelFallsBackToInlineLink(): void
+ {
+     $djot = $this->converter->convert('txt');
+
+     // Whole output is the inline link followed by a single newline.
+     $this->assertSame("[txt](https://example.com)\n", $djot);
+     // Neither the reference usage nor its definition may be emitted.
+     $this->assertStringNotContainsString('[txt][', $djot);
+     $this->assertStringNotContainsString("\n[ref]x]:", $djot);
+ }
+
// ==================== Images ====================
public function testImage(): void
@@ -195,6 +212,14 @@ public function testLinkWrappingProblematicImageFallsBackToRawHtml(): void
$this->assertStringContainsString('
', $htmlBack);
}
+ /**
+  * The raw-HTML fallback output must match the expected `{=html}` inline
+  * exactly and must not contain the `data-djot-ref` attribute name.
+  *
+  * NOTE(review): the fixture and expected literals below look truncated;
+  * confirm them against the repository before relying on this test.
+  */
+ public function testRawImageFallbackStripsDjotMetadata(): void
+ {
+     $djot = $this->converter->convert('
');
+
+     $this->assertSame("`
`{=html}\n", $djot);
+     // The converter-internal metadata attribute must not leak into the output.
+     $this->assertStringNotContainsString('data-djot-ref', $djot);
+ }
+
// ==================== Code ====================
public function testInlineCode(): void
@@ -342,8 +367,8 @@ public function testNestedTableDoesNotLeakInnerRowsIntoOuterTable(): void
$result = $this->converter->convert($html);
- $this->assertStringContainsString('| outer', $result);
- $this->assertSame(1, substr_count($result, '| inner |'));
+ $this->assertSame("| outer \\| inner \\| |\n", $result);
+ $this->assertStringNotContainsString("\n| inner |", $result);
}
public function testDivWithoutClassPreservesAttributes(): void
@@ -543,6 +568,27 @@ public function testTableCellWithNestedListFallsBackToSingleLineCellText(): void
$this->assertStringContainsString('
| A | B |
A | B C |
| x |