From 0ac147e8c746940a77541d674971995514bde94f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 9 Mar 2026 14:05:06 +0000 Subject: [PATCH 1/3] Initial plan From a8b61dd0b69188f8409122981150b256e2c3d587 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 9 Mar 2026 14:35:22 +0000 Subject: [PATCH 2/3] Add PHP port of Etherpad Lite Changeset library Co-authored-by: Gared <913630+Gared@users.noreply.github.com> --- src/Changeset/AttributePool.php | 148 +++++++ src/Changeset/Builder.php | 91 +++++ src/Changeset/Changeset.php | 546 +++++++++++++++++++++++++ src/Changeset/MergingOpAssembler.php | 89 ++++ src/Changeset/Op.php | 58 +++ src/Changeset/OpAssembler.php | 30 ++ src/Changeset/SmartOpAssembler.php | 104 +++++ src/Changeset/StringAssembler.php | 30 ++ src/Changeset/StringIterator.php | 63 +++ tests/Unit/Changeset/ChangesetTest.php | 422 +++++++++++++++++++ 10 files changed, 1581 insertions(+) create mode 100644 src/Changeset/AttributePool.php create mode 100644 src/Changeset/Builder.php create mode 100644 src/Changeset/Changeset.php create mode 100644 src/Changeset/MergingOpAssembler.php create mode 100644 src/Changeset/Op.php create mode 100644 src/Changeset/OpAssembler.php create mode 100644 src/Changeset/SmartOpAssembler.php create mode 100644 src/Changeset/StringAssembler.php create mode 100644 src/Changeset/StringIterator.php create mode 100644 tests/Unit/Changeset/ChangesetTest.php diff --git a/src/Changeset/AttributePool.php b/src/Changeset/AttributePool.php new file mode 100644 index 0000000..939769a --- /dev/null +++ b/src/Changeset/AttributePool.php @@ -0,0 +1,148 @@ + + */ + public array $numToAttrib = []; + + /** + * Maps the string representation of an attribute to its non-negative identifier. + * + * @var array + */ + private array $attribToNum = []; + + /** + * The attribute ID to assign to the next new attribute. 
+ */ + private int $nextNum = 0; + + /** + * Add an attribute to the attribute set, or query for an existing attribute identifier. + * + * @param array{0: string, 1: string} $attrib - The attribute's [key, value] pair of strings. + * @param bool $dontAddIfAbsent - If true, do not insert the attribute into the pool if absent. + * @return int The attribute's identifier, or -1 if the attribute is not in the pool. + */ + public function putAttrib(array $attrib, bool $dontAddIfAbsent = false): int + { + $str = $attrib[0] . ',' . $attrib[1]; + if (isset($this->attribToNum[$str])) { + return $this->attribToNum[$str]; + } + if ($dontAddIfAbsent) { + return -1; + } + $num = $this->nextNum++; + $this->attribToNum[$str] = $num; + $this->numToAttrib[$num] = [$attrib[0], $attrib[1]]; + return $num; + } + + /** + * @param int $num - The identifier of the attribute to fetch. + * @return array{0: string, 1: string}|null The attribute with the given identifier. + */ + public function getAttrib(int $num): ?array + { + if (!isset($this->numToAttrib[$num])) { + return null; + } + return [$this->numToAttrib[$num][0], $this->numToAttrib[$num][1]]; + } + + /** + * @param int $num - The identifier of the attribute to fetch. + * @return string Equivalent to getAttrib(num)[0] if the attribute exists, otherwise empty string. + */ + public function getAttribKey(int $num): string + { + if (!isset($this->numToAttrib[$num])) { + return ''; + } + return $this->numToAttrib[$num][0]; + } + + /** + * @param int $num - The identifier of the attribute to fetch. + * @return string Equivalent to getAttrib(num)[1] if the attribute exists, otherwise empty string. + */ + public function getAttribValue(int $num): string + { + if (!isset($this->numToAttrib[$num])) { + return ''; + } + return $this->numToAttrib[$num][1]; + } + + /** + * Executes a callback for each attribute in the pool. + * + * @param callable(string, string): void $func - Callback called with key and value arguments. 
+ */ + public function eachAttrib(callable $func): void + { + foreach ($this->numToAttrib as $pair) { + $func($pair[0], $pair[1]); + } + } + + /** + * @return array{numToAttrib: array, nextNum: int} + * An object suitable for serialization that can be passed to fromJsonable to reconstruct the pool. + */ + public function toJsonable(): array + { + return [ + 'numToAttrib' => $this->numToAttrib, + 'nextNum' => $this->nextNum, + ]; + } + + /** + * Replace the contents of this attribute pool with values from a previous call to toJsonable. + * + * @param array{numToAttrib: array, nextNum: int} $obj + */ + public function fromJsonable(array $obj): self + { + $this->numToAttrib = $obj['numToAttrib']; + $this->nextNum = $obj['nextNum']; + $this->attribToNum = []; + foreach ($this->numToAttrib as $n => $attrib) { + $this->attribToNum[$attrib[0] . ',' . $attrib[1]] = $n; + } + return $this; + } + + /** + * @return AttributePool A deep copy of this attribute pool. + */ + public function clone(): AttributePool + { + $c = new AttributePool(); + foreach ($this->numToAttrib as $n => $a) { + $c->numToAttrib[$n] = [$a[0], $a[1]]; + } + $c->attribToNum = $this->attribToNum; + $c->nextNum = $this->nextNum; + return $c; + } +} diff --git a/src/Changeset/Builder.php b/src/Changeset/Builder.php new file mode 100644 index 0000000..eab657d --- /dev/null +++ b/src/Changeset/Builder.php @@ -0,0 +1,91 @@ +oldLen = $oldLen; + $this->assem = new SmartOpAssembler(); + $this->o = new Op(); + $this->charBank = new StringAssembler(); + } + + /** + * @param int $n - Number of characters to keep. + * @param int $l - Number of newlines among the N characters. If positive, the last character must be a newline. + * @param string $attribs - Either '*0*1...' attribute string. 
+ * @return Builder this + */ + public function keep(int $n, int $l = 0, string $attribs = ''): Builder + { + $this->o->opcode = '='; + $this->o->attribs = $attribs; + $this->o->chars = $n; + $this->o->lines = $l; + $this->assem->append($this->o); + return $this; + } + + /** + * @param string $text - Text to keep. + * @param string $attribs - Attribute string. + * @return Builder this + */ + public function keepText(string $text, string $attribs = ''): Builder + { + foreach (Changeset::opsFromText('=', $text, $attribs) as $op) { + $this->assem->append($op); + } + return $this; + } + + /** + * @param string $text - Text to insert. + * @param string $attribs - Attribute string. + * @return Builder this + */ + public function insert(string $text, string $attribs = ''): Builder + { + foreach (Changeset::opsFromText('+', $text, $attribs) as $op) { + $this->assem->append($op); + } + $this->charBank->append($text); + return $this; + } + + /** + * @param int $n - Number of characters to remove. + * @param int $l - Number of newlines among the N characters. If positive, the last character must be a newline. 
+ * @return Builder this + */ + public function remove(int $n, int $l = 0): Builder + { + $this->o->opcode = '-'; + $this->o->attribs = ''; + $this->o->chars = $n; + $this->o->lines = $l; + $this->assem->append($this->o); + return $this; + } + + public function __toString(): string + { + $this->assem->endDocument(); + $newLen = $this->oldLen + $this->assem->getLengthChange(); + return Changeset::pack($this->oldLen, $newLen, (string) $this->assem, (string) $this->charBank); + } +} diff --git a/src/Changeset/Changeset.php b/src/Changeset/Changeset.php new file mode 100644 index 0000000..55d013f --- /dev/null +++ b/src/Changeset/Changeset.php @@ -0,0 +1,546 @@ +$ + * where: + * - is the length of the original text in base 36 + * - is '>' for growth or '<' for shrinkage + * - is the magnitude of the length change in base 36 + * - is a sequence of operations + * - contains inserted characters + * + * Port of Changeset.ts and ChangesetUtils.ts from etherpad-lite: + * https://github.com/ether/etherpad-lite/blob/master/src/static/js/Changeset.ts + * https://github.com/ether/etherpad-lite/blob/master/src/static/js/ChangesetUtils.ts + */ +class Changeset +{ + /** + * Parses a number from string base 36. + * + * @param string $str - string of the number in base 36 + * @return int number + */ + public static function parseNum(string $str): int + { + return (int) base_convert($str, 36, 10); + } + + /** + * Writes a number in base 36 and puts it in a string. + * + * @param int $num - number + * @return string string + */ + public static function numToString(int $num): string + { + return strtolower(base_convert((string) $num, 10, 36)); + } + + /** + * Throw an error with an easysync flag. + * + * @param string $msg - error message + * @throws \RuntimeException + */ + public static function error(string $msg): never + { + throw new \RuntimeException($msg); + } + + /** + * Assert that a condition is truthy. Throws if falsy. 
+ * + * @param bool $b - assertion condition + * @param string $msg - error message to include in the exception + * @throws \RuntimeException + */ + public static function assert(bool $b, string $msg): void + { + if (!$b) { + self::error("Failed assertion: {$msg}"); + } + } + + /** + * Cleans an Op object. + * + * @param Op $op - object to clear + */ + public static function clearOp(Op $op): void + { + $op->opcode = ''; + $op->chars = 0; + $op->lines = 0; + $op->attribs = ''; + } + + /** + * Copies op1 to op2. + * + * @param Op $op1 - source Op + * @param Op|null $op2 - destination Op. If not given, a new Op is used. + * @return Op $op2 + */ + public static function copyOp(Op $op1, ?Op $op2 = null): Op + { + if ($op2 === null) { + $op2 = new Op(); + } + $op2->opcode = $op1->opcode; + $op2->chars = $op1->chars; + $op2->lines = $op1->lines; + $op2->attribs = $op1->attribs; + return $op2; + } + + /** + * Parses a string of serialized changeset operations. + * + * @param string $ops - Serialized changeset operations. + * @return \Generator + */ + public static function deserializeOps(string $ops): \Generator + { + $regex = '/((?:\*[0-9a-z]+)*)(?:\|([0-9a-z]+))?([-+=])([0-9a-z]+)|(.)/'; + $offset = 0; + while (preg_match($regex, $ops, $match, PREG_OFFSET_CAPTURE, $offset) === 1) { + $offset = $match[0][1] + strlen($match[0][0]); + if (isset($match[5][0])) { + if ($match[5][0] === '$') { + return; // Start of the insert operation character bank. + } + self::error('invalid operation: ' . substr($ops, $offset - 1)); + } + $op = new Op($match[3][0]); + $op->lines = self::parseNum($match[2][0] !== '' ? $match[2][0] : '0'); + $op->chars = self::parseNum($match[4][0]); + $op->attribs = $match[1][0]; + yield $op; + } + } + + /** + * Generates operations from the given text and attributes. + * + * @param string $opcode - The operator to use ('=', '+', '-', ''). + * @param string $text - The text to remove/add/keep. 
+ * @param string $attribs - The attributes to apply to the operations. + * @return \Generator + */ + public static function opsFromText(string $opcode, string $text, string $attribs = ''): \Generator + { + $op = new Op($opcode); + $op->attribs = $attribs; + $lastNewlinePos = strrpos($text, "\n"); + if ($lastNewlinePos === false) { + $op->chars = strlen($text); + $op->lines = 0; + if ($op->chars > 0) { + yield $op; + } + } else { + $op->chars = $lastNewlinePos + 1; + $op->lines = substr_count($text, "\n"); + if ($op->chars > 0) { + yield $op; + } + $op2 = self::copyOp($op); + $op2->chars = strlen($text) - ($lastNewlinePos + 1); + $op2->lines = 0; + if ($op2->chars > 0) { + yield $op2; + } + } + } + + /** + * Unpacks a string encoded changeset into a structured object. + * + * @param string $cs - String representation of the Changeset + * @return array{oldLen: int, newLen: int, ops: string, charBank: string} + * @throws \RuntimeException + */ + public static function unpack(string $cs): array + { + $headerRegex = '/^Z:([0-9a-z]+)([><])([0-9a-z]+)/'; + if (preg_match($headerRegex, $cs, $headerMatch) !== 1 || $headerMatch[0] === '') { + self::error("Not a changeset: {$cs}"); + } + $oldLen = self::parseNum($headerMatch[1]); + $changeSign = ($headerMatch[2] === '>') ? 1 : -1; + $changeMag = self::parseNum($headerMatch[3]); + $newLen = $oldLen + $changeSign * $changeMag; + $opsStart = strlen($headerMatch[0]); + $opsEnd = strpos($cs, '$'); + if ($opsEnd === false) { + $opsEnd = strlen($cs); + } + return [ + 'oldLen' => $oldLen, + 'newLen' => $newLen, + 'ops' => substr($cs, $opsStart, $opsEnd - $opsStart), + 'charBank' => substr($cs, $opsEnd + 1), + ]; + } + + /** + * Creates an encoded changeset. + * + * @param int $oldLen - The length of the document before applying the changeset. + * @param int $newLen - The length of the document after applying the changeset. + * @param string $opsStr - Encoded operations to apply to the document. 
+ * @param string $bank - Characters for insert operations. + * @return string The encoded changeset. + */ + public static function pack(int $oldLen, int $newLen, string $opsStr, string $bank): string + { + $lenDiff = $newLen - $oldLen; + $lenDiffStr = ($lenDiff >= 0 ? '>' . self::numToString($lenDiff) : '<' . self::numToString(-$lenDiff)); + return 'Z:' . self::numToString($oldLen) . $lenDiffStr . $opsStr . '$' . $bank; + } + + /** + * Returns the required length of the text before changeset can be applied. + * + * @param string $cs - String representation of the Changeset + * @return int oldLen property + */ + public static function oldLen(string $cs): int + { + return self::unpack($cs)['oldLen']; + } + + /** + * Returns the length of the text after changeset is applied. + * + * @param string $cs - String representation of the Changeset + * @return int newLen property + */ + public static function newLen(string $cs): int + { + return self::unpack($cs)['newLen']; + } + + /** + * Applies a Changeset to a string. + * + * @param string $cs - String encoded Changeset + * @param string $str - String to which a Changeset should be applied + * @return string The resulting string + * @throws \RuntimeException + */ + public static function applyToText(string $cs, string $str): string + { + $unpacked = self::unpack($cs); + self::assert( + strlen($str) === $unpacked['oldLen'], + 'mismatched apply: ' . strlen($str) . ' / ' . 
$unpacked['oldLen'] + ); + $bankIter = new StringIterator($unpacked['charBank']); + $strIter = new StringIterator($str); + $assem = new StringAssembler(); + foreach (self::deserializeOps($unpacked['ops']) as $op) { + switch ($op->opcode) { + case '+': + if ($op->lines !== substr_count($bankIter->peek($op->chars), "\n")) { + throw new \RuntimeException("newline count is wrong in op +; cs:{$cs} and text:{$str}"); + } + $assem->append($bankIter->take($op->chars)); + break; + case '-': + if ($op->lines !== substr_count($strIter->peek($op->chars), "\n")) { + throw new \RuntimeException("newline count is wrong in op -; cs:{$cs} and text:{$str}"); + } + $strIter->skip($op->chars); + break; + case '=': + if ($op->lines !== substr_count($strIter->peek($op->chars), "\n")) { + throw new \RuntimeException("newline count is wrong in op =; cs:{$cs} and text:{$str}"); + } + $assem->append($strIter->take($op->chars)); + break; + } + } + $assem->append($strIter->take($strIter->remaining())); + return (string) $assem; + } + + /** + * Creates a Changeset which works on originalText and removes text from start to + * start+numRemoved and inserts newText instead. + * + * @param string $orig - Original text. + * @param int $start - Index into $orig where characters should be removed and inserted. + * @param int $ndel - Number of characters to delete at $start. + * @param string $ins - Text to insert at $start (after deleting $ndel characters). + * @param string $attribs - Optional attributes to apply to the inserted text. + * @return string The encoded changeset. 
+ * @throws \RuntimeException + */ + public static function makeSplice( + string $orig, + int $start, + int $ndel, + string $ins, + string $attribs = '' + ): string { + if ($start < 0) { + throw new \RangeException("start index must be non-negative (is {$start})"); + } + if ($ndel < 0) { + throw new \RangeException("characters to delete must be non-negative (is {$ndel})"); + } + if ($start > strlen($orig)) { + $start = strlen($orig); + } + if ($ndel > strlen($orig) - $start) { + $ndel = strlen($orig) - $start; + } + $deleted = substr($orig, $start, $ndel); + $assem = new SmartOpAssembler(); + foreach (self::opsFromText('=', substr($orig, 0, $start)) as $op) { + $assem->append($op); + } + foreach (self::opsFromText('-', $deleted) as $op) { + $assem->append($op); + } + foreach (self::opsFromText('+', $ins, $attribs) as $op) { + $assem->append($op); + } + $assem->endDocument(); + return self::pack(strlen($orig), strlen($orig) + strlen($ins) - $ndel, (string) $assem, $ins); + } + + /** + * Returns a changeset that is the identity for documents of length N. + * Applying this changeset to a document leaves it unchanged. + * + * @param int $n - Length of the document. + * @return string The identity changeset. + */ + public static function identity(int $n): string + { + return self::pack($n, $n, '', ''); + } + + /** + * Checks if a changeset is the identity changeset. + * + * @param string $cs - The changeset to check. + * @return bool True if the changeset is the identity. + */ + public static function isIdentity(string $cs): bool + { + $unpacked = self::unpack($cs); + return $unpacked['ops'] === '' && $unpacked['oldLen'] === $unpacked['newLen']; + } + + /** + * Compose two changesets together. + * Changeset cs1 is applied first, then cs2. + * + * @param string $cs1 - First changeset. + * @param string $cs2 - Second changeset. + * @param AttributePool $pool - Attribute pool. + * @return string The composed changeset. 
+ */ + public static function compose(string $cs1, string $cs2, AttributePool $pool): string + { + $unpacked1 = self::unpack($cs1); + $unpacked2 = self::unpack($cs2); + self::assert( + $unpacked1['newLen'] === $unpacked2['oldLen'], + 'mismatched composition' + ); + $len1 = $unpacked1['oldLen']; + $len2 = $unpacked2['newLen']; + if (self::isIdentity($cs2)) { + return $cs1; + } + if (self::isIdentity($cs1)) { + return $cs2; + } + + $assem = new SmartOpAssembler(); + $bankIter1 = new StringIterator($unpacked1['charBank']); + $bankIter2 = new StringIterator($unpacked2['charBank']); + $bankAssem = new StringAssembler(); + + $ops1 = self::deserializeOps($unpacked1['ops']); + $ops2 = self::deserializeOps($unpacked2['ops']); + $ops1->current(); // initialize + $ops2->current(); // initialize + + $op1 = $ops1->valid() ? self::copyOp($ops1->current()) : null; + $op2 = $ops2->valid() ? self::copyOp($ops2->current()) : null; + + while ($op1 !== null || $op2 !== null) { + if ($op1 !== null && $op1->opcode === '-') { + // Deletions made by cs1 never reach cs2, so they are emitted as-is. + $assem->append($op1); + $ops1->next(); + $op1 = $ops1->valid() ? self::copyOp($ops1->current()) : null; + } elseif ($op2 !== null && $op2->opcode === '+') { + // Insertions made by cs2 do not consume any cs1 output, so they are emitted as-is. + $assem->append($op2); + $bankAssem->append($bankIter2->take($op2->chars)); + $ops2->next(); + $op2 = $ops2->valid() ? self::copyOp($ops2->current()) : null; + } elseif ($op2 === null) { + // cs2 has no explicit ops left, i.e. it implicitly keeps the rest of cs1's output: + // pass the remaining '=' / '+' op of cs1 through instead of dropping it, + // carrying the inserted characters along for '+' ops. + if ($op1->opcode === '+') { + $bankAssem->append($bankIter1->take($op1->chars)); + } + $assem->append($op1); + $ops1->next(); + $op1 = $ops1->valid() ? self::copyOp($ops1->current()) : null; + } elseif ($op1 === null) { + // cs1 has no explicit ops left, i.e. it implicitly keeps the rest of the original text: + // the remaining '=' / '-' op of cs2 therefore applies to that text unchanged. + $assem->append($op2); + $ops2->next(); + $op2 = $ops2->valid() ? self::copyOp($ops2->current()) : null; + } else { + self::slicerZipperFunc($op1, $op2, $pool, $bankIter1, $bankIter2, $bankAssem, $assem); + if ($op1->chars === 0) { + $ops1->next(); + $op1 = $ops1->valid() ? self::copyOp($ops1->current()) : null; + } + if ($op2->chars === 0) { + $ops2->next(); + $op2 = $ops2->valid() ? 
self::copyOp($ops2->current()) : null; + } + } + } + + $assem->endDocument(); + return self::pack($len1, $len2, (string) $assem, (string) $bankAssem); + } + + /** + * @internal Used by compose() to combine pairs of operations + */ + private static function slicerZipperFunc( + Op $op1, + Op $op2, + AttributePool $pool, + StringIterator $bankIter1, + StringIterator $bankIter2, + StringAssembler $bankAssem, + SmartOpAssembler $assem + ): void { + if ($op1->opcode === '+') { + if ($op2->opcode === '-') { + if ($op1->chars <= $op2->chars) { + $bankIter1->skip($op1->chars); + $op2->chars -= $op1->chars; + $op2->lines -= $op1->lines; + $op1->chars = 0; + $op1->lines = 0; + } else { + $bankIter1->skip($op2->chars); + $op1->chars -= $op2->chars; + $op1->lines -= $op2->lines; + $op2->chars = 0; + $op2->lines = 0; + } + } elseif ($op2->opcode === '=') { + if ($op1->chars <= $op2->chars) { + $newOp = self::copyOp($op1); + $bankAssem->append($bankIter1->take($op1->chars)); + $assem->append($newOp); + $op2->chars -= $op1->chars; + $op2->lines -= $op1->lines; + $op1->chars = 0; + $op1->lines = 0; + } else { + $newOp = self::copyOp($op2); + $newOp->opcode = '+'; + $newOp->chars = $op2->chars; + $newOp->lines = $op2->lines; + $bankAssem->append($bankIter1->take($op2->chars)); + $assem->append($newOp); + $op1->chars -= $op2->chars; + $op1->lines -= $op2->lines; + $op2->chars = 0; + $op2->lines = 0; + } + } + } elseif ($op1->opcode === '=') { + if ($op2->opcode === '-') { + if ($op1->chars <= $op2->chars) { + $newOp = self::copyOp($op1); + $newOp->opcode = '-'; + $assem->append($newOp); + $op2->chars -= $op1->chars; + $op2->lines -= $op1->lines; + $op1->chars = 0; + $op1->lines = 0; + } else { + $newOp = self::copyOp($op2); + $assem->append($newOp); + $op1->chars -= $op2->chars; + $op1->lines -= $op2->lines; + $op2->chars = 0; + $op2->lines = 0; + } + } elseif ($op2->opcode === '=') { + if ($op1->chars <= $op2->chars) { + $newOp = self::copyOp($op1); + $assem->append($newOp); + 
$op2->chars -= $op1->chars; + $op2->lines -= $op1->lines; + $op1->chars = 0; + $op1->lines = 0; + } else { + $newOp = self::copyOp($op2); + $assem->append($newOp); + $op1->chars -= $op2->chars; + $op1->lines -= $op2->lines; + $op2->chars = 0; + $op2->lines = 0; + } + } + } + } + + /** + * Iterate over attribute numbers in a changeset and call func with each one. + * + * @param string $cs - Changeset/attribution string to iterate over + * @param callable(int): void $func - Callback called with each attribute number. + */ + public static function eachAttribNumber(string $cs, callable $func): void + { + preg_match_all('/\*([0-9a-z]+)/', $cs, $matches); + foreach ($matches[1] as $match) { + $func(self::parseNum($match)); + } + } + + /** + * Iterate over attributes in a changeset and move them from oldPool to newPool. + * + * @param string $cs - Changeset/attribution string to iterate over + * @param AttributePool $oldPool - old attributes pool + * @param AttributePool $newPool - new attributes pool + * @return string the new Changeset + */ + public static function moveOpsToNewPool(string $cs, AttributePool $oldPool, AttributePool $newPool): string + { + $dollarPos = strpos($cs, '$'); + if ($dollarPos === false) { + $dollarPos = strlen($cs); + } + $upToDollar = substr($cs, 0, $dollarPos); + $fromDollar = substr($cs, $dollarPos); + + return preg_replace_callback( + '/\*([0-9a-z]+)/', + function (array $match) use ($oldPool, $newPool): string { + $oldNum = self::parseNum($match[1]); + $attrib = $oldPool->getAttrib($oldNum); + if ($attrib === null) { + return $match[0]; + } + $newNum = $newPool->putAttrib($attrib); + return '*' . self::numToString($newNum); + }, + $upToDollar + ) . 
$fromDollar; + } +} diff --git a/src/Changeset/MergingOpAssembler.php b/src/Changeset/MergingOpAssembler.php new file mode 100644 index 0000000..c3f33cb --- /dev/null +++ b/src/Changeset/MergingOpAssembler.php @@ -0,0 +1,89 @@ +assem = new OpAssembler(); + $this->bufOp = new Op(); + // If we get, for example, insertions [xxx\n,yyy], those don't merge, + // but if we get [xxx\n,yyy,zzz\n], that merges to [xxx\nyyyzzz\n]. + // This variable stores the length of yyy and any other newline-less + // ops immediately after it. + $this->bufOpAdditionalCharsAfterNewline = 0; + } + + public function flush(bool $isEndDocument = false): void + { + if ($this->bufOp->opcode === '') { + return; + } + if ($isEndDocument && $this->bufOp->opcode === '=' && $this->bufOp->attribs === '') { + // final merged keep, leave it implicit + } else { + $this->assem->append($this->bufOp); + if ($this->bufOpAdditionalCharsAfterNewline > 0) { + $this->bufOp->chars = $this->bufOpAdditionalCharsAfterNewline; + $this->bufOp->lines = 0; + $this->assem->append($this->bufOp); + $this->bufOpAdditionalCharsAfterNewline = 0; + } + } + $this->bufOp->opcode = ''; + } + + public function append(Op $op): void + { + if ($op->chars <= 0) { + return; + } + if ($this->bufOp->opcode === $op->opcode && $this->bufOp->attribs === $op->attribs) { + if ($op->lines > 0) { + // bufOp and additional chars are all mergeable into a multi-line op + $this->bufOp->chars += $this->bufOpAdditionalCharsAfterNewline + $op->chars; + $this->bufOp->lines += $op->lines; + $this->bufOpAdditionalCharsAfterNewline = 0; + } elseif ($this->bufOp->lines === 0) { + // both bufOp and op are in-line + $this->bufOp->chars += $op->chars; + } else { + // append in-line text to multi-line bufOp + $this->bufOpAdditionalCharsAfterNewline += $op->chars; + } + } else { + $this->flush(); + Changeset::copyOp($op, $this->bufOp); + } + } + + public function endDocument(): void + { + $this->flush(true); + } + + public function __toString(): string + { + 
$this->flush(); + return (string) $this->assem; + } + + public function clear(): void + { + $this->assem->clear(); + Changeset::clearOp($this->bufOp); + } +} diff --git a/src/Changeset/Op.php b/src/Changeset/Op.php new file mode 100644 index 0000000..547478c --- /dev/null +++ b/src/Changeset/Op.php @@ -0,0 +1,58 @@ +opcode = $opcode; + $this->chars = 0; + $this->lines = 0; + $this->attribs = ''; + } + + public function __toString(): string + { + if ($this->opcode === '') { + throw new \RuntimeException('null op'); + } + $l = $this->lines > 0 ? '|' . Changeset::numToString($this->lines) : ''; + return $this->attribs . $l . $this->opcode . Changeset::numToString($this->chars); + } +} diff --git a/src/Changeset/OpAssembler.php b/src/Changeset/OpAssembler.php new file mode 100644 index 0000000..570c5a1 --- /dev/null +++ b/src/Changeset/OpAssembler.php @@ -0,0 +1,30 @@ +serialized .= (string) $op; + } + + public function __toString(): string + { + return $this->serialized; + } + + public function clear(): void + { + $this->serialized = ''; + } +} diff --git a/src/Changeset/SmartOpAssembler.php b/src/Changeset/SmartOpAssembler.php new file mode 100644 index 0000000..4b9d78a --- /dev/null +++ b/src/Changeset/SmartOpAssembler.php @@ -0,0 +1,104 @@ +minusAssem = new MergingOpAssembler(); + $this->plusAssem = new MergingOpAssembler(); + $this->keepAssem = new MergingOpAssembler(); + $this->assem = new StringAssembler(); + } + + private function flushKeeps(): void + { + $this->assem->append((string) $this->keepAssem); + $this->keepAssem->clear(); + } + + private function flushPlusMinus(): void + { + $this->assem->append((string) $this->minusAssem); + $this->minusAssem->clear(); + $this->assem->append((string) $this->plusAssem); + $this->plusAssem->clear(); + } + + public function append(Op $op): void + { + if ($op->opcode === '') { + return; + } + if ($op->chars === 0) { + return; + } + + if ($op->opcode === '-') { + if ($this->lastOpcode === '=') { + $this->flushKeeps(); + } 
+ $this->minusAssem->append($op); + $this->lengthChange -= $op->chars; + } elseif ($op->opcode === '+') { + if ($this->lastOpcode === '=') { + $this->flushKeeps(); + } + $this->plusAssem->append($op); + $this->lengthChange += $op->chars; + } elseif ($op->opcode === '=') { + if ($this->lastOpcode !== '=') { + $this->flushPlusMinus(); + } + $this->keepAssem->append($op); + } + $this->lastOpcode = $op->opcode; + } + + public function __toString(): string + { + $this->flushPlusMinus(); + $this->flushKeeps(); + return (string) $this->assem; + } + + public function clear(): void + { + $this->minusAssem->clear(); + $this->plusAssem->clear(); + $this->keepAssem->clear(); + $this->assem->clear(); + $this->lengthChange = 0; + } + + public function endDocument(): void + { + $this->keepAssem->endDocument(); + } + + public function getLengthChange(): int + { + return $this->lengthChange; + } +} diff --git a/src/Changeset/StringAssembler.php b/src/Changeset/StringAssembler.php new file mode 100644 index 0000000..657c3b6 --- /dev/null +++ b/src/Changeset/StringAssembler.php @@ -0,0 +1,30 @@ +str = ''; + } + + public function append(string $x): void + { + $this->str .= $x; + } + + public function __toString(): string + { + return $this->str; + } +} diff --git a/src/Changeset/StringIterator.php b/src/Changeset/StringIterator.php new file mode 100644 index 0000000..48a4ac4 --- /dev/null +++ b/src/Changeset/StringIterator.php @@ -0,0 +1,63 @@ +str = $str; + $this->newLines = substr_count($str, "\n"); + } + + public function remaining(): int + { + return strlen($this->str) - $this->curIndex; + } + + public function getNewLines(): int + { + return $this->newLines; + } + + private function assertRemaining(int $n): void + { + if ($n > $this->remaining()) { + throw new \RuntimeException("!({$n} <= {$this->remaining()})"); + } + } + + public function take(int $n): string + { + $this->assertRemaining($n); + $s = substr($this->str, $this->curIndex, $n); + $this->newLines -= substr_count($s, 
"\n"); + $this->curIndex += $n; + return $s; + } + + public function peek(int $n): string + { + $this->assertRemaining($n); + return substr($this->str, $this->curIndex, $n); + } + + public function skip(int $n): void + { + $this->assertRemaining($n); + $skipped = substr($this->str, $this->curIndex, $n); + $this->newLines -= substr_count($skipped, "\n"); + $this->curIndex += $n; + } +} diff --git a/tests/Unit/Changeset/ChangesetTest.php b/tests/Unit/Changeset/ChangesetTest.php new file mode 100644 index 0000000..ee474f4 --- /dev/null +++ b/tests/Unit/Changeset/ChangesetTest.php @@ -0,0 +1,422 @@ +5+5$hello', $packed); + + $unpacked = Changeset::unpack($packed); + self::assertSame(10, $unpacked['oldLen']); + self::assertSame(15, $unpacked['newLen']); + self::assertSame('+5', $unpacked['ops']); + self::assertSame('hello', $unpacked['charBank']); + } + + public function testPackWithShrinkage(): void + { + $packed = Changeset::pack(15, 10, '-5', ''); + self::assertSame('Z:f<5-5$', $packed); + + $unpacked = Changeset::unpack($packed); + self::assertSame(15, $unpacked['oldLen']); + self::assertSame(10, $unpacked['newLen']); + } + + public function testIdentity(): void + { + $identity = Changeset::identity(10); + self::assertSame('Z:a>0$', $identity); + self::assertTrue(Changeset::isIdentity($identity)); + } + + public function testIdentityWithNonIdentity(): void + { + $cs = Changeset::pack(10, 15, '+5', 'hello'); + self::assertFalse(Changeset::isIdentity($cs)); + } + + public function testOldLenNewLen(): void + { + $packed = Changeset::pack(10, 15, '+5', 'hello'); + self::assertSame(10, Changeset::oldLen($packed)); + self::assertSame(15, Changeset::newLen($packed)); + } + + public function testMakeSpliceInsert(): void + { + $orig = 'Hello\n'; + $cs = Changeset::makeSplice($orig, 5, 0, ' World'); + $result = Changeset::applyToText($cs, $orig); + self::assertSame('Hello World\n', $result); + } + + public function testMakeSpliceDelete(): void + { + $orig = 'Hello World\n'; + 
$cs = Changeset::makeSplice($orig, 5, 6, ''); + $result = Changeset::applyToText($cs, $orig); + self::assertSame('Hello\n', $result); + } + + public function testMakeSpliceReplace(): void + { + $orig = 'Hello World\n'; + $cs = Changeset::makeSplice($orig, 6, 5, 'PHP'); + $result = Changeset::applyToText($cs, $orig); + self::assertSame('Hello PHP\n', $result); + } + + public function testMakeSpliceInsertAtStart(): void + { + $orig = 'World\n'; + $cs = Changeset::makeSplice($orig, 0, 0, 'Hello '); + $result = Changeset::applyToText($cs, $orig); + self::assertSame('Hello World\n', $result); + } + + public function testMakeSpliceInsertAtEnd(): void + { + $orig = "Hello\n"; + // The inserted text must be double-quoted so that \n is a real newline, + // matching the double-quoted expectation below. + $cs = Changeset::makeSplice($orig, 6, 0, " World\n"); + $result = Changeset::applyToText($cs, $orig); + self::assertSame("Hello\n World\n", $result); + } + + public function testMakeSpliceWithNewlines(): void + { + $orig = "Line1\nLine2\n"; + $cs = Changeset::makeSplice($orig, 6, 5, 'Replaced'); + $result = Changeset::applyToText($cs, $orig); + self::assertSame("Line1\nReplaced\n", $result); + } + + public function testApplyToTextKeep(): void + { + $cs = Changeset::identity(5); + $result = Changeset::applyToText($cs, 'hello'); + self::assertSame('hello', $result); + } + + public function testApplyToTextInsert(): void + { + // Insert 'XYZ' at the beginning of 'hello' + $cs = Changeset::makeSplice('hello', 0, 0, 'XYZ'); + $result = Changeset::applyToText($cs, 'hello'); + self::assertSame('XYZhello', $result); + } + + public function testApplyToTextDelete(): void + { + // Delete 2 characters at position 2 from 'hello' + $cs = Changeset::makeSplice('hello', 2, 2, ''); + $result = Changeset::applyToText($cs, 'hello'); + self::assertSame('heo', $result); + } + + public function testDeserializeOps(): void + { + $ops = '+5=3-2'; + $result = []; + foreach (Changeset::deserializeOps($ops) as $op) { + $result[] = ['opcode' => $op->opcode, 'chars' => $op->chars, 'lines' => $op->lines]; + } + 
self::assertCount(3, $result); + self::assertSame('+', $result[0]['opcode']); + self::assertSame(5, $result[0]['chars']); + self::assertSame('=', $result[1]['opcode']); + self::assertSame(3, $result[1]['chars']); + self::assertSame('-', $result[2]['opcode']); + self::assertSame(2, $result[2]['chars']); + } + + public function testDeserializeOpsWithNewlines(): void + { + $ops = '|2=c+5'; + $result = []; + foreach (Changeset::deserializeOps($ops) as $op) { + $result[] = ['opcode' => $op->opcode, 'chars' => $op->chars, 'lines' => $op->lines]; + } + self::assertCount(2, $result); + self::assertSame('=', $result[0]['opcode']); + self::assertSame(12, $result[0]['chars']); + self::assertSame(2, $result[0]['lines']); + } + + public function testClearOp(): void + { + $op = new Op('+'); + $op->chars = 5; + $op->lines = 1; + $op->attribs = '*0'; + + Changeset::clearOp($op); + + self::assertSame('', $op->opcode); + self::assertSame(0, $op->chars); + self::assertSame(0, $op->lines); + self::assertSame('', $op->attribs); + } + + public function testCopyOp(): void + { + $op1 = new Op('+'); + $op1->chars = 5; + $op1->lines = 1; + $op1->attribs = '*0'; + + $op2 = Changeset::copyOp($op1); + + self::assertSame('+', $op2->opcode); + self::assertSame(5, $op2->chars); + self::assertSame(1, $op2->lines); + self::assertSame('*0', $op2->attribs); + self::assertNotSame($op1, $op2); + } + + public function testCopyOpToExisting(): void + { + $op1 = new Op('+'); + $op1->chars = 5; + $op2 = new Op(); + Changeset::copyOp($op1, $op2); + self::assertSame('+', $op2->opcode); + self::assertSame(5, $op2->chars); + } + + public function testOpsFromTextSimple(): void + { + $ops = iterator_to_array(Changeset::opsFromText('+', 'hello')); + self::assertCount(1, $ops); + self::assertSame('+', $ops[0]->opcode); + self::assertSame(5, $ops[0]->chars); + self::assertSame(0, $ops[0]->lines); + } + + public function testOpsFromTextWithNewlines(): void + { + $ops = iterator_to_array(Changeset::opsFromText('=', 
"hello\nworld")); + // Should split into two ops: one for "hello\n" (with newline) and one for "world" + self::assertCount(2, $ops); + self::assertSame('=', $ops[0]->opcode); + self::assertSame(6, $ops[0]->chars); // "hello\n" + self::assertSame(1, $ops[0]->lines); + self::assertSame('=', $ops[1]->opcode); + self::assertSame(5, $ops[1]->chars); // "world" + self::assertSame(0, $ops[1]->lines); + } + + public function testMakeSpliceNegativeStartThrows(): void + { + $this->expectException(\RangeException::class); + Changeset::makeSplice('hello', -1, 0, ''); + } + + public function testMakeSpliceNegativeNdelThrows(): void + { + $this->expectException(\RangeException::class); + Changeset::makeSplice('hello', 0, -1, ''); + } + + public function testBuilderInsert(): void + { + $builder = new Builder(5); + $builder->keepText('hello'); + $builder->insert(' world'); + $cs = (string) $builder; + $result = Changeset::applyToText($cs, 'hello'); + self::assertSame('hello world', $result); + } + + public function testBuilderRemove(): void + { + $builder = new Builder(5); + $builder->remove(5); + $cs = (string) $builder; + $result = Changeset::applyToText($cs, 'hello'); + self::assertSame('', $result); + } + + public function testBuilderKeep(): void + { + // Trailing keep ops are implicit - keeping 3 chars in a 5-char document + // does not truncate unless you also explicitly remove the remaining 2 chars. 
+ $builder = new Builder(5); + $builder->keep(3); + $builder->remove(2); + $cs = (string) $builder; + $result = Changeset::applyToText($cs, 'hello'); + self::assertSame('hel', $result); + } + + public function testAttributePool(): void + { + $pool = new AttributePool(); + $num = $pool->putAttrib(['bold', 'true']); + self::assertSame(0, $num); + + // Re-adding same attribute should return same number + $num2 = $pool->putAttrib(['bold', 'true']); + self::assertSame(0, $num2); + + // Adding new attribute + $num3 = $pool->putAttrib(['italic', 'true']); + self::assertSame(1, $num3); + + self::assertSame(['bold', 'true'], $pool->getAttrib(0)); + self::assertSame(['italic', 'true'], $pool->getAttrib(1)); + self::assertNull($pool->getAttrib(5)); + + self::assertSame('bold', $pool->getAttribKey(0)); + self::assertSame('true', $pool->getAttribValue(0)); + } + + public function testAttributePoolDontAdd(): void + { + $pool = new AttributePool(); + $num = $pool->putAttrib(['bold', 'true'], true); + self::assertSame(-1, $num); // Not found and not added + + // Now add it + $pool->putAttrib(['bold', 'true']); + $num2 = $pool->putAttrib(['bold', 'true'], true); + self::assertSame(0, $num2); // Found + } + + public function testAttributePoolJsonable(): void + { + $pool = new AttributePool(); + $pool->putAttrib(['bold', 'true']); + $pool->putAttrib(['italic', 'true']); + + $json = $pool->toJsonable(); + self::assertArrayHasKey('numToAttrib', $json); + self::assertArrayHasKey('nextNum', $json); + self::assertSame(2, $json['nextNum']); + + $pool2 = new AttributePool(); + $pool2->fromJsonable($json); + self::assertSame(['bold', 'true'], $pool2->getAttrib(0)); + self::assertSame(['italic', 'true'], $pool2->getAttrib(1)); + } + + public function testAttributePoolEachAttrib(): void + { + $pool = new AttributePool(); + $pool->putAttrib(['bold', 'true']); + $pool->putAttrib(['italic', 'false']); + + $attribs = []; + $pool->eachAttrib(function (string $key, string $value) use (&$attribs): 
void { + $attribs[$key] = $value; + }); + + self::assertSame(['bold' => 'true', 'italic' => 'false'], $attribs); + } + + public function testAttributePoolClone(): void + { + $pool = new AttributePool(); + $pool->putAttrib(['bold', 'true']); + + $clone = $pool->clone(); + $clone->putAttrib(['italic', 'true']); + + // Original should not be modified + self::assertNull($pool->getAttrib(1)); + // Clone should have both + self::assertSame(['bold', 'true'], $clone->getAttrib(0)); + self::assertSame(['italic', 'true'], $clone->getAttrib(1)); + } + + public function testUnpackInvalidChangeset(): void + { + $this->expectException(\RuntimeException::class); + Changeset::unpack('invalid'); + } + + public function testEachAttribNumber(): void + { + $cs = Changeset::pack(5, 8, '*0*1+3', 'abc'); + $attribNums = []; + Changeset::eachAttribNumber($cs, function (int $num) use (&$attribNums): void { + $attribNums[] = $num; + }); + self::assertSame([0, 1], $attribNums); + } + + public function testMoveOpsToNewPool(): void + { + $oldPool = new AttributePool(); + $oldPool->putAttrib(['bold', 'true']); // num 0 + $oldPool->putAttrib(['italic', 'true']); // num 1 + + $newPool = new AttributePool(); + $newPool->putAttrib(['color', 'red']); // num 0 in new pool + $newPool->putAttrib(['bold', 'true']); // num 1 in new pool + + $cs = Changeset::pack(5, 8, '*0*1+3', 'abc'); + $newCs = Changeset::moveOpsToNewPool($cs, $oldPool, $newPool); + // bold was 0 in old pool -> 1 in new pool + // italic was 1 in old pool -> 2 in new pool (new entry) + self::assertStringContainsString('*1*2', $newCs); + } + + /** + * Tests writing text into a pad-like scenario: inserting text at different positions + * and building up the document step by step. 
+ */ + public function testWritingTextIntoPad(): void + { + // Start with an empty pad (just a newline, as etherpad always has at least one newline) + $padText = "\n"; + + // Step 1: Insert "Hello World" before the trailing newline + $cs1 = Changeset::makeSplice($padText, 0, 0, 'Hello World'); + $padText = Changeset::applyToText($cs1, $padText); + self::assertSame("Hello World\n", $padText); + + // Step 2: Append a new line + $cs2 = Changeset::makeSplice($padText, strlen($padText) - 1, 0, "\nSecond line"); + $padText = Changeset::applyToText($cs2, $padText); + self::assertSame("Hello World\nSecond line\n", $padText); + + // Step 3: Modify text in the middle + $cs3 = Changeset::makeSplice($padText, 6, 5, 'PHP'); + $padText = Changeset::applyToText($cs3, $padText); + self::assertSame("Hello PHP\nSecond line\n", $padText); + + // Step 4: Delete the second line (12 chars: "Second line\n" starts at position 10) + $cs4 = Changeset::makeSplice($padText, 10, 12, ''); + $padText = Changeset::applyToText($cs4, $padText); + self::assertSame("Hello PHP\n", $padText); + } +} From e2a3f86958e70804c848f88ace8ef9153ce873c5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 9 Mar 2026 22:23:58 +0000 Subject: [PATCH 3/3] Add POC etherpad-writer.php script; fix AttributePool JSON serialization for wire protocol Co-authored-by: Gared <913630+Gared@users.noreply.github.com> --- bin/etherpad-writer.php | 254 +++++++++++++++++++++++++ src/Changeset/AttributePool.php | 23 ++- tests/Unit/Changeset/ChangesetTest.php | 3 + 3 files changed, 275 insertions(+), 5 deletions(-) create mode 100644 bin/etherpad-writer.php diff --git a/bin/etherpad-writer.php b/bin/etherpad-writer.php new file mode 100644 index 0000000..632d1be --- /dev/null +++ b/bin/etherpad-writer.php @@ -0,0 +1,254 @@ +#!/usr/bin/env php + + * + * Example: + * php etherpad-writer.php https://etherpad.example.com my-pad + * php etherpad-writer.php 
https://demo.etherpad.org test-pad-123 + */ + +declare(strict_types=1); + +require __DIR__ . '/../vendor/autoload.php'; + +use ElephantIO\Client as ElephantClient; +use ElephantIO\Engine\SocketIO; +use Gared\EtherScan\Changeset\AttributePool; +use Gared\EtherScan\Changeset\Changeset; +use GuzzleHttp\Client as HttpClient; +use GuzzleHttp\Cookie\CookieJar; +use GuzzleHttp\RequestOptions; + +// --------------------------------------------------------------------------- +// CLI argument parsing +// --------------------------------------------------------------------------- + +if ($argc < 3) { + fwrite(STDERR, "Usage: php etherpad-writer.php \n"); + fwrite(STDERR, "Example: php etherpad-writer.php https://etherpad.example.com my-pad\n"); + exit(1); +} + +$baseUrl = rtrim($argv[1], '/') . '/'; +$padId = $argv[2]; +$insertText = 'Hello world'; + +echo "=== Etherpad Pad Writer (Proof of Concept) ===\n"; +echo "Server : {$baseUrl}\n"; +echo "Pad : {$padId}\n"; +echo "Insert : \"{$insertText}\" at the middle of the current text\n\n"; + +// --------------------------------------------------------------------------- +// Step 1: Visit the pad URL to obtain a session cookie +// --------------------------------------------------------------------------- + +$cookies = new CookieJar(); +$httpClient = new HttpClient([ + 'timeout' => 10.0, + 'connect_timeout' => 5.0, + RequestOptions::HEADERS => [ + 'User-Agent' => 'EtherpadPadWriter/1.0', + ], + 'verify' => false, +]); + +echo "[1/4] Fetching pad page to obtain session cookie...\n"; +try { + $httpClient->get($baseUrl . 'p/' . $padId, ['cookies' => $cookies]); +} catch (\Throwable $e) { + // Some pads may return non-200 but still set a cookie; continue + echo " (note: HTTP request returned: " . $e->getMessage() . ")\n"; +} + +$cookieString = ''; +foreach ($cookies as $cookie) { + $cookieString .= $cookie->getName() . '=' . $cookie->getValue() . ';'; +} +echo " Cookies: " . ($cookieString !== '' ? $cookieString : '(none)') . 
"\n\n"; + +// --------------------------------------------------------------------------- +// Step 2: Connect via Socket.IO and send CLIENT_READY +// --------------------------------------------------------------------------- + +echo "[2/4] Connecting via Socket.IO...\n"; + +$token = 't.' . bin2hex(random_bytes(16)); + +$socketClient = new ElephantClient( + ElephantClient::engine(ElephantClient::CLIENT_4X, $baseUrl . 'socket.io/', [ + 'persistent' => false, + 'context' => [ + 'ssl' => [ + 'verify_peer' => false, + 'verify_peer_name' => false, + ], + ], + 'headers' => [ + 'Cookie' => $cookieString, + ], + ]) +); + +$socketClient->connect(); +$socketClient->of('/'); + +$socketClient->emit('message', [ + 'component' => 'pad', + 'type' => 'CLIENT_READY', + 'padId' => $padId, + 'sessionID' => 'null', + 'token' => $token, + 'password' => null, + 'protocolVersion' => 2, +]); + +echo " Connected and CLIENT_READY sent.\n\n"; + +// --------------------------------------------------------------------------- +// Step 3: Wait for CLIENT_VARS to get the current pad state +// --------------------------------------------------------------------------- + +echo "[3/4] Waiting for CLIENT_VARS...\n"; + +$currentText = null; +$currentRev = null; +$serverApool = null; + +while ($result = $socketClient->wait('message', 5)) { + if (!is_array($result->data)) { + continue; + } + + $msg = $result->data; + + // Skip COLLABROOM/CUSTOM messages (e.g. plugin broadcasts) + if (isset($msg['data']['type']) && $msg['data']['type'] === 'CUSTOM') { + continue; + } + + // The CLIENT_VARS message carries access status + $accessStatus = $msg['accessStatus'] ?? null; + if ($accessStatus === 'deny') { + fwrite(STDERR, "ERROR: Pad access denied. The pad may require authentication.\n"); + $socketClient->disconnect(); + exit(1); + } + + // CLIENT_VARS message: {type:'CLIENT_VARS', data: clientVars} + $type = $msg['type'] ?? 
null; + if ($type !== 'CLIENT_VARS') { + // Not CLIENT_VARS yet, keep waiting + continue; + } + + $clientVars = $msg['data']; + $collabClientVars = $clientVars['collab_client_vars'] ?? null; + + if ($collabClientVars === null) { + fwrite(STDERR, "ERROR: Unexpected CLIENT_VARS structure (missing collab_client_vars).\n"); + $socketClient->disconnect(); + exit(1); + } + + $currentRev = (int) $collabClientVars['rev']; + $currentText = $collabClientVars['initialAttributedText']['text']; + $serverApool = $collabClientVars['apool']; // {numToAttrib: {...}, nextNum: N} + + echo " Current revision : {$currentRev}\n"; + echo " Current text : " . json_encode($currentText) . "\n"; + break; +} + +if ($currentText === null || $currentRev === null || $serverApool === null) { + fwrite(STDERR, "ERROR: Did not receive CLIENT_VARS within timeout.\n"); + $socketClient->disconnect(); + exit(1); +} + +// --------------------------------------------------------------------------- +// Step 4: Build a changeset that inserts text in the middle, then send it +// --------------------------------------------------------------------------- + +echo "\n[4/4] Building and sending changeset...\n"; + +// Etherpad pads always end with "\n". The text we receive will look like "some text\n". +// We want to insert "Hello world" in the middle of the textual content +// (i.e., halfway through the characters, not counting the final newline). + +$textLength = strlen($currentText); // includes trailing "\n" + +// Find the insertion point: middle of the text (before the trailing newline) +$contentLength = max(0, $textLength - 1); // exclude trailing "\n" +$insertionPoint = (int) ($contentLength / 2); + +echo " Text length : {$textLength} chars\n"; +echo " Insertion point : position {$insertionPoint}\n"; + +// Reconstruct the server's attribute pool so we can pass it to moveOpsToNewPool if needed. +// For a simple plain-text insertion with no attributes, we use an empty local pool. 
+$localPool = new AttributePool(); + +// Build the changeset: keep the first half, insert the new text, keep the rest. +$changeset = Changeset::makeSplice($currentText, $insertionPoint, 0, $insertText); + +echo " Changeset : {$changeset}\n"; + +// Verify the changeset applies correctly (sanity check) +$resultText = Changeset::applyToText($changeset, $currentText); +echo " Resulting text : " . json_encode($resultText) . "\n"; + +// Send the USER_CHANGES message wrapped in COLLABROOM +$socketClient->emit('message', [ + 'type' => 'COLLABROOM', + 'component' => 'pad', + 'data' => [ + 'type' => 'USER_CHANGES', + 'baseRev' => $currentRev, + 'changeset' => $changeset, + 'apool' => $localPool->toJsonable(), + ], +]); + +echo " USER_CHANGES sent.\n"; + +// Wait for ACCEPT_COMMIT confirmation +$accepted = false; +while ($result = $socketClient->wait('message', 5)) { + if (!is_array($result->data)) { + continue; + } + $msg = $result->data; + $type = $msg['data']['type'] ?? null; + + if ($type === 'ACCEPT_COMMIT') { + $newRev = $msg['data']['newRev'] ?? '?'; + echo " ACCEPT_COMMIT received! New revision: {$newRev}\n"; + $accepted = true; + break; + } + + // Handle disconnect/error response + if (isset($msg['disconnect'])) { + fwrite(STDERR, "ERROR: Server rejected changeset: " . $msg['disconnect'] . "\n"); + $socketClient->disconnect(); + exit(1); + } +} + +$socketClient->disconnect(); + +if ($accepted) { + echo "\n✓ Success! 
\"{$insertText}\" was inserted at position {$insertionPoint} in pad '{$padId}'.\n"; +} else { + fwrite(STDERR, "\nWARNING: Changeset was sent but no ACCEPT_COMMIT was received within timeout.\n"); + fwrite(STDERR, "The change may have been applied; check the pad manually.\n"); + exit(2); +} diff --git a/src/Changeset/AttributePool.php b/src/Changeset/AttributePool.php index 939769a..3a1208b 100644 --- a/src/Changeset/AttributePool.php +++ b/src/Changeset/AttributePool.php @@ -105,13 +105,18 @@ public function eachAttrib(callable $func): void } /** - * @return array{numToAttrib: array, nextNum: int} - * An object suitable for serialization that can be passed to fromJsonable to reconstruct the pool. + * Returns a representation suitable for JSON encoding for the wire protocol. + * + * The `numToAttrib` field is returned as a \stdClass so that json_encode() serializes it as a + * JSON object ({}) rather than a JSON array ([]), which is required by the Etherpad protocol + * even when the pool is empty. + * + * @return array{numToAttrib: \stdClass, nextNum: int} */ public function toJsonable(): array { return [ - 'numToAttrib' => $this->numToAttrib, + 'numToAttrib' => (object) $this->numToAttrib, 'nextNum' => $this->nextNum, ]; } @@ -119,11 +124,19 @@ public function toJsonable(): array /** * Replace the contents of this attribute pool with values from a previous call to toJsonable. * - * @param array{numToAttrib: array, nextNum: int} $obj + * The `numToAttrib` value may be either a PHP array (from internal round-tripping) or a + * \stdClass instance (returned by toJsonable() or by json_decode without the assoc flag). + * Both forms are accepted and handled transparently. 
+ * + * @param array{numToAttrib: array|\stdClass, nextNum: int} $obj */ public function fromJsonable(array $obj): self { - $this->numToAttrib = $obj['numToAttrib']; + // Accept both \stdClass (from toJsonable()) and plain array (from json_decode with assoc=true) + $raw = $obj['numToAttrib']; + /** @var array $numToAttrib */ + $numToAttrib = $raw instanceof \stdClass ? (array) $raw : $raw; + $this->numToAttrib = $numToAttrib; $this->nextNum = $obj['nextNum']; $this->attribToNum = []; foreach ($this->numToAttrib as $n => $attrib) { diff --git a/tests/Unit/Changeset/ChangesetTest.php b/tests/Unit/Changeset/ChangesetTest.php index ee474f4..7a8c0a1 100644 --- a/tests/Unit/Changeset/ChangesetTest.php +++ b/tests/Unit/Changeset/ChangesetTest.php @@ -321,7 +321,10 @@ public function testAttributePoolJsonable(): void self::assertArrayHasKey('numToAttrib', $json); self::assertArrayHasKey('nextNum', $json); self::assertSame(2, $json['nextNum']); + // numToAttrib must be a stdClass (JSON object) for correct wire protocol serialization + self::assertInstanceOf(\stdClass::class, $json['numToAttrib']); + // fromJsonable accepts the result of toJsonable() directly (no manual cast needed) $pool2 = new AttributePool(); $pool2->fromJsonable($json); self::assertSame(['bold', 'true'], $pool2->getAttrib(0));