# Source: exasol/toolbox/util/release/markdown.py
# Changelog entry (doc/changes/unreleased.md, section "Refactorings"):
#   * #763: Parse and Manipulate Changes Files
"""
Class Markdown represents a file in markdown syntax with some additional
constraints:

* The file must start with a title in the first line.
* Each subsequent title must be of a deeper level, i.e. start with more "#"
  characters than the top-level title.

Each title starts a section, optionally containing an additional intro and a
bullet list of items.

Each section can also contain subsections as children, hence sections can be
nested up to the top-level section representing the whole file.
"""

from __future__ import annotations

import io
from pathlib import Path


class ParseError(Exception):
    """
    Indicates inconsistencies when parsing a markdown file from raw text,
    e.g. a first line that is not a title, or a second title that is not
    nested below the top-level title.
    """


class IllegalChild(Exception):
    """
    Raised when a section is added as child to a section whose title does
    not have strictly fewer "#" characters than the title of the child.
    """


def is_title(line: str) -> bool:
    """Return True if the line is non-empty and starts with "#"."""
    return bool(line) and line.startswith("#")


def is_list_item(line: str) -> bool:
    """Return True if the line is non-empty and starts with "*" or "-"."""
    return bool(line) and line.startswith(("*", "-"))


def is_intro(line: str) -> bool:
    """
    Return True if the line is non-empty and neither a title nor a list
    item. Lines consisting only of a newline character count as intro.
    """
    return bool(line) and not is_title(line) and not is_list_item(line)


def level(title: str) -> int:
    """
    Return the hierarchical level of the title, i.e. the number of "#"
    chars at the beginning of the title.
    """
    return len(title) - len(title.lstrip("#"))


class Markdown:
    """
    Represents a Markdown file or a section within a Markdown file.

    Attributes:
        title: Title line including the leading "#" characters, without
            trailing newline characters.
        intro: Text between the title and the first list item.
        items: Text starting with the first list item and ending before
            the next title.
        children: Subsections, each having a title with more "#"
            characters than the title of this section.
    """

    # Instances are mutable and compared by value, hence not hashable.
    __hash__ = None

    def __init__(
        self,
        title: str,
        intro: str = "",
        items: str = "",
        children: list[Markdown] | None = None,
    ):
        """
        Create a section.

        Raises:
            IllegalChild: if one of the children cannot be contained in a
                section with the specified title.
        """
        self.title = title.rstrip("\n")
        self.intro = intro
        self.items = items
        # Build a new list, so later calls to add_child() or
        # replace_or_append_child() do not modify the list of the caller.
        self.children: list[Markdown] = [self._check(c) for c in children or ()]

    def can_contain(self, child: Markdown) -> bool:
        """
        Return True if the title of the child has more "#" characters than
        the title of this section.
        """
        return level(self.title) < level(child.title)

    def find(self, child_title: str) -> tuple[int, Markdown] | None:
        """
        Return index and child having the specified title, or None if
        there is none.
        """
        for i, child in enumerate(self.children):
            if child.title == child_title:
                return i, child
        return None

    def child(self, title: str) -> Markdown | None:
        """
        Retrieve the child with the specified title, or None if there is
        none.
        """
        return found[1] if (found := self.find(title)) else None

    def _check(self, child: Markdown) -> Markdown:
        """
        Return the specified child unchanged, or raise IllegalChild if this
        section cannot contain it.
        """
        if not self.can_contain(child):
            raise IllegalChild(
                f'Markdown section "{self.title}" cannot have "{child.title}" as child.'
            )
        return child

    def add_child(self, child: Markdown, pos: int = 1) -> Markdown:
        """
        Insert the specified section as child at the specified position
        and return this section.

        The position is passed to list.insert(), so a position beyond the
        end of the list appends the child.

        Raises:
            IllegalChild: if this section cannot contain the child.
        """
        self.children.insert(pos, self._check(child))
        return self

    def replace_or_append_child(self, child: Markdown) -> Markdown:
        """
        If there is a child with the same title then replace this child
        otherwise append the specified child. Return this section.

        Raises:
            IllegalChild: if this section cannot contain the child.
        """
        self._check(child)
        if found := self.find(child.title):
            self.children[found[0]] = child
        else:
            self.children.append(child)
        return self

    @property
    def rendered(self) -> str:
        """
        Title, intro, items, and the rendered children, separated by a
        blank line each. Empty parts are omitted.
        """
        parts = [self.title, self.intro, self.items]
        parts.extend(c.rendered for c in self.children)
        return "\n\n".join(p for p in parts if p)

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, Markdown):
            # Let Python try the reflected comparison; the final result of
            # "==" with an unrelated type still is False.
            return NotImplemented
        return (
            other.title == self.title
            and other.intro == self.intro
            and other.items == self.items
            and other.children == self.children
        )

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(title={self.title!r}, intro={self.intro!r}, "
            f"items={self.items!r}, children={self.children!r})"
        )

    def __str__(self) -> str:
        return self.rendered

    @classmethod
    def read(cls, file: Path) -> Markdown:
        """
        Parse Markdown instance from the provided file.

        The file is decoded as UTF-8 rather than with the locale-dependent
        default encoding, so the result is the same on every platform.
        """
        with file.open("r", encoding="utf-8") as stream:
            return cls.parse(stream)

    @classmethod
    def from_text(cls, text: str) -> Markdown:
        """
        Parse Markdown instance from the provided text.
        """
        return cls.parse(io.StringIO(text))

    @classmethod
    def parse(cls, stream: io.TextIOBase) -> Markdown:
        """
        Parse Markdown instance from the provided stream.

        Raises:
            ParseError: if the first line is not a title, or if there is a
                title that is not nested below the top-level title.
        """
        line = stream.readline()
        if not is_title(line):
            shown = line.rstrip("\n")
            raise ParseError(
                f'First line of markdown file must be a title, but is "{shown}"'
            )

        section, line = cls._parse(stream, line)
        if not line:
            return section
        shown = line.rstrip("\n")
        raise ParseError(
            f'Found additional line "{shown}" after top-level section "{section.title}".'
        )

    @classmethod
    def _parse(cls, stream: io.TextIOBase, title: str) -> tuple[Markdown, str]:
        """
        Parse the section with the specified title line from the stream.

        Return the section and the first line not belonging to it, which
        is either a title that is not nested below the specified title or
        an empty string at the end of the stream.
        """
        intro: list[str] = []
        items: list[str] = []
        children: list[Markdown] = []
        own_level = level(title)

        line = stream.readline()
        while is_intro(line):
            intro.append(line)
            line = stream.readline()
        if is_list_item(line):
            # After the first list item every line up to the next title
            # belongs to the items, including blank and plain text lines.
            while line and not is_title(line):
                items.append(line)
                line = stream.readline()
        while is_title(line) and own_level < level(line):
            # Use cls, so subclasses get children of their own type.
            child, line = cls._parse(stream, title=line)
            children.append(child)
        section = cls(
            title,
            "".join(intro).strip("\n"),
            "".join(items).strip("\n"),
            children,
        )
        return section, line
# Source: test/unit/util/release/markdown_test.py
import re
from inspect import cleandoc

import pytest

from exasol.toolbox.util.release.markdown import (
    IllegalChild,
    Markdown,
    ParseError,
)


def _markdown(text: str) -> Markdown:
    """Parse the specified text after removing its common indentation."""
    return Markdown.from_text(cleandoc(text))


class Scenario:
    """
    Raw markdown text together with the expected rendering and the
    expected titles of the direct children of the top-level section.
    """

    def __init__(
        self, initial: str, expected_output: str, expected_children: list[str]
    ):
        self.initial = cleandoc(initial)
        self.expected_output = cleandoc(expected_output)
        self.expected_children = expected_children

    def create_testee(self) -> Markdown:
        """Return a new instance, so tests can modify it independently."""
        return Markdown.from_text(self.initial)


MINIMAL = Scenario(
    initial="""
    # title
    body
    """,
    expected_output="""
    # title

    body
    """,
    expected_children=[],
)

FULL = Scenario(
    initial="""
    # title
    intro
    * item one
    * item two
    ## Child
    cintro
    - item c1
    - item c2
    """,
    expected_output="""
    # title

    intro

    * item one
    * item two

    ## Child

    cintro

    - item c1
    - item c2
    """,
    expected_children=["## Child"],
)

TWO_CHILDREN = Scenario(
    initial="""
    # Parent
    text
    ## C1
    aaa
    ## C2
    bbb
    """,
    expected_output="""
    # Parent

    text

    ## C1

    aaa

    ## C2

    bbb
    """,
    expected_children=["## C1", "## C2"],
)

NESTED = Scenario(
    initial="""
    # Parent
    text
    ## Child A
    aaa
    ### Grand Child
    ccc
    ## Child B
    bbb
    """,
    expected_output="""
    # Parent

    text

    ## Child A

    aaa

    ### Grand Child

    ccc

    ## Child B

    bbb
    """,
    expected_children=["## Child A", "## Child B"],
)

CHILD = _markdown("""
    ## Sample Child
    child intro.
    """)

ILLEGAL_CHILD = _markdown("""
    # Top-level
    intro
    """)


def test_no_title_error():
    with pytest.raises(ParseError, match="First line of markdown file must be a title"):
        Markdown.from_text("body\n# title")


def test_additional_line_error():
    invalid_markdown = cleandoc("""
        # Title
        Some text.
        # Another Title
        """)

    expected_error = (
        'additional line "# Another Title" after top-level section "# Title".'
    )
    # match= is a regular expression, so escape the literal message.
    with pytest.raises(ParseError, match=re.escape(expected_error)):
        Markdown.from_text(invalid_markdown)


def test_constructor_illegal_child():
    with pytest.raises(IllegalChild):
        Markdown("# title", children=[ILLEGAL_CHILD])


@pytest.mark.parametrize(
    "content, expected",
    [
        pytest.param(
            """
            # title
            """,
            Markdown("# title"),
            id="only_title",
        ),
        pytest.param(
            """
            # title
            intro
            """,
            Markdown("# title", "intro"),
            id="intro",
        ),
        pytest.param(
            """
            # title
            * item 1
            """,
            Markdown("# title", "", "* item 1"),
            id="items",
        ),
        pytest.param(
            """
            # title
            intro
            * item 1
            * item 2
            """,
            Markdown("# title", "intro", "* item 1\n* item 2"),
            id="intro_and_items",
        ),
        pytest.param(
            """
            # title
            intro
            - item 1
            - item 2
            """,
            Markdown("# title", "intro", "- item 1\n- item 2"),
            id="intro_dash_items",
        ),
    ],
)
def test_equals(content: str, expected: Markdown) -> None:
    assert Markdown.from_text(cleandoc(content)) == expected


@pytest.mark.parametrize(
    "attr, value",
    [
        ("title", "# other"),
        ("intro", "other"),
        ("items", "- aaa"),
        ("children", []),
    ],
)
def test_different(attr, value) -> None:
    testee = FULL.create_testee()
    other = FULL.create_testee()
    setattr(other, attr, value)
    assert testee != other


def test_test_read(tmp_path) -> None:
    file = tmp_path / "sample.md"
    file.write_text(MINIMAL.initial)
    assert Markdown.read(file) == MINIMAL.create_testee()


ALL_SCENARIOS = [MINIMAL, FULL, TWO_CHILDREN, NESTED]

# Replacing an existing child requires at least one child to start with.
SCENARIOS_WITH_CHILDREN = [s for s in ALL_SCENARIOS if s.expected_children]


@pytest.mark.parametrize("scenario", ALL_SCENARIOS)
def test_number_of_children(scenario: Scenario):
    assert len(scenario.create_testee().children) == len(scenario.expected_children)


@pytest.mark.parametrize("scenario", ALL_SCENARIOS)
def test_non_existing_child(scenario: Scenario):
    assert scenario.create_testee().child("non existing") is None


@pytest.mark.parametrize("scenario", ALL_SCENARIOS)
def test_valid_child(scenario: Scenario):
    assert all(scenario.create_testee().child(c) for c in scenario.expected_children)


@pytest.mark.parametrize("scenario", ALL_SCENARIOS)
def test_rendered(scenario: Scenario):
    assert scenario.create_testee().rendered == scenario.expected_output


@pytest.mark.parametrize(
    "scenario, pos",
    [
        (MINIMAL, 0),
        (FULL, 1),
        (TWO_CHILDREN, 1),
    ],
)
def test_add_child(scenario: Scenario, pos: int):
    # add_child() inserts at position 1 by default; for a section without
    # children the new child ends up at index 0.
    testee = scenario.create_testee()
    testee.add_child(CHILD)
    assert testee.children[pos] == CHILD


def test_replace_illegal_child():
    testee = FULL.create_testee()
    with pytest.raises(IllegalChild):
        testee.replace_or_append_child(ILLEGAL_CHILD)


@pytest.mark.parametrize("scenario", SCENARIOS_WITH_CHILDREN)
def test_replace_existing_child(scenario: Scenario):
    # Use the parametrized scenario rather than always FULL, so each
    # parameter exercises a different document.
    testee = scenario.create_testee()
    old_child = testee.children[0]
    old_rendered = testee.rendered
    new_child = Markdown(old_child.title, "new intro")
    expected = old_rendered.replace(old_child.rendered, new_child.rendered)
    testee.replace_or_append_child(new_child)
    assert testee.rendered == expected


@pytest.mark.parametrize("scenario", ALL_SCENARIOS)
def test_replace_non_existing_child(scenario: Scenario):
    testee = scenario.create_testee()
    expected = len(testee.children) + 1
    testee.replace_or_append_child(CHILD)
    assert len(testee.children) == expected
    assert testee.children[-1] == CHILD


@pytest.mark.parametrize("scenario", ALL_SCENARIOS)
def test_add_illegal_child(scenario: Scenario):
    testee = scenario.create_testee()
    with pytest.raises(IllegalChild):
        testee.add_child(ILLEGAL_CHILD)


def test_nested():
    testee = NESTED.create_testee()
    assert testee.child("## Child A").child("### Grand Child") is not None