Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
70 changes: 70 additions & 0 deletions exasol/toolbox/util/workflows/formatting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import re

from yaml import SafeDumper
from yaml.resolver import Resolver

# Common prefix of the standard YAML tags, and the full tag used for strings.
YAML_TAG = "tag:yaml.org,2002"
YAML_TAG_STR = f"{YAML_TAG}:str"

# Regex for common strings in YAML that would otherwise lose their quotes.
# It only matches when the WHOLE string has one of these shapes:
# 1. Version numbers (e.g., 2.3.0, 3.10)
# 2. OS/image names (e.g., ubuntu-24.04)
# 3. Numeric strings with a leading zero that look like octals (e.g., 045)
# This is important, as without the quotes, GitHub will misinterpret versions
# like 3.10 as the number 3.1, which is not an equivalent version.
QUOTE_REGEX = re.compile(r"^(\d+\.\d+(\.\d+)?|[a-zA-Z]+-\d+\.\d+|0\d+)$")

# YAML offers "on" and "off" as a shorthand for boolean values.
# GitHub workflows use "on" as a regular key, so it must NOT be replaced with
# "True". The implicit resolvers are stored per leading character, so dropping
# the bool resolver for "O" and "o" is enough to switch this shorthand off.
# NOTE: this changes the shared ``Resolver`` class attribute, not only the
# dumper defined in this module.
for character in ("O", "o"):
    Resolver.yaml_implicit_resolvers[character] = [
        resolver_entry
        for resolver_entry in Resolver.yaml_implicit_resolvers[character]
        if resolver_entry[0] != "tag:yaml.org,2002:bool"
    ]


class GitHubDumper(SafeDumper):
    """``SafeDumper`` subclass that serves as the anchor for GitHub-specific representers."""


def empty_representer(dumper: SafeDumper, data):
    """
    Dump an empty field as empty, instead of writing out "null".

    Without `empty_representer`::

        on:
          workflow_call: null

    With `empty_representer`::

        on:
          workflow_call:

    ``data`` is part of the representer signature but is not inspected.
    """
    null_tag = f"{YAML_TAG}:null"
    return dumper.represent_scalar(null_tag, "")


def str_presenter(dumper: SafeDumper, data):
    """
    Represent a string in a scalar style that is compatible with GitHub.

    * Strings containing a line break use the literal block style ``|``.
    * Strings matching ``QUOTE_REGEX`` and strings starting with ``${{`` or
      ``__`` are double-quoted.
    * All other strings are passed on without an explicit style.
    """
    if "\n" in data:
        # Multiline steps are written with a pipe "|" instead of quotes "'".
        # The text must end with \n, otherwise PyYAML adds the '-' strip indicator.
        block_text = data if data.endswith("\n") else data + "\n"
        return dumper.represent_scalar(YAML_TAG_STR, block_text, style="|")

    # Version-like strings stay quoted so that the workflow does not parse them
    # as numbers, e.g. "3.10" as the float 3.1.
    looks_like_version = QUOTE_REGEX.match(data) is not None
    # Values using GitHub secrets or variables, and "__" values, are quoted too.
    has_quoted_prefix = data.startswith(("${{", "__"))
    if looks_like_version or has_quoted_prefix:
        return dumper.represent_scalar(YAML_TAG_STR, data, style='"')

    return dumper.represent_scalar(YAML_TAG_STR, data)


# Attach the custom representers to GitHubDumper: every ``str`` goes through
# ``str_presenter`` and every ``None`` through ``empty_representer``.
GitHubDumper.add_representer(str, str_presenter)
GitHubDumper.add_representer(type(None), empty_representer)
91 changes: 91 additions & 0 deletions exasol/toolbox/util/workflows/template.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
from dataclasses import dataclass
from inspect import cleandoc
from itertools import count
from re import (
MULTILINE,
sub,
)
from typing import Any

from jinja2 import Environment
from yaml import (
dump,
safe_load,
)

from exasol.toolbox.util.workflows.formatting import GitHubDumper

# Jinja variables are written as "(( name ))" instead of the default "{{ name }}".
# NOTE(review): presumably chosen so that GitHub's own "${{ ... }}" expressions
# are left untouched by Jinja - confirm.
# NOTE(review): autoescape=True turns on Jinja's HTML escaping of rendered
# values; confirm this is wanted for YAML output (e.g. "&" or quotes in a value).
jinja_env = Environment(
    variable_start_string="((", variable_end_string="))", autoescape=True
)


@dataclass(frozen=True)
class TemplateToWorkflow:
    """
    Convert a workflow template into a rendered GitHub workflow.

    The template is rendered with Jinja, parsed with PyYAML and dumped again
    with ``GitHubDumper``. As PyYAML drops comments while parsing, full-line
    comments are temporarily turned into ``_com_<id>`` key-pairs and turned
    back into comments after dumping.
    """

    template_str: str
    github_template_dict: dict[str, Any]
    # Source of the numeric suffix of the temporary comment keys. The default
    # counter is shared by all instances; this is sufficient, as the ids only
    # have to be unique and are removed again from the final output.
    _comment_id: count = count(0)

    def _render_with_jinja(self, input_str: str) -> str:
        """
        Render the template with Jinja, using ``github_template_dict`` as context.
        """
        jinja_template = jinja_env.from_string(input_str)
        return jinja_template.render(self.github_template_dict)

    def _convert_comment_to_key_pair(self, input_str: str) -> str:
        """
        Convert each full-line comment to a key-pair, which is parseable by PyYaml.

        Example:
            # Comment 1
            build-job:
              ....

            _com_0: "Comment 1"
            build-job:
              ....

        Backslashes and double quotes inside the comment are escaped, so that
        the generated double-quoted value is valid YAML.

        Only lines consisting of optional indentation followed by ``#`` are
        converted; a comment after a value on the same line is not matched.

        Case where it does not work:
            schedule:
              # At 00:00 on every 7th day-of-month from 1 through 31. (https://crontab.guru)
              - cron: "0 0 1/7 * *"

        Here the replacement comment would need to start with a - to be valid YAML.
        This is possible to do, but the code would be more complicated, as it is
        not guaranteed that the next line starts with a -.
        """

        def comment_to_key(match):
            indent = match.group(1)
            # Escape the backslash first, otherwise the backslashes added for
            # the double quotes would be escaped a second time.
            content = match.group(2).replace("\\", "\\\\").replace('"', '\\"')
            return f'{indent}_com_{next(self._comment_id)}: "{content}"'

        # [ \t]* instead of \s*: the match must stay on one line, otherwise an
        # empty comment ("#" only) would swallow the following line as its text.
        pattern = r"^([ \t]*)#[ \t]*(.*)"
        return sub(pattern, comment_to_key, input_str, flags=MULTILINE)

    @staticmethod
    def _convert_key_pair_to_comment(input_str: str) -> str:
        """
        Convert a special key-pair back to a comment. This performs the reverse
        operation of :meth:`_convert_comment_to_key_pair`.

        One layer of YAML quoting around the value is removed, so that a value
        dumped as ``'Note: text'`` becomes ``# Note: text`` again. For
        double-quoted values only the escapes ``\\"`` and ``\\\\`` are resolved;
        other escape sequences are kept as they are.
        """

        def key_to_comment(match):
            indent, value = match.group(1), match.group(2)
            if len(value) >= 2 and value[0] == value[-1] == "'":
                # Single-quoted YAML scalar: a quote is escaped by doubling it.
                value = value[1:-1].replace("''", "'")
            elif len(value) >= 2 and value[0] == value[-1] == '"':
                # Double-quoted YAML scalar: undo \" and \\.
                value = sub(r'\\(["\\])', r"\1", value[1:-1])
            # Avoid a trailing space for an empty comment.
            return f"{indent}# {value}" if value else f"{indent}#"

        pattern = r"^([ \t]*)_com_\d+:[ \t]*(.*)"
        return sub(pattern, key_to_comment, input_str, flags=MULTILINE)

    def convert(self) -> str:
        """
        Convert a workflow template to a rendered workflow that works for GitHub.

        Returns the rendered workflow as a string, cleaned with ``cleandoc``.
        """

        workflow_string = self._render_with_jinja(self.template_str)
        workflow_string = self._convert_comment_to_key_pair(workflow_string)
        workflow_dict = safe_load(workflow_string)
        workflow_string = dump(
            workflow_dict,
            Dumper=GitHubDumper,
            width=200,  # To prevent longer lines from wrapping
            sort_keys=False,  # if True, then re-orders the jobs alphabetically
        )
        workflow_string = self._convert_key_pair_to_comment(workflow_string)
        return cleandoc(workflow_string)
46 changes: 46 additions & 0 deletions exasol/toolbox/util/workflows/template_alternate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from dataclasses import dataclass
from itertools import count
from typing import Any

from jinja2 import Environment

# Jinja variables are written as "(( name ))" instead of the default "{{ name }}".
# NOTE(review): autoescape=True turns on Jinja's HTML escaping of rendered
# values; confirm this is wanted for YAML output (e.g. "&" or quotes in a value).
jinja_env = Environment(
    variable_start_string="((", variable_end_string="))", autoescape=True
)

import io
from inspect import cleandoc

from ruamel.yaml import YAML


@dataclass(frozen=True)
class TemplateToWorkflow:
    """Render a workflow template and pass it through a ruamel.yaml load/dump cycle."""

    template_str: str
    github_template_dict: dict[str, Any]
    # Not read by any method of this class.
    _comment_id: count = count(0)

    def _render_with_jinja(self, input_str: str) -> str:
        """
        Return ``input_str`` rendered by Jinja with ``github_template_dict`` as context.
        """
        return jinja_env.from_string(input_str).render(self.github_template_dict)

    def convert(self) -> str:
        """
        Turn the workflow template into a rendered workflow that works for GitHub.
        """
        rendered = self._render_with_jinja(self.template_str)

        round_tripper = YAML()
        round_tripper.width = 200
        round_tripper.preserve_quotes = True
        # Ensures keys stay in order
        round_tripper.sort_base_mapping_type_on_output = False
        round_tripper.indent(mapping=2, sequence=4, offset=2)

        buffer = io.StringIO()
        round_tripper.dump(round_tripper.load(rendered), buffer)
        return cleandoc(buffer.getvalue())
30 changes: 30 additions & 0 deletions exasol/toolbox/util/workflows/workflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from pathlib import Path
from typing import Any

from pydantic import (
BaseModel,
ConfigDict,
)

from exasol.toolbox.util.workflows.template import TemplateToWorkflow


class Workflow(BaseModel):
    """Immutable model holding the rendered content of a workflow."""

    model_config = ConfigDict(frozen=True, arbitrary_types_allowed=True)

    # Rendered workflow text, as returned by ``TemplateToWorkflow.convert``.
    content: str

    @classmethod
    def load_from_template(
        cls, file_path: Path, github_template_dict: dict[str, Any]
    ) -> "Workflow":
        """
        Read the template at ``file_path``, render it and wrap the result.

        Args:
            file_path: location of the workflow template.
            github_template_dict: values handed to ``TemplateToWorkflow`` for
                rendering the template.

        Returns:
            A ``Workflow`` whose ``content`` is the rendered workflow.

        Raises:
            FileNotFoundError: if ``file_path`` does not exist.
            ValueError: if reading, rendering or validating fails; the original
                exception is attached as ``__cause__``.
        """
        if not file_path.exists():
            raise FileNotFoundError(file_path)

        try:
            # Explicit encoding, so that the result does not depend on the
            # platform's default locale encoding.
            raw_content = file_path.read_text(encoding="utf-8")
            template_to_workflow = TemplateToWorkflow(
                template_str=raw_content, github_template_dict=github_template_dict
            )
            workflow = template_to_workflow.convert()
            return cls(content=workflow)
        except Exception as e:
            # Chain explicitly, so that the traceback shows the root cause as
            # the direct cause of this error.
            raise ValueError(f"Error rendering file: {e}") from e
2 changes: 1 addition & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ dependencies = [
"pytest>=7.2.2,<10",
"pyupgrade>=2.38.2,<4.0.0",
"pyyaml (>=6.0.3,<7.0.0)",
"ruamel-yaml (>=0.18.0,<=0.18.16)",
"ruff>=0.14.5,<0.15",
"shibuya>=2024.5.14",
"sphinx>=5.3,<8",
Expand Down
File renamed without changes.
132 changes: 132 additions & 0 deletions test/unit/util/workflows/formatting_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
from inspect import cleandoc

from yaml import (
dump,
safe_load,
)

from exasol.toolbox.util.workflows.formatting import GitHubDumper


class TestEmptyRepresenter:
    """Tests how ``None`` values are dumped with and without ``GitHubDumper``."""

    # NOTE(review): the nesting indentation inside this YAML document is not
    # visible in this copy of the file - confirm against the original source.
    documentation = """
name: Merge-Gate
on:
workflow_call:
"""

    def test_works_as_expected(self):
        # With GitHubDumper, the dumped output equals the input document.
        data = safe_load(cleandoc(self.documentation))
        output = dump(
            data,
            Dumper=GitHubDumper,
        )
        assert output == cleandoc(self.documentation) + "\n"

    def test_default_behavior_differs(self):
        # With the default dumper, the empty value is written as "null".
        expected = cleandoc(
            """
name: Merge-Gate
on:
workflow_call: null
"""
        )

        data = safe_load(cleandoc(self.documentation))

        output = dump(data)
        assert output == expected + "\n"


class TestStrPresenter:
    """Tests how strings are dumped with and without ``GitHubDumper``."""

    # NOTE(review): the nesting indentation inside the YAML documents and the
    # expected strings below is not visible in this copy of the file - confirm
    # against the original source.

    # Document with a multiline "run" step.
    doc_with_line_break = """
steps:
- name: Generate GitHub Summary
run: |
echo -e "# Summary" >> $GITHUB_STEP_SUMMARY
poetry run -- nox -s project:report -- --format markdown >> $GITHUB_STEP_SUMMARY
"""
    # Document with quoted version strings.
    doc_with_version = """
steps:
- name: Setup Python & Poetry Environment
uses: exasol/python-toolbox/.github/actions/python-environment@v5
with:
python-version: "3.10"
poetry-version: "2.3.0"
"""
    # Document with a quoted "__" value and a quoted GitHub secret expression.
    doc_with_github_secrets = """
steps:
- name: PyPi Release
env:
POETRY_HTTP_BASIC_PYPI_USERNAME: "__token__"
POETRY_HTTP_BASIC_PYPI_PASSWORD: "${{ secrets.PYPI_TOKEN }}"
run: poetry publish
"""

    def test_line_break_works_as_expected(self):
        # With GitHubDumper, the multiline step is dumped exactly as in the input.
        data = safe_load(cleandoc(self.doc_with_line_break))
        output = dump(
            data,
            Dumper=GitHubDumper,
        )
        assert output == cleandoc(self.doc_with_line_break) + "\n"

    def test_line_break_with_default_differs(self):
        # With the default dumper, the multiline step is dumped as a quoted string.
        data = safe_load(cleandoc(self.doc_with_line_break))
        output = dump(data)
        assert output == (
            "steps:\n"
            "- name: Generate GitHub Summary\n"
            ' run: \'echo -e "# Summary" >> $GITHUB_STEP_SUMMARY\n'
            "\n"
            " poetry run -- nox -s project:report -- --format markdown >> "
            "$GITHUB_STEP_SUMMARY'\n"
        )

    def test_quote_regex_works_as_expected(self):
        # With GitHubDumper, the version strings keep their double quotes.
        data = safe_load(cleandoc(self.doc_with_version))
        output = dump(
            data,
            Dumper=GitHubDumper,
            sort_keys=False,  # if True, then re-orders the jobs alphabetically
        )
        assert output == cleandoc(self.doc_with_version) + "\n"

    def test_quote_regex_with_default_differs(self):
        # With the default dumper, "3.10" is single-quoted and 2.3.0 is unquoted.
        data = safe_load(cleandoc(self.doc_with_version))
        output = dump(
            data,
            sort_keys=False,  # if True, then re-orders the jobs alphabetically
        )
        assert output == (
            "steps:\n"
            "- name: Setup Python & Poetry Environment\n"
            " uses: exasol/python-toolbox/.github/actions/python-environment@v5\n"
            " with:\n"
            " python-version: '3.10'\n"
            " poetry-version: 2.3.0\n"
        )

    def test_quote_github_secrets_works_as_expected(self):
        # With GitHubDumper, the "__" value and the secret expression stay quoted.
        data = safe_load(cleandoc(self.doc_with_github_secrets))
        output = dump(
            data,
            Dumper=GitHubDumper,
            sort_keys=False,  # if True, then re-orders the jobs alphabetically
        )
        assert output == cleandoc(self.doc_with_github_secrets) + "\n"

    def test_quote_github_secrets_with_default_differs(self):
        # With the default dumper, both values are dumped without quotes.
        data = safe_load(cleandoc(self.doc_with_github_secrets))
        output = dump(
            data,
            sort_keys=False,  # if True, then re-orders the jobs alphabetically
        )
        assert output == (
            "steps:\n"
            "- name: PyPi Release\n"
            " env:\n"
            " POETRY_HTTP_BASIC_PYPI_USERNAME: __token__\n"
            " POETRY_HTTP_BASIC_PYPI_PASSWORD: ${{ secrets.PYPI_TOKEN }}\n"
            " run: poetry publish\n"
        )
Loading
Loading