Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions sbom/sbom/cmd_graph/savedcmd_parser/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only OR MIT
# Copyright (C) 2025 TNG Technology Consulting GmbH

from sbom.cmd_graph.savedcmd_parser.savedcmd_parser import parse_inputs_from_commands

# Names exported by `from sbom.cmd_graph.savedcmd_parser import *`;
# only the parsing entry point re-exported above is listed.
__all__ = ["parse_inputs_from_commands"]
124 changes: 124 additions & 0 deletions sbom/sbom/cmd_graph/savedcmd_parser/command_splitter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# SPDX-License-Identifier: GPL-2.0-only OR MIT
# Copyright (C) 2025 TNG Technology Consulting GmbH

import re
from dataclasses import dataclass


# Pattern for a simple, single-level `if <condition>; then <body>; fi` block at the
# start of a string. Nested if blocks are not supported. Group 1 captures the
# condition and group 2 the `then` body. Because re.DOTALL is not set, `.` does not
# match newlines, so condition and body must each fit on a single line.
IF_BLOCK_PATTERN = re.compile(
    r"""
^if(.*?);\s* # Match 'if <condition>;' (non-greedy)
then(.*?);\s* # Match 'then <body>;' (non-greedy)
fi\b # Match 'fi'
""",
    re.VERBOSE,
)


@dataclass
class IfBlock:
    """A single-level shell `if <condition>; then <then_statement>; fi` block."""

    # Text of the condition, i.e. what stands between `if` and `; then`.
    condition: str
    # Text of the body, i.e. what stands between `then` and `; fi`.
    then_statement: str


def _unwrap_outer_parentheses(s: str) -> str:
s = s.strip()
if not (s.startswith("(") and s.endswith(")")):
return s

count = 0
for i, char in enumerate(s):
if char == "(":
count += 1
elif char == ")":
count -= 1
# If count is 0 before the end, outer parentheses don't match
if count == 0 and i != len(s) - 1:
return s

# outer parentheses do match, unwrap once
return _unwrap_outer_parentheses(s[1:-1])


def _find_first_top_level_command_separator(
commands: str, separators: list[str] = [";", "&&"]
) -> tuple[int | None, int | None]:
in_single_quote = False
in_double_quote = False
in_curly_braces = 0
in_braces = 0
for i, char in enumerate(commands):
if char == "'" and not in_double_quote:
# Toggle single quote state (unless inside double quotes)
in_single_quote = not in_single_quote
elif char == '"' and not in_single_quote:
# Toggle double quote state (unless inside single quotes)
in_double_quote = not in_double_quote

if in_single_quote or in_double_quote:
continue

# Toggle braces state
if char == "{":
in_curly_braces += 1
if char == "}":
in_curly_braces -= 1

if char == "(":
in_braces += 1
if char == ")":
in_braces -= 1

if in_curly_braces > 0 or in_braces > 0:
continue

# return found separator position and separator length
for separator in separators:
if commands[i : i + len(separator)] == separator:
return i, len(separator)

return None, None


def split_commands(commands: str) -> list[str | IfBlock]:
    """
    Break a shell command line into its individual top-level commands.

    Parentheses enclosing the whole input are removed first. The remaining text is
    consumed from left to right:
    - A leading simple if-block is returned as an `IfBlock` instance.
    - Otherwise the text up to the next top-level separator (`;` or `&&`) is
      returned as one command.
    - Text without any further separator is returned as the final command.

    Args:
        commands (str): The raw command string.

    Returns:
        list[str | IfBlock]: Whitespace-trimmed commands and `IfBlock` objects in
            their original order.
    """
    parts: list[str | IfBlock] = []
    rest = _unwrap_outer_parentheses(commands)
    while rest:
        rest = rest.strip()

        # Leading if-block: keep condition and body, then skip trailing separators.
        if_match = IF_BLOCK_PATTERN.match(rest)
        if if_match is not None:
            parts.append(IfBlock(if_match.group(1).strip(), if_match.group(2).strip()))
            rest = rest[if_match.end() :].lstrip("; \n")
            continue

        position, length = _find_first_top_level_command_separator(rest)
        if position is None or length is None:
            # No separator left: everything that remains is the last command.
            parts.append(rest)
            break

        # Cut off the command in front of the separator and continue behind it.
        parts.append(rest[:position].strip())
        rest = rest[position + length :].strip()

    return parts
60 changes: 60 additions & 0 deletions sbom/sbom/cmd_graph/savedcmd_parser/savedcmd_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# SPDX-License-Identifier: GPL-2.0-only OR MIT
# Copyright (C) 2025 TNG Technology Consulting GmbH

from typing import Any
import sbom.sbom_logging as sbom_logging
from sbom.cmd_graph.savedcmd_parser.command_splitter import IfBlock, split_commands
from sbom.cmd_graph.savedcmd_parser.single_command_parsers import SINGLE_COMMAND_PARSERS
from sbom.cmd_graph.savedcmd_parser.tokenizer import CmdParsingError
from sbom.path_utils import PathStr


def parse_inputs_from_commands(commands: str, fail_on_unknown_build_command: bool) -> list[PathStr]:
    """
    Collect the input files referenced by a command line expression.

    The expression is split into single commands. Each command is passed to the
    first parser in `SINGLE_COMMAND_PARSERS` whose pattern matches it. Commands
    that cannot be handled are skipped and reported.

    Args:
        commands (str): Command line expression to parse.
        fail_on_unknown_build_command (bool): If True, skipped commands are reported via
            `sbom_logging.error`; if False, via `sbom_logging.warning`.

    Returns:
        list[PathStr]: Input file paths required by the commands, with surrounding
            whitespace and trailing slashes removed.
    """
    # Every problem in this call is reported with the same severity.
    report = sbom_logging.error if fail_on_unknown_build_command else sbom_logging.warning

    collected: list[PathStr] = []
    for command in split_commands(commands):
        if isinstance(command, IfBlock):
            # Inputs inside a 'then' body are never added to the result; they are
            # only detected so that the skipped body can be reported.
            nested_inputs = parse_inputs_from_commands(command.then_statement, fail_on_unknown_build_command)
            if nested_inputs:
                report(
                    "Skipped parsing command {then_statement} because input files in IfBlock 'then' statement are not supported",
                    then_statement=command.then_statement,
                )
            continue

        # Pick the first registered parser whose pattern matches the command.
        parser = None
        for pattern, candidate in SINGLE_COMMAND_PARSERS:
            if pattern.match(command):
                parser = candidate
                break

        if parser is None:
            report(
                "Skipped parsing command {single_command} because no matching parser was found",
                single_command=command,
            )
            continue

        try:
            collected.extend(parser(command))
        except CmdParsingError as e:
            report(
                "Skipped parsing command {single_command} because of command parsing error: {error_message}",
                single_command=command,
                error_message=e.message,
            )

    return [path.strip().rstrip("/") for path in collected]
Loading
Loading