-
Notifications
You must be signed in to change notification settings - Fork 11
Lint flow config parser and format checker #118
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| # Copyright lowRISC contributors (OpenTitan project). | ||
| # Licensed under the Apache License, Version 2.0, see LICENSE for details. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| """Flow configuration checking.""" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
| # Copyright lowRISC contributors (OpenTitan project). | ||
| # Licensed under the Apache License, Version 2.0, see LICENSE for details. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| """Check flow configuration files for validity.""" | ||
|
|
||
| from pathlib import Path | ||
|
|
||
| import hjson | ||
|
|
||
| from dvsim.linting.parser import parse_lint_flow_config | ||
|
|
||
|
|
||
class FlowCheckError(Exception):
    """Raised when a flow configuration file cannot be read or validated."""
|
|
||
|
|
||
def detect_flow_type(hjson_path: Path) -> str | None:
    """Return the flow type declared in an hjson configuration file.

    Args:
        hjson_path: Path to the hjson configuration file

    Returns:
        Flow type string (e.g., "lint", "sim") or None if not detected

    Raises:
        FlowCheckError: If the file cannot be read or parsed

    """
    try:
        with hjson_path.open() as config_file:
            parsed = hjson.load(config_file, use_decimal=True)
    except OSError as read_err:
        msg = f"Failed to read file: {read_err}"
        raise FlowCheckError(msg) from read_err
    except hjson.HjsonDecodeError as parse_err:
        msg = f"Failed to parse hjson: {parse_err}"
        raise FlowCheckError(msg) from parse_err

    # The "flow" key is optional in the file; absence maps to None.
    return parsed.get("flow")
|
|
||
|
|
||
def check_flow_config(hjson_path: Path) -> tuple[bool, str, str | None]:
    """Check a flow configuration file for validity.

    Args:
        hjson_path: Path to the hjson configuration file

    Returns:
        Tuple of (success, message, flow_type):
            - success: True if config is valid, False otherwise
            - message: Human-readable message describing the result
            - flow_type: The detected flow type or None

    """
    hjson_path = Path(hjson_path)

    # Kept separate from the try block below on purpose: a nonexistent
    # config and an unparseable config are different failure modes and
    # callers get a distinct message for each.
    if not hjson_path.exists():
        return False, f"File not found: {hjson_path}", None

    # Detect flow type
    try:
        flow_type = detect_flow_type(hjson_path)
    except FlowCheckError as e:
        return False, str(e), None

    if flow_type is None:
        return False, "No 'flow' field found in configuration", None

    # Check based on flow type
    if flow_type == "lint":
        # NOTE(review): parse_lint_flow_config re-reads the hjson file that
        # detect_flow_type already loaded; consider threading the parsed
        # data through to avoid parsing the file twice.
        try:
            config = parse_lint_flow_config(hjson_path)
        except (FileNotFoundError, RuntimeError) as e:
            return False, f"Invalid lint flow config: {e}", flow_type
        # Success path falls through directly; no try/else needed since the
        # except branches all return.
        return True, f"Valid lint flow config: {config.name}", flow_type

    # Other flow types not yet supported
    return False, f"Flow type '{flow_type}' checking not yet implemented", flow_type
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,78 @@ | ||
| # Copyright lowRISC contributors (OpenTitan project). | ||
| # Licensed under the Apache License, Version 2.0, see LICENSE for details. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| """Pydantic models for lint flow configuration.""" | ||
|
|
||
| from pydantic import BaseModel, ConfigDict, Field | ||
|
|
||
|
|
||
class MessageBucket(BaseModel):
    """A single bucket used to classify lint messages.

    Each bucket pairs a message category with a severity and an optional
    human-readable label.
    """

    # Bucket definitions are fixed once parsed, hence frozen.
    model_config = ConfigDict(frozen=True)

    category: str = Field(
        ...,
        description="Message category (e.g., 'flow', 'lint')",
    )
    severity: str = Field(
        ...,
        description="Message severity (e.g., 'info', 'warning', 'error')",
    )
    label: str = Field(
        default="",
        description="Optional label for the bucket",
    )
|
|
||
|
|
||
class LintFlowConfig(BaseModel):
    """Configuration for the lint flow.

    This model represents the core lint flow configuration that can be parsed
    from hjson files. It focuses on lint-specific fields and does not include
    all the base flow configuration fields that are handled by the existing
    FlowCfg class hierarchy.
    """

    # Declared first for consistency with MessageBucket.
    model_config = ConfigDict(
        frozen=True,  # Make the model immutable
        extra="forbid",  # Forbid extra fields to catch configuration errors
    )

    # Flow identification
    flow: str = Field(default="lint", description="Flow type identifier")
    name: str = Field(..., description="Name of the lint configuration")

    # Build configuration
    # TODO(review): the empty-string defaults below mean "not set"; consider
    # `str | None = None` to make the optional fields explicit.
    build_dir: str = Field(default="", description="Build directory path")
    build_log: str = Field(default="", description="Build log file path")
    build_cmd: str = Field(default="", description="Build command")
    build_opts: list[str] = Field(default_factory=list, description="Build options")

    # FuseSoC configuration
    fusesoc_core: str = Field(default="", description="FuseSoC core to use")
    additional_fusesoc_argument: str = Field(
        default="", description="Additional FuseSoC argument (e.g., mapping)"
    )

    # Lint-specific configuration
    is_style_lint: bool = Field(default=False, description="Whether this is a style lint run")
    # TODO(review): severity names look like they should become an enum in a
    # later PR.
    report_severities: list[str] = Field(
        default_factory=lambda: ["info", "warning", "error"],
        description="Message severities to include in reports",
    )
    fail_severities: list[str] = Field(
        default_factory=lambda: ["warning", "error"],
        description="Message severities that cause the flow to fail",
    )
    message_buckets: list[MessageBucket] = Field(
        default_factory=list, description="Message bucket configuration"
    )

    # Tool configuration
    tool: str | None = Field(default=None, description="Lint tool to use")
    dut: str = Field(default="", description="Design under test name")

    # Directory paths
    scratch_path: str = Field(default="", description="Scratch directory path")
    rel_path: str = Field(default="", description="Relative path for results")

    # Import and use configurations
    import_cfgs: list[str] = Field(
        default_factory=list, description="Configuration files to import"
    )
    use_cfgs: list[dict | str] = Field(
        default_factory=list, description="Sub-configurations to use (for primary configs)"
    )
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,95 @@ | ||
| # Copyright lowRISC contributors (OpenTitan project). | ||
| # Licensed under the Apache License, Version 2.0, see LICENSE for details. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| """Helper class for parsing lint reports into a generic hjson format.""" | ||
|
|
||
| import re | ||
| from pathlib import Path | ||
|
|
||
| import hjson | ||
|
|
||
|
|
||
# TODO(#9079): this class will be refactored so that it can be integrated into
# the Dvsim core code.
class LintParser:
    """Collects lint messages from log files into named severity buckets.

    Messages are grouped into buckets such as "flow_error" or "lint_warning"
    and can be written out as a generic hjson report.
    """

    def __init__(self) -> None:
        """Initialize empty message buckets and their severity mapping."""
        self.buckets: dict[str, list[str]] = {
            "flow_info": [],
            "flow_warning": [],
            "flow_error": [],
            "lint_info": [],
            "lint_warning": [],
            "lint_error": [],
            # this bucket is temporary and will be removed at the end of the
            # parsing pass.
            "fusesoc-error": [],
        }
        # Maps each bucket name to the severity it is reported under.
        self.severities = {
            "flow_info": "info",
            "flow_warning": "warning",
            "flow_error": "error",
            "lint_info": "info",
            "lint_warning": "warning",
            "lint_error": "error",
        }

    def extract_messages(
        self, log_content: str, patterns: list[tuple[str, str]]
    ) -> None:
        """Extract messages from the string buffer log_content.

        Args:
            log_content: Full text of a log file.
            patterns: List of (bucket_name, regex) tuples. A substring that
                matches two different patterns will be stored only in the
                bucket associated with the first pattern that matches.

        """
        # Iterate through all the patterns in reverse order and store hits
        # against the index of their first character. Doing this in reverse
        # order means that patterns earlier in the list "win": if two different
        # patterns match a particular substring, only the bucket of the first
        # one will end up in the found dict.
        found: dict[int, tuple[str, str]] = {}
        for bucket, pattern in reversed(patterns):
            for m in re.finditer(pattern, log_content, flags=re.MULTILINE):
                found[m.start()] = (bucket, m.group(0))

        # Now that we've ignored duplicate hits, flatten things out into
        # self.buckets.
        for bucket, hit in found.values():
            self.buckets[bucket].append(hit)

    def get_results(
        self, args: dict[Path, list[tuple[str, str]]]
    ) -> dict[str, int]:
        """Parse report and corresponding logfiles and extract error, warning
        and info messages for each IP present in the result folder.

        Args:
            args: Maps each log file path to the (bucket, regex) pattern
                tuples that should be applied to it.

        Returns:
            Counts of extracted messages keyed by "info"/"warning"/"error".

        """
        # Open all log files
        for path, patterns in args.items():
            try:
                with path.open() as f:
                    log_content = f.read()
                    self.extract_messages(log_content, patterns)
            except OSError as err:
                self.buckets["flow_error"] += [f"IOError: {err}"]

        # If there are no errors or warnings, add the "fusesoc-error" field to
        # "errors" (which will be reported as tooling errors). Remove the
        # "fusesoc-error" field either way.
        num_messages = {"info": 0, "warning": 0, "error": 0}
        for key, sev in self.severities.items():
            num_messages[sev] += len(self.buckets[key])
        if num_messages["error"] == 0 and num_messages["warning"] == 0:
            # NOTE(review): the returned counts do not include any fusesoc
            # errors moved into "flow_error" here — confirm whether that is
            # intentional.
            self.buckets["flow_error"] = self.buckets["fusesoc-error"]
        del self.buckets["fusesoc-error"]

        return num_messages

    def write_results_as_hjson(self, outpath: Path) -> None:
        """Write the collected message buckets to outpath as hjson."""
        with outpath.open("w") as results_file:
            # Construct results dict for Hjson file.
            hjson.dump(
                self.buckets,
                results_file,
                ensure_ascii=False,
                for_json=True,
                use_decimal=True,
            )
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm. Is this the right thing to do? We end up squashing "bad config" and "nonexistent config" as a result. I think the API would probably be better if they were left separate?