From 9b20e176353adf3cfb33cd0d7c8ab02cfd50b23e Mon Sep 17 00:00:00 2001 From: s-heppner Date: Sun, 4 May 2025 20:22:44 +0200 Subject: [PATCH] Refactor parser to use string `entry_type`s Previously, we used the `EntryType` enum to define the possible entry types. This was a little too constrained, as it would fail parsing the `BibTeXEntry`, if the `EntryType` was unknown. So in order to allow for better usability of the tool, such as writing rules for entry types that may be completely uncommon for my personal field, we refactor to use a string `BibTeXEntry.entry_type` from now on. This change needed to be carried out everywhere where `EntryType` was used before, so it turned out to be a significant refactor of the code base. Fixes #5 --- bibtex_linter/default_rules.py | 20 +++++------ bibtex_linter/main.py | 2 +- bibtex_linter/parser.py | 61 +++++++++------------------------- bibtex_linter/verification.py | 4 +-- test/test_parser.py | 56 +++++++++---------- 5 files changed, 57 insertions(+), 86 deletions(-) diff --git a/bibtex_linter/default_rules.py b/bibtex_linter/default_rules.py index aab2024..0b8203d 100644 --- a/bibtex_linter/default_rules.py +++ b/bibtex_linter/default_rules.py @@ -1,6 +1,6 @@ from typing import List, Set -from bibtex_linter.parser import BibTeXEntry, EntryType +from bibtex_linter.parser import BibTeXEntry from bibtex_linter.verification import ( linter_rule, check_required_fields, @@ -8,7 +8,7 @@ ) -@linter_rule(entry_type=EntryType.ARTICLE) +@linter_rule(entry_type="article") def check_article(entry: BibTeXEntry) -> List[str]: """ Check that the required fields for `EntryType.ARTICLE` are there and that there are no fields present, that would @@ -34,7 +34,7 @@ def check_article(entry: BibTeXEntry) -> List[str]: return invariant_violations -@linter_rule(entry_type=EntryType.CONFERENCE) +@linter_rule(entry_type="conference") def check_conference(entry: BibTeXEntry) -> List[str]: """ Check that the required fields for 
`EntryType.CONFERENCE` are there and that there are no fields present, that would @@ -64,7 +64,7 @@ def check_conference(entry: BibTeXEntry) -> List[str]: return invariant_violations -@linter_rule(entry_type=EntryType.ONLINE) +@linter_rule(entry_type="online") def check_online(entry: BibTeXEntry) -> List[str]: """ Check that the required fields for `EntryType.ONLINE` are there and that there are no fields present, that would @@ -95,7 +95,7 @@ def check_online(entry: BibTeXEntry) -> List[str]: return invariant_violations -@linter_rule(entry_type=EntryType.BOOK) +@linter_rule(entry_type="book") def check_book(entry: BibTeXEntry) -> List[str]: """ Check that the required fields for `EntryType.BOOK` are there and that there are no fields present, that would @@ -123,7 +123,7 @@ def check_book(entry: BibTeXEntry) -> List[str]: return invariant_violations -@linter_rule(entry_type=EntryType.IN_BOOK) +@linter_rule(entry_type="inbook") def check_in_book(entry: BibTeXEntry) -> List[str]: """ Check that the required fields for `EntryType.IN_BOOK` are there and that there are no fields present, that would @@ -153,7 +153,7 @@ def check_in_book(entry: BibTeXEntry) -> List[str]: return invariant_violations -@linter_rule(entry_type=EntryType.IN_COLLECTION) +@linter_rule(entry_type="incollection") def check_in_collection(entry: BibTeXEntry) -> List[str]: """ Check that the required fields for `EntryType.IN_COLLECTION` are there and that there are no fields present, that @@ -180,7 +180,7 @@ def check_in_collection(entry: BibTeXEntry) -> List[str]: return invariant_violations -@linter_rule(entry_type=EntryType.STANDARD) +@linter_rule(entry_type="standard") def check_standard(entry: BibTeXEntry) -> List[str]: """ Check that the required fields for `EntryType.STANDARD` are there and that there are no fields present, that would @@ -212,7 +212,7 @@ def check_standard(entry: BibTeXEntry) -> List[str]: return invariant_violations -@linter_rule(entry_type=EntryType.TECH_REPORT) 
+@linter_rule(entry_type="techreport") def check_tech_report(entry: BibTeXEntry) -> List[str]: """ Check that the required fields for `EntryType.TECH_REPORT` are there and that there are no fields present, that @@ -242,7 +242,7 @@ def check_tech_report(entry: BibTeXEntry) -> List[str]: return invariant_violations -@linter_rule(entry_type=EntryType.MISC) +@linter_rule(entry_type="misc") def check_misc(entry: BibTeXEntry) -> List[str]: """ Check that the required fields for `EntryType.MISC` are there and that there are no fields present, that would diff --git a/bibtex_linter/main.py b/bibtex_linter/main.py index f0d9f75..94c8acd 100644 --- a/bibtex_linter/main.py +++ b/bibtex_linter/main.py @@ -53,7 +53,7 @@ def main() -> None: total_number_of_violations += len(violations) if violations: had_violations = True - print(f"\nEntry '{entry.name}' of type '{entry.entry_type.name}' failed verification:") + print(f"\nEntry '{entry.name}' of type '{entry.entry_type}' failed verification:") print(" ❌ Invariant Violations:") for issue in violations: print(f" - {issue}") diff --git a/bibtex_linter/parser.py b/bibtex_linter/parser.py index 1930ce8..168cfe7 100644 --- a/bibtex_linter/parser.py +++ b/bibtex_linter/parser.py @@ -4,47 +4,10 @@ import re -class EntryType(enum.Enum): - """ - A collection of entry types that the LaTeX `IEEEtran` offers. Note that these only include what I - need at the moment. The full list can be found at: - https://ctan.net/macros/latex/contrib/IEEEtran/bibtex/IEEEtran_bst_HOWTO.pdf - """ - ARTICLE = "ARTICLE" # A typical journal article - CONFERENCE = "CONFERENCE" # A typical conference paper. Alias to: `IN_PROCEEDINGS` - ONLINE = "ONLINE" # A reference on the internet. 
Alias to: `ELECTRONIC` - BOOK = "BOOK" # Referencing a whole book - IN_BOOK = "IN_BOOK" # Referencing a part of a book (chapters or pages) - IN_COLLECTION = "IN_COLLECTION" # Referencing a part of a book that has its own name - STANDARD = "STANDARD" # Used for proposed or formally published standards - TECH_REPORT = "TECH_REPORT" # Used for technical reports, or reports about standards. Compare to `STANDARD`! - MISC = "MISC" # Anything else that does not fit the above - - @classmethod - def from_string(cls, s: str) -> 'EntryType': - """ - Get the `EntryType` from the string. Can deal with common aliases. - - :raises: KeyError, if the given string does not correspond to one of the entry types - """ - s = s.upper() - str_to_entry_type_map: Dict[str, "EntryType"] = { - "ARTICLE": EntryType.ARTICLE, - "CONFERENCE": EntryType.CONFERENCE, - "INPROCEEDINGS": EntryType.CONFERENCE, - - "BOOK": EntryType.BOOK, - "INBOOK": EntryType.IN_BOOK, - "INCOLLECTION": EntryType.IN_COLLECTION, - "STANDARD": EntryType.STANDARD, - "TECHREPORT": EntryType.TECH_REPORT, - - "ONLINE": EntryType.ONLINE, - "ELECTRONIC": EntryType.ONLINE, - - "MISC": EntryType.MISC, - } - return str_to_entry_type_map[s] +RESOLVE_ENTRY_TYPE_ALIAS: Dict[str, str] = { + "inproceedings": "conference", + "electronic": "online", +} @dataclasses.dataclass @@ -52,7 +15,8 @@ class BibTeXEntry: """ An entry in a BibTeX file - :ivar entry_type: Type of the entry (e.g. `@misc`). See `EntryType` for details + :ivar entry_type: Type of the entry (e.g. `@misc`). We always assume that the `entry_type` is in small letters only, + and we transform some common `entry_type` aliases to their "canonical" form (e.g. the name I prefer to use). :ivar name: Name or ID of the entry. So basically what is here: `@misc{Name_or_ID,` :ivar fields: Fields of the entry, as a Dict mapping the field key (e.g. `author`) to its cleaned up value. 
@@ -72,14 +36,21 @@ class BibTeXEntry: will be parsed to: `{"note": "This value\nspans multiple\nlines"}`. For the implementation details, check out the `BibTeXEntry._parse_field_value` static method. """ - entry_type: EntryType + entry_type: str name: str fields: Dict[str, str] @classmethod def from_string(cls, entry_string: str) -> "BibTeXEntry": - entry_type_string: str = entry_string.split("{")[0].lstrip("@") - entry_type = EntryType.from_string(entry_type_string) + """ + Parse a `BibTeXEntry` from a string. + """ + # First, we find and canonicalize the `entry_type` + entry_type_string: str = entry_string.split("{")[0].lstrip("@").lower() + if RESOLVE_ENTRY_TYPE_ALIAS.get(entry_type_string): + entry_type: str = RESOLVE_ENTRY_TYPE_ALIAS[entry_type_string] + else: + entry_type = entry_type_string name: str = entry_string.split("{")[1].split(",")[0] raw_fields = cls._split_fields(entry_string) diff --git a/bibtex_linter/verification.py b/bibtex_linter/verification.py index 9da9463..391af64 100644 --- a/bibtex_linter/verification.py +++ b/bibtex_linter/verification.py @@ -6,7 +6,7 @@ """ from typing import Callable, TypeVar, List, Optional, Set -from bibtex_linter.parser import BibTeXEntry, EntryType +from bibtex_linter.parser import BibTeXEntry # The dynamic list of known rules. # This list gets updated when a method with the `@linter_rule` decorator gets imported. @@ -17,7 +17,7 @@ LINTER_RULE_TYPE = TypeVar("LINTER_RULE_TYPE", bound=Callable[[BibTeXEntry], List[str]]) -def linter_rule(entry_type: Optional[EntryType] = None) -> Callable[[LINTER_RULE_TYPE], LINTER_RULE_TYPE]: +def linter_rule(entry_type: Optional[str] = None) -> Callable[[LINTER_RULE_TYPE], LINTER_RULE_TYPE]: """ Decorator to mark a method defines rules to be checked by the linter for a specific entry type. 
diff --git a/test/test_parser.py b/test/test_parser.py index 26621e5..9eb6276 100644 --- a/test/test_parser.py +++ b/test/test_parser.py @@ -2,7 +2,7 @@ import os from typing import Dict, List -from bibtex_linter.parser import EntryType, BibTeXEntry, split_entries, parse_bibtex_file +from bibtex_linter.parser import BibTeXEntry, split_entries, parse_bibtex_file class TestBibTeXEntry(unittest.TestCase): @@ -245,15 +245,15 @@ def test_parse_all_entries(self) -> None: self.assertEqual(17, len(entries)) expected_types = { - EntryType.ARTICLE: 1, - EntryType.CONFERENCE: 1, - EntryType.ONLINE: 1, - EntryType.TECH_REPORT: 1, - EntryType.BOOK: 1, - EntryType.MISC: 9, - EntryType.STANDARD: 1, - EntryType.IN_BOOK: 1, - EntryType.IN_COLLECTION: 1, + "article": 1, + "conference": 1, + "online": 1, + "techreport": 1, + "book": 1, + "misc": 9, + "standard": 1, + "inbook": 1, + "incollection": 1, } for entry_type, expected_count in expected_types.items(): @@ -262,9 +262,9 @@ def test_parse_all_entries(self) -> None: self.assertEqual(expected_count, actual_count) def test_entry_fields_and_values(self) -> None: - expected_entries: List[Dict[str, EntryType | Dict[str, str]]] = [ + expected_entries: List[Dict[str, str | Dict[str, str]]] = [ { - "type": EntryType.ARTICLE, + "type": "article", "fields": { "author": "Tests basic article", "title": "Standard field format", @@ -272,7 +272,7 @@ def test_entry_fields_and_values(self) -> None: } }, { - "type": EntryType.CONFERENCE, + "type": "conference", "fields": { "author": "Should map to CONFERENCE", "title": "Using alias INPROCEEDINGS", @@ -280,21 +280,21 @@ def test_entry_fields_and_values(self) -> None: } }, { - "type": EntryType.ONLINE, + "type": "online", "fields": { "author": "Should map to ONLINE", "url": "https://example.com" } }, { - "type": EntryType.TECH_REPORT, + "type": "techreport", "fields": { "author": "Should map to TECH_REPORT", "title": "Tech report via alias" } }, { - "type": EntryType.BOOK, + "type": "book", 
"fields": { "author": "Extra spaces around field and value", "title": "Trimmed Title", @@ -302,73 +302,73 @@ def test_entry_fields_and_values(self) -> None: } }, { - "type": EntryType.MISC, + "type": "misc", "fields": { "note": "This has nested braces inside", "comment": "But only outermost braces should be stripped" } }, { - "type": EntryType.MISC, + "type": "misc", "fields": { "author": "Double brace test", "title": "Another level of nesting" } }, { - "type": EntryType.MISC, + "type": "misc", "fields": { "author": "Quoted Author", "title": "Simple quoted title" } }, { - "type": EntryType.MISC, + "type": "misc", "fields": { "note": "This value\nspans multiple\nlines" } }, { - "type": EntryType.MISC, + "type": "misc", "fields": { "howpublished": r"\url{https://wrapped-url.com}" } }, { - "type": EntryType.MISC, + "type": "misc", "fields": { "title": r"\LaTeX command in value" } }, { - "type": EntryType.MISC, + "type": "misc", "fields": { "author": "Trailing Comma", "title": "Should be OK" } }, { - "type": EntryType.MISC, + "type": "misc", "fields": { "author": "No Trailing Comma" } }, { - "type": EntryType.MISC, + "type": "misc", "fields": { "author": "Newlines and spacing\neverywhere", "title": "Still valid" } }, { - "type": EntryType.STANDARD, + "type": "standard", "fields": { "author": "Tests EntryType.STANDARD", "title": "Formal standard ref" } }, { - "type": EntryType.IN_BOOK, + "type": "inbook", "fields": { "author": "Part of a book", "title": "Chapter Title", @@ -376,7 +376,7 @@ def test_entry_fields_and_values(self) -> None: } }, { - "type": EntryType.IN_COLLECTION, + "type": "incollection", "fields": { "author": "Self-contained part of a collection", "title": "In Collection Title",