From 9ad2f5ced43c98600e222959f7a950a33a095a0f Mon Sep 17 00:00:00 2001 From: s-heppner Date: Sun, 4 May 2025 19:50:59 +0200 Subject: [PATCH] parser.py: Allow equal signs in field values Previously, the parser did not account for the possibility of equal signs (`=`) in the values of fields and would crash the script with a non-descriptive error message, due to expecting a simple `string.split("=")` to work. However, it is completely possible to have equal signs in the value, for example in a URL field: `URL = "https://example.org/query?x=1"`. This adapts the parsing logic, making it stable against equal signs in field values. Furthermore, it cleans up the `BibTeXEntry.from_string` method a little bit and adds additional test cases to the unit tests. Fixes #3 --- bibtex_linter/parser.py | 20 +++++++++++++----- test/test_parser.py | 47 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 5 deletions(-) diff --git a/bibtex_linter/parser.py b/bibtex_linter/parser.py index f52b50d..1930ce8 100644 --- a/bibtex_linter/parser.py +++ b/bibtex_linter/parser.py @@ -1,4 +1,4 @@ -from typing import List, Dict +from typing import List, Dict, Tuple import dataclasses import enum import re @@ -85,10 +85,7 @@ def from_string(cls, entry_string: str) -> "BibTeXEntry": raw_fields = cls._split_fields(entry_string) fields: Dict[str, str] = {} for raw_field in raw_fields: - raw_key, raw_value = raw_field.split("=") - # Clean up key and value - key = raw_key.strip(" ").lower() - value = cls._parse_field_value(raw_value) + key, value = cls._split_field_into_key_and_value(raw_field) fields[key] = value return BibTeXEntry( @@ -157,6 +154,19 @@ def _parse_field_value(raw_value: str) -> str: return raw_value + @staticmethod + def _split_field_into_key_and_value(raw_field: str) -> Tuple[str, str]: + """ + Splits a field, such as `author = {{John Doe}},` into the field's key and value and cleans up both. 
+ + :param raw_field: + :return: + """ + parts = raw_field.split("=", 1) + key = parts[0].strip().lower() + value = parts[1].strip() if len(parts) > 1 else "" + return key, BibTeXEntry._parse_field_value(value) + def split_entries(raw_content: str) -> List[str]: """ diff --git a/test/test_parser.py b/test/test_parser.py index fd40738..497b787 100644 --- a/test/test_parser.py +++ b/test/test_parser.py @@ -110,6 +110,53 @@ def test_split_fields_missing_open_brace(self) -> None: with self.assertRaises(KeyError): BibTeXEntry._split_fields(entry) + def test_field_with_equals_in_value(self) -> None: + bibtex_string = """@misc{test_entry, + note = {This URL has equals: https://example.com/?id=123&lang=en}, + }""" + entry = BibTeXEntry.from_string(bibtex_string) + actual = entry.fields.get("note") + expected = "This URL has equals: https://example.com/?id=123&lang=en" + self.assertEqual(expected, actual) + + def test_double_braces(self) -> None: + bibtex_string = """@misc{test_entry, + title = {{Title with {{extra}} braces}}, + }""" + entry = BibTeXEntry.from_string(bibtex_string) + actual = entry.fields.get("title") + expected = "Title with {{extra}} braces" + self.assertEqual(expected, actual) + + def test_quoted_field(self) -> None: + bibtex_string = """@misc{test_entry, + author = "Jane Doe", + }""" + entry = BibTeXEntry.from_string(bibtex_string) + actual = entry.fields.get("author") + expected = "Jane Doe" + self.assertEqual(expected, actual) + + def test_multiline_field(self) -> None: + bibtex_string = """@misc{test_entry, + note = {This is a + multi-line + note.}, + }""" + entry = BibTeXEntry.from_string(bibtex_string) + actual = entry.fields.get("note") + expected = "This is a\n multi-line\n note." 
+ self.assertEqual(expected, actual) + + def test_field_with_url_and_brackets(self) -> None: + bibtex_string = """@misc{test_entry, + howpublished = {\\url{https://example.org/query?x=1&y=2}}, + }""" + entry = BibTeXEntry.from_string(bibtex_string) + actual = entry.fields.get("howpublished") + expected = "\\url{https://example.org/query?x=1&y=2}" + self.assertEqual(expected, actual) + class TestSplitEntries(unittest.TestCase): def test_single_entry(self) -> None: