Skip to content

Commit d0f786a

Browse files
committed
Preliminary support for years with unknown digits
1 parent 78cbbbb commit d0f786a

File tree

5 files changed

+26
-5
lines changed

5 files changed

+26
-5
lines changed

src/undate/converters/calendars/hebrew/transformer.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,24 @@ def hebrew_date(self, items):
2121
if child.data in ["year", "month", "day"]:
2222
# in each case we expect one integer value;
2323
# anonymous tokens convert to their value and cast as int
24-
value = int(child.children[0])
24+
try:
25+
value = int(child.children[0])
26+
except ValueError:
27+
# if missing digits are present, leave as a string
28+
value = child.children[0]
29+
2530
parts[str(child.data)] = value
2631

2732
# initialize and return an undate with year, month, day and
2833
# configured calendar (hebrew by default)
2934
# NOTE: use self.calendar so Seleucid can extend more easily
3035
return Undate(**parts, calendar=self.calendar)
3136

37+
def UNKNOWN_DIGITS(self, token):
38+
"""Convert unknown digits into undate missing digit character."""
39+
unknown_digits = token.strip("[]").replace(".", Undate.MISSING_DIGIT)
40+
return token.update(value=unknown_digits)
41+
3242
def year(self, items):
3343
# combine multiple parts into a single string
3444
value = "".join([str(i) for i in items])

src/undate/converters/grammars/hebrew.lark

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
%import common.WS
1+
%import common (WS, DIGIT)
22
%ignore WS
33

44
// Ignore periods and commas in dates
5-
%import .undate_common.DATE_PUNCTUATION
5+
%import .undate_common (DATE_PUNCTUATION, UNKNOWN_DIGITS)
66
%ignore DATE_PUNCTUATION
77

88
// only support day month year format for now
@@ -16,7 +16,7 @@ hebrew_date: weekday? day month year | month year | year
1616
// "first third of", seasons (can look for more examples)
1717

1818
// Hebrew calendar starts with year 1 in 3761 BCE
19-
year: /\d+/
19+
year: /\d+/ | DIGIT* UNKNOWN_DIGITS DIGIT*
2020

2121
// months
2222
month: month_1

src/undate/converters/grammars/undate_common.lark

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
11
// Some abbreviations use periods; some default date formats
22
// include commas. Ignore both
33
DATE_PUNCTUATION: "." | ","
4+
5+
// In some sources like PGP, unknown digits are represented by
6+
// brackets and periods, where the periods indicate the number of
7+
// unknown digits, e.g. 18[..] or 14[.]3
8+
UNKNOWN_DIGITS: /\[\.+\]/

tests/test_converters/test_calendars/test_hebrew/test_hebrew_parser.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@
3333
"536",
3434
"53",
3535
"3",
36+
# years with missing digit
37+
"53[.]2",
38+
"5[..]2",
3639
]
3740

3841

tests/test_converters/test_calendars/test_hebrew/test_hebrew_transformer.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ def test_hebrew_undate():
3232
("Thursday 12 Sivan 4795", HebrewUndate(4795, 3, 12), DatePrecision.DAY),
3333
# huh, current parsing completely ignores whitespace; do we want that?
3434
("Thursday12Sivan4795", HebrewUndate(4795, 3, 12), DatePrecision.DAY),
35+
# years with missing digits
36+
("53[.]2", HebrewUndate("53X2"), DatePrecision.YEAR),
37+
("5[..]2", HebrewUndate("5XX2"), DatePrecision.YEAR),
3538
]
3639

3740

@@ -41,7 +44,7 @@ def test_transform(date_string, expected, expected_precision):
4144
# parse the input string, then transform to undate object
4245
parsetree = hebrew_parser.parse(date_string)
4346
transformed_date = transformer.transform(parsetree)
44-
assert transformed_date == expected
47+
assert repr(transformed_date) == repr(expected)
4548
# currently only undates have date precision
4649
if isinstance(transformed_date, Undate):
4750
assert transformed_date.precision == expected_precision

0 commit comments

Comments
 (0)