From f0f9102771e261a01697c7afd681655135e2e40b Mon Sep 17 00:00:00 2001 From: Serhii A Date: Fri, 23 Jan 2026 14:08:35 +0100 Subject: [PATCH] Add tests to cover Finnish dates with 'klo' (#1301) * Add tests to cover Finnish dates with 'klo' * Add Finnish "klo" (o'clock) to skip tokens and add translation test - Add "klo" to skip tokens in Finnish language data files - Modified: dateparser/data/date_translation_data/fi.py - Modified: dateparser_data/supplementary_language_data/date_translation_data/fi.yaml - Add translation test case for Finnish dates with "klo" - Modified: tests/test_languages.py This fixes parsing of Finnish dates like "28 maalis klo 9:37" (March 28 at 9:37). The word "klo" (abbreviation for "kello", meaning "o'clock") is now properly skipped during tokenization, allowing correct date parsing. --- dateparser/data/date_translation_data/fi.py | 1 + .../date_translation_data/fi.yaml | 2 +- tests/test_date_parser.py | 5 +++++ tests/test_languages.py | 1 + 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/dateparser/data/date_translation_data/fi.py b/dateparser/data/date_translation_data/fi.py index 3b8f7646e..ecc6762f1 100644 --- a/dateparser/data/date_translation_data/fi.py +++ b/dateparser/data/date_translation_data/fi.py @@ -316,6 +316,7 @@ "locale_specific": {}, "skip": [ ":n", + "klo", " ", "'", ",", diff --git a/dateparser_data/supplementary_language_data/date_translation_data/fi.yaml b/dateparser_data/supplementary_language_data/date_translation_data/fi.yaml index 71e5fd24f..377aeef97 100644 --- a/dateparser_data/supplementary_language_data/date_translation_data/fi.yaml +++ b/dateparser_data/supplementary_language_data/date_translation_data/fi.yaml @@ -1,4 +1,4 @@ -skip: [":n"] +skip: [":n", "klo"] sentence_splitter_group : 1 diff --git a/tests/test_date_parser.py b/tests/test_date_parser.py index 654d37777..d343e647e 100644 --- a/tests/test_date_parser.py +++ b/tests/test_date_parser.py @@ -214,6 +214,11 @@ def setUp(self): # Finnish dates param("5.7.2018 5.45 ip.", datetime(2018, 7, 5, 17, 45)), param("5 .7 .2018 5.45 ip.", datetime(2018, 7, 5, 17, 45)), + param("28 maalis klo 9:37", datetime(2012, 3, 28, 9, 37)), + param("28 maalis 9:37", datetime(2012, 3, 28, 9, 37)), + param("15 tammi klo 14:30", datetime(2012, 1, 15, 14, 30)), + param("5 kesä klo 18:00", datetime(2012, 6, 5, 18, 0)), + param("12.5.2020 klo 16:45", datetime(2020, 5, 12, 16, 45)), # Croatian dates param("06. travnja 2021.", datetime(2021, 4, 6, 0, 0)), param("13. svibanj 2022.", datetime(2022, 5, 13, 0, 0)), diff --git a/tests/test_languages.py b/tests/test_languages.py index 111ef266b..1ba280794 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -214,6 +214,7 @@ def setUp(self): param("fi", "su joulu 16, 2015", "sunday december 16 2015"), param("fi", "1. tammikuuta, 2016", "1. january 2016"), param("fi", "tiistaina, 27. lokakuuta 2015", "tuesday 27. october 2015"), + param("fi", "28 maalis klo 9:37", "28 march 9:37"), # Japanese param("ja", "午後3時", "pm 3:00"), param("ja", "2時", "2:00"),