From 73449672b386881e2e321034a39327291496608b Mon Sep 17 00:00:00 2001 From: Matthew Spah Date: Wed, 4 Feb 2026 21:33:54 -0800 Subject: [PATCH 1/2] fix(models): tolerate inconsistent MLB API shapes + add gamepk regression tests - Enable number->string coercion for string fields (e.g. playEvents.base) - Coerce RunnerMovement.isOut null -> false - Accept person dicts for PlayEvent.umpire and linescore.offense first/second/third - Allow reviewDetails.additionalReviews to be string or list - Allow HitCoordinates x/y to be null - Add/expand regression tests keyed to reported gamepk IDs --- mlbstatsapi/models/base.py | 4 + .../game/livedata/linescore/attributes.py | 9 +- .../game/livedata/plays/play/attributes.py | 5 +- .../plays/play/playevent/playevent.py | 14 ++- .../plays/play/playrunner/attributes.py | 12 +- .../livedata/plays/playbyinning/attributes.py | 7 +- tests/test_gamepk_validation_regressions.py | 110 ++++++++++++++++++ 7 files changed, 147 insertions(+), 14 deletions(-) create mode 100644 tests/test_gamepk_validation_regressions.py diff --git a/mlbstatsapi/models/base.py b/mlbstatsapi/models/base.py index c438f9b..188063f 100644 --- a/mlbstatsapi/models/base.py +++ b/mlbstatsapi/models/base.py @@ -20,4 +20,8 @@ class MLBBaseModel(BaseModel): extra="ignore", alias_generator=to_camel_case, populate_by_name=True, + # MLB's API occasionally returns numbers for fields that are logically strings + # (e.g. liveData.plays.*.playEvents.*.base can be 1/2/3). + # Enable coercion to be resilient to these inconsistencies. + coerce_numbers_to_str=True, ) diff --git a/mlbstatsapi/models/game/livedata/linescore/attributes.py b/mlbstatsapi/models/game/livedata/linescore/attributes.py index 68fbe7d..5aafe2a 100644 --- a/mlbstatsapi/models/game/livedata/linescore/attributes.py +++ b/mlbstatsapi/models/game/livedata/linescore/attributes.py @@ -112,11 +112,12 @@ class LinescoreOffense(MLBBaseModel): in_hole: Optional[Person] = Field(default=None, alias="inHole") pitcher: Optional[Person] = None batting_order: Optional[int] = Field(default=None, alias="battingOrder") - first: Optional[str] = None - second: Optional[str] = None - third: Optional[str] = None + # MLB API sometimes returns these as person objects (id/link/fullName). + first: Optional[Person] = None + second: Optional[Person] = None + third: Optional[Person] = None - @field_validator('batter', 'on_deck', 'in_hole', 'pitcher', mode='before') + @field_validator('batter', 'on_deck', 'in_hole', 'pitcher', 'first', 'second', 'third', mode='before') @classmethod def empty_dict_to_none(cls, v: Any) -> Any: """Convert empty dicts to None.""" diff --git a/mlbstatsapi/models/game/livedata/plays/play/attributes.py b/mlbstatsapi/models/game/livedata/plays/play/attributes.py index fda0cf2..63fc195 100644 --- a/mlbstatsapi/models/game/livedata/plays/play/attributes.py +++ b/mlbstatsapi/models/game/livedata/plays/play/attributes.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Optional, Any, Dict, List, Union from pydantic import Field from mlbstatsapi.models.base import MLBBaseModel @@ -99,4 +99,5 @@ class PlayReviewDetails(MLBBaseModel): in_progress: bool = Field(alias="inProgress") review_type: str = Field(alias="reviewType") challenge_team_id: Optional[int] = Field(default=None, alias="challengeTeamId") - additional_reviews: Optional[str] = Field(default=None, alias="additionalReviews") + # MLB API returns this as either null, a string, or a list of review objects. + additional_reviews: Optional[Union[str, List[Dict[str, Any]]]] = Field(default=None, alias="additionalReviews") diff --git a/mlbstatsapi/models/game/livedata/plays/play/playevent/playevent.py b/mlbstatsapi/models/game/livedata/plays/play/playevent/playevent.py index 10b44ab..7a18521 100644 --- a/mlbstatsapi/models/game/livedata/plays/play/playevent/playevent.py +++ b/mlbstatsapi/models/game/livedata/plays/play/playevent/playevent.py @@ -1,5 +1,5 @@ -from typing import Optional -from pydantic import Field +from typing import Optional, Union, Any +from pydantic import Field, field_validator from mlbstatsapi.models.base import MLBBaseModel from mlbstatsapi.models.people import Person, Position from mlbstatsapi.models.data import Count, HitData, PitchData, PlayDetails @@ -55,7 +55,8 @@ class PlayEvent(MLBBaseModel): pfx_id: Optional[str] = Field(default=None, alias="pfxId") start_time: Optional[str] = Field(default=None, alias="startTime") end_time: Optional[str] = Field(default=None, alias="endTime") - umpire: Optional[str] = None + # MLB API sometimes returns a person object (id/link) instead of a string. + umpire: Optional[Union[str, Person]] = None base: Optional[str] = None play_id: Optional[str] = Field(default=None, alias="playId") pitch_number: Optional[int] = Field(default=None, alias="pitchNumber") @@ -71,3 +72,10 @@ class PlayEvent(MLBBaseModel): replaced_player: Optional[Person] = Field(default=None, alias="replacedPlayer") review_details: Optional[dict] = Field(default=None, alias="reviewDetails") injury_type: Optional[str] = Field(default=None, alias="injuryType") + + @field_validator("umpire", mode="before") + @classmethod + def _empty_dict_to_none(cls, v: Any) -> Any: + if isinstance(v, dict) and not v: + return None + return v diff --git a/mlbstatsapi/models/game/livedata/plays/play/playrunner/attributes.py b/mlbstatsapi/models/game/livedata/plays/play/playrunner/attributes.py index 788afbc..a31396a 100644 --- a/mlbstatsapi/models/game/livedata/plays/play/playrunner/attributes.py +++ b/mlbstatsapi/models/game/livedata/plays/play/playrunner/attributes.py @@ -1,5 +1,5 @@ from typing import Optional -from pydantic import Field +from pydantic import Field, field_validator from mlbstatsapi.models.base import MLBBaseModel from mlbstatsapi.models.people import Person, Position @@ -41,13 +41,21 @@ class RunnerMovement(MLBBaseModel): out_base : str Base runner was made out. """ - is_out: bool = Field(alias="isOut") + is_out: bool = Field(default=False, alias="isOut") out_number: Optional[int] = Field(default=None, alias="outNumber") origin_base: Optional[str] = Field(default=None, alias="originBase") start: Optional[str] = None end: Optional[str] = None out_base: Optional[str] = Field(default=None, alias="outBase") + @field_validator("is_out", mode="before") + @classmethod + def _coerce_is_out(cls, v): + # MLB API occasionally returns null for isOut. + if v is None: + return False + return v + class RunnerDetails(MLBBaseModel): """ diff --git a/mlbstatsapi/models/game/livedata/plays/playbyinning/attributes.py b/mlbstatsapi/models/game/livedata/plays/playbyinning/attributes.py index 56136f2..8bfe9d5 100644 --- a/mlbstatsapi/models/game/livedata/plays/playbyinning/attributes.py +++ b/mlbstatsapi/models/game/livedata/plays/playbyinning/attributes.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Optional from pydantic import Field from mlbstatsapi.models.base import MLBBaseModel from mlbstatsapi.models.teams import Team @@ -16,8 +16,9 @@ class HitCoordinates(MLBBaseModel): y : float Y coordinate for hit. """ - x: float - y: float + # MLB API occasionally returns null for these coordinates. + x: Optional[float] = None + y: Optional[float] = None class HitsByTeam(MLBBaseModel): diff --git a/tests/test_gamepk_validation_regressions.py b/tests/test_gamepk_validation_regressions.py new file mode 100644 index 0000000..ac23c24 --- /dev/null +++ b/tests/test_gamepk_validation_regressions.py @@ -0,0 +1,110 @@ +import pytest + +from mlbstatsapi.models.game.livedata.linescore.attributes import LinescoreOffense +from mlbstatsapi.models.game.livedata.plays.play.attributes import PlayReviewDetails +from mlbstatsapi.models.game.livedata.plays.play.playevent.playevent import PlayEvent +from mlbstatsapi.models.game.livedata.plays.playbyinning.attributes import HitCoordinates +from mlbstatsapi.models.game.livedata.plays.play.playrunner.attributes import RunnerMovement + + +# These gamepk IDs are taken from a user-submitted error log where the MLB API payload +# was rejected by Pydantic validation. The underlying issues are schema inconsistencies +# in MLB's API responses (int where str expected, dict where str expected, null where bool expected). + +GAMEPKS_BASE_INT = [ + 776160, 776165, 776219, 776252, 776286, 776320, 776336, 776351, 776386, 776420, + 776498, 776659, 776759, 776770, 776903, 776937, 777091, 777135, 777191, 777265, + 777305, 777445, 777488, 777514, 777555, 777570, 777650, 777722, + # Additional gamepks reported later (same base=int issue) + 744814, 744819, 744824, 744826, 744832, 744836, 744837, 744838, + 745146, 745542, 745796, 745799, + 747000, 747080, 747170, +] + +GAMEPKS_ISOUT_NULL = [ + 776320, 776545, + # Additional gamepks reported later (same isOut=null issue) + 744832, 744836, +] + +GAMEPKS_UMPIRE_DICT = [ + 776221, 776367, 776420, 776525, 776650, 776850, 776903, + # Additional gamepks reported later (same umpire=dict issue) + 744831, 747000, +] + +GAMEPKS_ADDITIONAL_REVIEWS_LIST = [ + 776259, 776386, 777213, 777544, 777555, + # Additional gamepks reported later (same additionalReviews=list issue) + 747000, +] + +GAMEPKS_LINESCORE_OFFENSE_RUNNER_DICT = [ + 776784, 777091, + # Additional gamepks reported later (same offense.*=person dict issue) + 744814, 747080, +] + +GAMEPKS_HIT_COORDS_NULL = [ + 778077, +] + + +@pytest.mark.parametrize("gamepk", GAMEPKS_BASE_INT) +def test_gamepk_play_event_base_coerces_int_to_str(gamepk: int): + # path in log: liveData.plays.*.playEvents.*.base is int + evt = PlayEvent(details={}, index=0, isPitch=True, type="pitch", base=1) + assert evt.base == "1" + + +@pytest.mark.parametrize("gamepk", GAMEPKS_ISOUT_NULL) +def test_gamepk_runner_movement_is_out_coerces_null_to_false(gamepk: int): + # path in log: liveData.plays.*.runners.*.movement.isOut is null + mv = RunnerMovement(isOut=None) + assert mv.is_out is False + + +@pytest.mark.parametrize("gamepk", GAMEPKS_UMPIRE_DICT) +def test_gamepk_play_event_umpire_accepts_person_object(gamepk: int): + # path in log: liveData.plays.*.playEvents.*.umpire is a dict {id, link} + evt = PlayEvent( + details={}, + index=0, + isPitch=True, + type="pitch", + umpire={"id": 484499, "link": "/api/v1/people/484499"}, + ) + assert evt.umpire is not None + + +@pytest.mark.parametrize("gamepk", GAMEPKS_ADDITIONAL_REVIEWS_LIST) +def test_gamepk_review_details_additional_reviews_accepts_list(gamepk: int): + # path in log: liveData.plays.allPlays.*.reviewDetails.additionalReviews is a list + rd = PlayReviewDetails( + isOverturned=False, + inProgress=False, + reviewType="NA", + additionalReviews=[{"isOverturned": False, "reviewType": "NA", "challengeTeamId": 120}], + ) + assert isinstance(rd.additional_reviews, list) + + +@pytest.mark.parametrize("gamepk", GAMEPKS_LINESCORE_OFFENSE_RUNNER_DICT) +def test_gamepk_linescore_offense_baserunners_accept_person_object(gamepk: int): + # path in log: liveData.linescore.offense.first/second/third is a dict person object + offense = LinescoreOffense( + team={"id": 120, "link": "/api/v1/teams/120"}, + first={"id": 682928, "fullName": "Runner One", "link": "/api/v1/people/682928"}, + second=None, + third=None, + ) + assert offense.first is not None + + +@pytest.mark.parametrize("gamepk", GAMEPKS_HIT_COORDS_NULL) +def test_gamepk_hit_coordinates_accept_null_x_y(gamepk: int): + coords = HitCoordinates(x=None, y=None) + assert coords.x is None + assert coords.y is None + + From 52c7ea70fe2a8e5d6960b446e95913dfd70f9022 Mon Sep 17 00:00:00 2001 From: Matthew Spah Date: Wed, 4 Feb 2026 21:35:26 -0800 Subject: [PATCH 2/2] chore: version bump 0.7.2 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a154c8d..c585073 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "python-mlb-statsapi" -version = "0.7.1" +version = "0.7.2" description = "mlbstatsapi python wrapper" authors = [ "Matthew Spah ",