From f1453aceb7bf0cd4ed86e3f8532201f672982f44 Mon Sep 17 00:00:00 2001 From: garciam Date: Mon, 8 Jan 2024 18:09:45 +0100 Subject: [PATCH 1/5] implement coords validation --- xarray_schema/dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray_schema/dataset.py b/xarray_schema/dataset.py index 9be35ee..be73d9d 100644 --- a/xarray_schema/dataset.py +++ b/xarray_schema/dataset.py @@ -79,8 +79,8 @@ def validate(self, ds: xr.Dataset) -> None: else: da_schema.validate(ds.data_vars[key]) - if self.coords is not None: # pragma: no cover - raise NotImplementedError('coords schema not implemented yet') + if self.coords is not None: + self.coords.validate(self.coords) if self.attrs: self.attrs.validate(ds.attrs) From c79139cb10f44ebd2a94549d68ceede07ce89568 Mon Sep 17 00:00:00 2001 From: garciam Date: Tue, 9 Jan 2024 09:57:11 +0100 Subject: [PATCH 2/5] add tests for DatasetSchema coordinates from_json method has been fixed too. --- tests/test_core.py | 27 ++++++++++++++++++++------- xarray_schema/dataset.py | 6 +++--- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 688eca6..ba0d7d3 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -222,29 +222,42 @@ def test_dataset_empty_constructor(): def test_dataset_example(ds): ds_schema = DatasetSchema( - { + data_vars={ 'foo': DataArraySchema(name='foo', dtype=np.int32, dims=['x']), 'bar': DataArraySchema(name='bar', dtype=np.floating, dims=['x', 'y']), - } + }, + coords={'x': DataArraySchema(name='x', dtype=np.int64, dims=['x'])}, + attrs={} ) jsonschema.validate(ds_schema.json, ds_schema._json_schema) assert list(ds_schema.json['data_vars'].keys()) == ['foo', 'bar'] + assert list(ds_schema.json['coords']["coords"].keys()) == ['x'] ds_schema.validate(ds) - ds['foo'] = ds.foo.astype('float32') + ds2 = ds.copy() + ds2['foo'] = ds2.foo.astype('float32') with pytest.raises(SchemaError, match='dtype'): - ds_schema.validate(ds) + ds_schema.validate(ds2) - ds = ds.drop_vars('foo') + ds2 = ds2.drop_vars('foo') with pytest.raises(SchemaError, match='variable foo'): - ds_schema.validate(ds) + ds_schema.validate(ds2) + + ds3 = ds.copy() + ds3['x'] = ds3.x.astype('float32') + with pytest.raises(SchemaError, match='dtype'): + ds_schema.validate(ds3) + + ds3 = ds3.drop_vars('x') + with pytest.raises(SchemaError, match='coords has missing keys'): + ds_schema.validate(ds3) # json roundtrip rt_schema = DatasetSchema.from_json(ds_schema.json) assert isinstance(rt_schema, DatasetSchema) - rt_schema.json == ds_schema.json + assert rt_schema.json == ds_schema.json def test_checks_ds(ds): diff --git a/xarray_schema/dataset.py b/xarray_schema/dataset.py index be73d9d..4ab7c0a 100644 --- a/xarray_schema/dataset.py +++ b/xarray_schema/dataset.py @@ -47,9 +47,9 @@ def from_json(cls, obj: dict): k: DataArraySchema.from_json(v) for k, v in obj['data_vars'].items() } if 'coords' in obj: - kwargs['coords'] = {k: CoordsSchema.from_json(v) for k, v in obj['coords'].items()} + kwargs['coords'] = CoordsSchema.from_json(obj["coords"]) if 'attrs' in obj: - kwargs['attrs'] = {k: AttrsSchema.from_json(v) for k, v in obj['attrs'].items()} + kwargs['attrs'] = AttrsSchema.from_json(obj["attrs"]) return cls(**kwargs) @@ -80,7 +80,7 @@ def validate(self, ds: xr.Dataset) -> None: da_schema.validate(ds.data_vars[key]) if self.coords is not None: - self.coords.validate(self.coords) + self.coords.validate(ds.coords) if self.attrs: self.attrs.validate(ds.attrs) From d3ba8807e2b69a26a98521c8ee927a13df0f0a5a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Jan 2024 08:59:35 +0000 Subject: [PATCH 3/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_core.py | 4 ++-- xarray_schema/dataset.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index ba0d7d3..5261e9b 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -227,13 +227,13 @@ def test_dataset_example(ds): 'bar': DataArraySchema(name='bar', dtype=np.floating, dims=['x', 'y']), }, coords={'x': DataArraySchema(name='x', dtype=np.int64, dims=['x'])}, - attrs={} + attrs={}, ) jsonschema.validate(ds_schema.json, ds_schema._json_schema) assert list(ds_schema.json['data_vars'].keys()) == ['foo', 'bar'] - assert list(ds_schema.json['coords']["coords"].keys()) == ['x'] + assert list(ds_schema.json['coords']['coords'].keys()) == ['x'] ds_schema.validate(ds) ds2 = ds.copy() diff --git a/xarray_schema/dataset.py b/xarray_schema/dataset.py index 4ab7c0a..a7d2289 100644 --- a/xarray_schema/dataset.py +++ b/xarray_schema/dataset.py @@ -47,9 +47,9 @@ def from_json(cls, obj: dict): k: DataArraySchema.from_json(v) for k, v in obj['data_vars'].items() } if 'coords' in obj: - kwargs['coords'] = CoordsSchema.from_json(obj["coords"]) + kwargs['coords'] = CoordsSchema.from_json(obj['coords']) if 'attrs' in obj: - kwargs['attrs'] = AttrsSchema.from_json(obj["attrs"]) + kwargs['attrs'] = AttrsSchema.from_json(obj['attrs']) return cls(**kwargs) From fb05733bcfe01b491885a94a06b06cefc9275596 Mon Sep 17 00:00:00 2001 From: garciam Date: Tue, 9 Jan 2024 17:32:20 +0100 Subject: [PATCH 4/5] try to fix the attributes validation and roundtrip to JSON --- tests/test_core.py | 12 ++++++++---- xarray_schema/components.py | 9 ++++----- xarray_schema/dataarray.py | 4 ++-- xarray_schema/dataset.py | 6 +++--- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index ba0d7d3..5f1c58e 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -24,7 +24,7 @@ def ds(): ds = xr.Dataset( { 'x': xr.DataArray(np.arange(4) - 2, dims='x'), - 'foo': xr.DataArray(np.ones(4, dtype='i4'), dims='x'), + 'foo': xr.DataArray(np.ones(4, dtype='i4'), dims='x', attrs=dict(units="K")), 'bar': xr.DataArray(np.arange(8, dtype=np.float32).reshape(4, 2), dims=('x', 'y')), } ) @@ -223,11 +223,15 @@ def test_dataset_empty_constructor(): def test_dataset_example(ds): ds_schema = DatasetSchema( data_vars={ - 'foo': DataArraySchema(name='foo', dtype=np.int32, dims=['x']), + 'foo': DataArraySchema( + name='foo', + dtype=np.int32, + dims=['x'], + attrs=AttrsSchema(attrs=dict(units=AttrSchema(value="K"))) + ), 'bar': DataArraySchema(name='bar', dtype=np.floating, dims=['x', 'y']), }, coords={'x': DataArraySchema(name='x', dtype=np.int64, dims=['x'])}, - attrs={} ) jsonschema.validate(ds_schema.json, ds_schema._json_schema) @@ -284,7 +288,7 @@ def test_dataset_with_attrs_schema(): expected_value = 'expected_value' actual_value = 'actual_value' ds = xr.Dataset(attrs={name: actual_value}) - ds_schema = DatasetSchema(attrs={name: AttrSchema(value=expected_value)}) + ds_schema = DatasetSchema(dict(attrs={name: AttrSchema(value=expected_value)})) jsonschema.validate(ds_schema.json, ds_schema._json_schema) ds_schema_2 = DatasetSchema(attrs=AttrsSchema({name: AttrSchema(value=expected_value)})) diff --git a/xarray_schema/components.py b/xarray_schema/components.py index d212ddc..7c21474 100644 --- a/xarray_schema/components.py +++ b/xarray_schema/components.py @@ -396,13 +396,12 @@ def __init__( @classmethod def from_json(cls, obj: dict): - attrs = {} - for key, val in obj['attrs'].items(): - attrs[key] = AttrSchema(**val) + attrs = obj.pop("attrs") if "attrs" in obj else {} + attrs = {k: AttrSchema(**v) for k, v in attrs.items()} return cls( attrs, - require_all_keys=obj['require_all_keys'], - allow_extra_keys=obj['allow_extra_keys'], + require_all_keys=obj.get('require_all_keys'), + allow_extra_keys=obj.get('allow_extra_keys') ) def validate(self, attrs: Any) -> None: diff --git a/xarray_schema/dataarray.py b/xarray_schema/dataarray.py index 4a71e62..7f3d8d0 100644 --- a/xarray_schema/dataarray.py +++ b/xarray_schema/dataarray.py @@ -149,7 +149,7 @@ def attrs(self, value): if value is None or isinstance(value, AttrsSchema): self._attrs = value else: - self._attrs = AttrsSchema(value) + self._attrs = AttrsSchema(**value) @property def coords(self) -> Optional[CoordsSchema]: @@ -213,7 +213,7 @@ def validate(self, da: xr.DataArray) -> None: if self.chunks is not None: self.chunks.validate(da.chunks, da.dims, da.shape) - if self.attrs: + if self.attrs is not None: self.attrs.validate(da.attrs) if self.array_type is not None: diff --git a/xarray_schema/dataset.py b/xarray_schema/dataset.py index 4ab7c0a..ca96756 100644 --- a/xarray_schema/dataset.py +++ b/xarray_schema/dataset.py @@ -48,7 +48,7 @@ def from_json(cls, obj: dict): } if 'coords' in obj: kwargs['coords'] = CoordsSchema.from_json(obj["coords"]) - if 'attrs' in obj: + if 'attrs' in obj and obj["attrs"] != {}: kwargs['attrs'] = AttrsSchema.from_json(obj["attrs"]) return cls(**kwargs) @@ -82,7 +82,7 @@ def validate(self, ds: xr.Dataset) -> None: if self.coords is not None: self.coords.validate(ds.coords) - if self.attrs: + if self.attrs is not None: self.attrs.validate(ds.attrs) if self.checks: @@ -98,7 +98,7 @@ def attrs(self, value: Union[AttrsSchema, Dict[Hashable, Any], None]): if value is None or isinstance(value, AttrsSchema): self._attrs = value else: - self._attrs = AttrsSchema(value) + self._attrs = AttrsSchema(**value) @property def data_vars(self) -> Optional[Dict[Hashable, Optional[DataArraySchema]]]: From 2ae96d5b67fe49cb333e9dc68cde392416c1d80e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 17 Mar 2026 12:31:43 +0000 Subject: [PATCH 5/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_core.py | 4 ++-- xarray_schema/components.py | 4 ++-- xarray_schema/dataset.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index ce8c285..fa40524 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -24,7 +24,7 @@ def ds(): ds = xr.Dataset( { 'x': xr.DataArray(np.arange(4) - 2, dims='x'), - 'foo': xr.DataArray(np.ones(4, dtype='i4'), dims='x', attrs=dict(units="K")), + 'foo': xr.DataArray(np.ones(4, dtype='i4'), dims='x', attrs=dict(units='K')), 'bar': xr.DataArray(np.arange(8, dtype=np.float32).reshape(4, 2), dims=('x', 'y')), } ) @@ -227,7 +227,7 @@ def test_dataset_example(ds): name='foo', dtype=np.int32, dims=['x'], - attrs=AttrsSchema(attrs=dict(units=AttrSchema(value="K"))) + attrs=AttrsSchema(attrs=dict(units=AttrSchema(value='K'))) ), 'bar': DataArraySchema(name='bar', dtype=np.floating, dims=['x', 'y']), }, diff --git a/xarray_schema/components.py b/xarray_schema/components.py index 7c21474..e56f71f 100644 --- a/xarray_schema/components.py +++ b/xarray_schema/components.py @@ -396,12 +396,12 @@ def __init__( @classmethod def from_json(cls, obj: dict): - attrs = obj.pop("attrs") if "attrs" in obj else {} + attrs = obj.pop('attrs') if 'attrs' in obj else {} attrs = {k: AttrSchema(**v) for k, v in attrs.items()} return cls( attrs, require_all_keys=obj.get('require_all_keys'), - allow_extra_keys=obj.get('allow_extra_keys') + allow_extra_keys=obj.get('allow_extra_keys'), ) def validate(self, attrs: Any) -> None: diff --git a/xarray_schema/dataset.py b/xarray_schema/dataset.py index ca96756..8146ef1 100644 --- a/xarray_schema/dataset.py +++ b/xarray_schema/dataset.py @@ -47,9 +47,9 @@ def from_json(cls, obj: dict): k: DataArraySchema.from_json(v) for k, v in obj['data_vars'].items() } if 'coords' in obj: - kwargs['coords'] = CoordsSchema.from_json(obj["coords"]) - if 'attrs' in obj and obj["attrs"] != {}: - kwargs['attrs'] = AttrsSchema.from_json(obj["attrs"]) + kwargs['coords'] = CoordsSchema.from_json(obj['coords']) + if 'attrs' in obj and obj['attrs'] != {}: + kwargs['attrs'] = AttrsSchema.from_json(obj['attrs']) return cls(**kwargs)