From 580c81b903b8f195f91b4d15f2551b08af5c9fa1 Mon Sep 17 00:00:00 2001 From: Jan Lukas Rinker Date: Thu, 5 Feb 2026 14:39:40 +0100 Subject: [PATCH 1/2] Fix TableModel validator to accept pandas StringDtype MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TableModel validator now properly accepts modern pandas StringDtype for instance_key columns, along with CategoricalDtype with string categories. This fixes #1062 where the validator incorrectly rejected StringDtype columns, forcing users to use deprecated object dtypes. The new validation logic: - Explicitly checks for pd.StringDtype instances - Accepts pd.CategoricalDtype with string categories - Maintains backward compatibility with integer and object dtypes - Provides clearer error messages 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- src/spatialdata/models/models.py | 49 ++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/src/spatialdata/models/models.py b/src/spatialdata/models/models.py index 6a126b02..f68b01ac 100644 --- a/src/spatialdata/models/models.py +++ b/src/spatialdata/models/models.py @@ -1047,25 +1047,38 @@ def _validate_table_annotation_metadata(self, data: AnnData) -> None: raise ValueError(f"`{attr[self.REGION_KEY_KEY]}` not found in `adata.obs`. Please create the column.") if attr[self.INSTANCE_KEY] not in data.obs: raise ValueError(f"`{attr[self.INSTANCE_KEY]}` not found in `adata.obs`. Please create the column.") - if ( - (dtype := data.obs[attr[self.INSTANCE_KEY]].dtype) - not in [ - int, - np.int16, - np.uint16, - np.int32, - np.uint32, - np.int64, - np.uint64, - "O", - ] - and not pd.api.types.is_string_dtype(data.obs[attr[self.INSTANCE_KEY]]) - or (dtype == "O" and (val_dtype := type(data.obs[attr[self.INSTANCE_KEY]].iloc[0])) is not str) - ): - dtype = dtype if dtype != "O" else val_dtype + dtype = data.obs[attr[self.INSTANCE_KEY]].dtype + + # Check if dtype is valid for instance_key column + is_valid_dtype = False + + # Check for integer types + if dtype in [int, np.int16, np.uint16, np.int32, np.uint32, np.int64, np.uint64]: + is_valid_dtype = True + # Check for pandas StringDtype + elif isinstance(dtype, pd.StringDtype): + is_valid_dtype = True + # Check for CategoricalDtype with string categories + elif isinstance(dtype, pd.CategoricalDtype): + if pd.api.types.is_string_dtype(dtype.categories.dtype) or isinstance(dtype.categories.dtype, pd.StringDtype): + is_valid_dtype = True + # Check for object dtype with string values + elif dtype == "O": + if len(data.obs[attr[self.INSTANCE_KEY]]) > 0: + val_dtype = type(data.obs[attr[self.INSTANCE_KEY]].iloc[0]) + if val_dtype is str: + is_valid_dtype = True + else: + # Empty column with object dtype is acceptable + is_valid_dtype = True + # Fallback check using pandas is_string_dtype + elif pd.api.types.is_string_dtype(dtype): + is_valid_dtype = True + + if not is_valid_dtype: raise TypeError( - f"Only int, np.int16, np.int32, np.int64, uint equivalents or string allowed as dtype for " - f"instance_key column in obs. Dtype found to be {dtype}" + f"Only int, np.int16, np.int32, np.int64, uint equivalents, pandas StringDtype, or string " + f"allowed as dtype for instance_key column in obs. Dtype found to be {dtype}" ) expected_regions = attr[self.REGION_KEY] if isinstance(attr[self.REGION_KEY], list) else [attr[self.REGION_KEY]] found_regions = data.obs[attr[self.REGION_KEY_KEY]].unique().tolist() From eb19535418486dc8f88c5d0768b753b6048c9c17 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 5 Feb 2026 13:47:24 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spatialdata/models/models.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/spatialdata/models/models.py b/src/spatialdata/models/models.py index f68b01ac..e148a986 100644 --- a/src/spatialdata/models/models.py +++ b/src/spatialdata/models/models.py @@ -1053,14 +1053,15 @@ def _validate_table_annotation_metadata(self, data: AnnData) -> None: is_valid_dtype = False # Check for integer types - if dtype in [int, np.int16, np.uint16, np.int32, np.uint32, np.int64, np.uint64]: - is_valid_dtype = True - # Check for pandas StringDtype - elif isinstance(dtype, pd.StringDtype): + if dtype in [int, np.int16, np.uint16, np.int32, np.uint32, np.int64, np.uint64] or isinstance( + dtype, pd.StringDtype + ): is_valid_dtype = True # Check for CategoricalDtype with string categories elif isinstance(dtype, pd.CategoricalDtype): - if pd.api.types.is_string_dtype(dtype.categories.dtype) or isinstance(dtype.categories.dtype, pd.StringDtype): + if pd.api.types.is_string_dtype(dtype.categories.dtype) or isinstance( + dtype.categories.dtype, pd.StringDtype + ): is_valid_dtype = True # Check for object dtype with string values elif dtype == "O":