From b2f01dbd857f356a7a0795266706c8dbb2fbe030 Mon Sep 17 00:00:00 2001 From: Mike Taves Date: Wed, 12 Jun 2024 22:13:13 +1200 Subject: [PATCH 1/5] refactor(datafile): ignore "text" parameter, add attributes from file --- autotest/test_binaryfile.py | 82 ++++++++++++++++++++++++++++- autotest/test_formattedfile.py | 1 + flopy/export/utils.py | 15 +++--- flopy/export/vtk.py | 6 ++- flopy/mf6/utils/binaryfile_utils.py | 6 +-- flopy/utils/binaryfile/__init__.py | 82 ++++++++++++++++------------- flopy/utils/datafile.py | 9 +++- 7 files changed, 150 insertions(+), 51 deletions(-) diff --git a/autotest/test_binaryfile.py b/autotest/test_binaryfile.py index 59558d98a2..85ec9047f0 100644 --- a/autotest/test_binaryfile.py +++ b/autotest/test_binaryfile.py @@ -3,6 +3,7 @@ See also test_cellbudgetfile.py for similar tests. """ +import warnings from itertools import repeat import numpy as np @@ -99,6 +100,8 @@ def test_headfile_build_index(example_data_path): assert hds.ncol == 20 assert hds.nlay == 3 assert not hasattr(hds, "nper") + assert hds.text == "head" + assert hds.text_bytes == b"HEAD".rjust(16) assert hds.totalbytes == 10_676_004 assert len(hds.recordarray) == 3291 assert type(hds.recordarray) == np.ndarray @@ -145,7 +148,80 @@ def test_headfile_build_index(example_data_path): ) -def test_concentration_build_index(example_data_path): +@pytest.mark.parametrize( + "pth, expected", + [ + pytest.param( + "mf6-freyberg/freyberg.hds", + { + "precision": "double", + "nlay, nrow, ncol": (1, 40, 20), + "text": "head", + "text_bytes": b"HEAD".ljust(16), + "len(obj)": 1, + }, + id="freyberg.hds", + ), + pytest.param( + "mf6/create_tests/test_transport/expected_output/gwt_mst03.ucn", + { + "precision": "double", + "nlay, nrow, ncol": (1, 1, 1), + "text": "concentration", + "text_bytes": b"CONCENTRATION".ljust(16), + "len(obj)": 28, + }, + id="gwt_mst03.ucn", + ), + pytest.param( + "mfusg_test/03A_conduit_unconfined/output/ex3A.cln.hds", + { + "precision": "single", + "nlay, nrow, ncol": (1, 1, 2), + "text": "cln_heads", + "text_bytes": b"CLN HEADS".rjust(16), + "len(obj)": 1, + }, + id="ex3A.cln.hds", + ), + pytest.param( + "mfusg_test/03A_conduit_unconfined/output/ex3A.ddn", + { + "precision": "single", + "nlay, nrow, ncol": (2, 100, 100), + "text": "drawdown", + "text_bytes": b"DRAWDOWN".rjust(16), + "len(obj)": 2, + }, + id="ex3A.ddn", + ), + ], +) +def test_headfile_examples(example_data_path, pth, expected): + with HeadFile(example_data_path / pth) as obj: + assert obj.precision == expected["precision"] + assert (obj.nlay, obj.nrow, obj.ncol) == expected["nlay, nrow, ncol"] + assert obj.text == expected["text"] + assert obj.text_bytes == expected["text_bytes"] + assert len(obj) == expected["len(obj)"] + + +@pytest.mark.parametrize( + "pth", + [ + "mt3d_test/mf96mt3d/P01/case1b/MT3D001.UCN", + "unstructured/headu.githds", + ], +) +def test_not_headfile(example_data_path, pth): + # These examples pass get_headfile_precision, but are not HeadFiles + with pytest.raises(ValueError, match="cannot read file with HeadFile"): + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + HeadFile(example_data_path / pth) + + +def test_ucnfile_build_index(example_data_path): # test low-level BinaryLayerFile._build_index() method with UCN file pth = example_data_path / "mt3d_test/mf2005mt3d/P07/MT3D001.UCN" with UcnFile(pth) as ucn: @@ -154,6 +230,8 @@ def test_concentration_build_index(example_data_path): assert ucn.ncol == 21 assert ucn.nlay == 8 assert not hasattr(ucn, "nper") + assert ucn.text == "concentration" + assert ucn.text_bytes == b"CONCENTRATION".ljust(16) assert ucn.totalbytes == 10_432 assert len(ucn.recordarray) == 8 assert type(ucn.recordarray) == np.ndarray @@ -286,6 +364,8 @@ def test_headu_file_data(function_tmpdir, example_data_path): headobj = HeadUFile(fname) assert isinstance(headobj, HeadUFile) assert headobj.nlay == 3 + assert headobj.text == "headu" + assert headobj.text_bytes == b"HEADU".rjust(16) # ensure recordarray is has correct data ra = headobj.recordarray diff --git a/autotest/test_formattedfile.py b/autotest/test_formattedfile.py index 26ac20f451..479927f009 100644 --- a/autotest/test_formattedfile.py +++ b/autotest/test_formattedfile.py @@ -21,6 +21,7 @@ def test_headfile_build_index(example_data_path): assert hds.ncol == 10 assert hds.nlay == 1 assert not hasattr(hds, "nper") + assert hds.text == "head" assert hds.totalbytes == 1613 assert len(hds.recordarray) == 1 assert type(hds.recordarray) == np.ndarray diff --git a/flopy/export/utils.py b/flopy/export/utils.py index a71b2289c3..c7a162c48b 100644 --- a/flopy/export/utils.py +++ b/flopy/export/utils.py @@ -159,6 +159,9 @@ def _add_output_nc_variable( array = np.zeros((len(times), shape3d[0], shape3d[1], shape3d[2]), dtype=np.float32) array[:] = np.nan + if isinstance(text, bytes): + text = text.decode("ascii") + if isinstance(out_obj, ZBNetOutput): a = np.asarray(out_obj.zone_array, dtype=np.float32) if mask_array3d is not None: @@ -177,7 +180,7 @@ def _add_output_nc_variable( else: a = out_obj.get_data(totim=t) except Exception as e: - nme = var_name + text.decode().strip().lower() + nme = var_name + text estr = f"error getting data for {nme} at time {t}:{e!s}" if logger: logger.warn(estr) @@ -189,7 +192,7 @@ def _add_output_nc_variable( try: array[i, :, :, :] = a.astype(np.float32) except Exception as e: - nme = var_name + text.decode().strip().lower() + nme = var_name + text estr = f"error assigning {nme} data to array for time {t}:{e!s}" if logger: logger.warn(estr) @@ -207,7 +210,7 @@ def _add_output_nc_variable( if isinstance(nc, dict): if text: - var_name = text.decode().strip().lower() + var_name = text nc[var_name] = array return nc @@ -217,7 +220,7 @@ def _add_output_nc_variable( precision_str = "f4" if text: - var_name = text.decode().strip().lower() + var_name = text attribs = {"long_name": var_name} attribs["coordinates"] = "time layer latitude longitude" attribs["min"] = mn @@ -426,7 +429,7 @@ def output_helper( times, shape3d, out_obj, - "concentration", + out_obj.text, logger=logger, mask_vals=mask_vals, mask_array3d=mask_array3d, @@ -438,7 +441,7 @@ def output_helper( times, shape3d, out_obj, - out_obj.text.decode(), + out_obj.text, logger=logger, mask_vals=mask_vals, mask_array3d=mask_array3d, diff --git a/flopy/export/vtk.py b/flopy/export/vtk.py index 1155f49c2f..5ea88dc86e 100644 --- a/flopy/export/vtk.py +++ b/flopy/export/vtk.py @@ -1197,14 +1197,18 @@ def add_heads(self, hds, kstpkper=None, masked_values=None): kstpkpers = hds.get_kstpkper() self._totim = dict(zip(kstpkpers, times)) +<<<<<<< HEAD text = hds.text.decode() d = {} +======= + d = dict() +>>>>>>> fb14357a (refactor(datafile): ignore "text" parameter, add attributes from file) for ki in kstpkper: d[ki] = hds.get_data(ki) self.__transient_output_data = False - self.add_transient_array(d, name=text, masked_values=masked_values) + self.add_transient_array(d, name=hds.text, masked_values=masked_values) self.__transient_output_data = True def add_cell_budget(self, cbc, text=None, kstpkper=None, masked_values=None): diff --git a/flopy/mf6/utils/binaryfile_utils.py b/flopy/mf6/utils/binaryfile_utils.py index 91bcd0abd2..1906a73bc1 100644 --- a/flopy/mf6/utils/binaryfile_utils.py +++ b/flopy/mf6/utils/binaryfile_utils.py @@ -192,7 +192,7 @@ def _get_binary_file_object(self, path, bintype, key): elif bintype == "DDN": try: - return bf.HeadFile(path, text="drawdown", precision="double") + return bf.HeadFile(path, precision="double") except AssertionError: raise AssertionError(f"{self.dataDict[key]} does not exist") @@ -333,9 +333,7 @@ def _setbinarykeys(self, binarypathdict): elif key[1] == "DDN": try: - readddn = bf.HeadFile( - path, text="drawdown", precision="double" - ) + readddn = bf.HeadFile(path, precision="double") self.dataDict[(key[0], key[1], "DRAWDOWN")] = path readddn.close() diff --git a/flopy/utils/binaryfile/__init__.py b/flopy/utils/binaryfile/__init__.py index 07027f8524..ca8446a6e1 100644 --- a/flopy/utils/binaryfile/__init__.py +++ b/flopy/utils/binaryfile/__init__.py @@ -322,6 +322,10 @@ def _build_index(self): header = self._get_header() self.nrow = header["nrow"] self.ncol = header["ncol"] + self.text_bytes = header["text"] + self.text = ( + self.text_bytes.decode("ascii").strip().lower().replace(" ", "_") + ) if header["ilay"] > self.nlay: self.nlay = header["ilay"] @@ -341,8 +345,12 @@ def _build_index(self): while ipos < self.totalbytes: header = self._get_header() self.recordarray.append(header) - if self.text.upper() not in header["text"]: - continue + if header["text"] != self.text_bytes: + warnings.warn( + "inconsistent text headers changing from " + f"{self.text_bytes!r} to {header['text']!r}", + UserWarning, + ) if ipos == 0: self.times.append(header["totim"]) self.kstpkper.append((header["kstp"], header["kper"])) @@ -354,6 +362,8 @@ def _build_index(self): ipos = self.file.tell() self.iposarray.append(ipos) databytes = self.get_databytes(header) + if ipos + databytes > self.totalbytes: + raise EOFError(f"attempting to seek {ipos + databytes}") self.file.seek(databytes, 1) ipos = self.file.tell() @@ -509,14 +519,13 @@ class HeadFile(BinaryLayerFile): ---------- filename : str or PathLike Path of the head file. - text : string - Name of the text string in the head file. Default is 'head'. - precision : string - Precision of floating point head data in the value. Accepted - values are 'auto', 'single' or 'double'. Default is 'auto', - which enables automatic detection of precision. - verbose : bool - Toggle logging output. Default is False. + text : str + Ignored. + precision : {'auto', 'single', 'double'} + Precision of floating point head data in the value. Default + 'auto' enables automatic detection of precision. + verbose : bool, default False + Toggle logging output. Examples -------- @@ -535,12 +544,11 @@ class HeadFile(BinaryLayerFile): def __init__( self, filename: Union[str, PathLike], - text="head", + text="head", # noqa ARG002 precision="auto", verbose=False, **kwargs, ): - self.text = text.encode() if precision == "auto": precision = get_headfile_precision(filename) if precision == "unknown": @@ -646,14 +654,15 @@ class UcnFile(BinaryLayerFile): Parameters ---------- - filename : string - Name of the concentration file - text : string - Name of the text string in the ucn file. Default is 'CONCENTRATION' - precision : string - 'auto', 'single' or 'double'. Default is 'auto'. - verbose : bool - Write information to the screen. Default is False. + filename : str or PathLike + Path of the concentration file. + text : str + Ignored. + precision : {'auto', 'single', 'double'} + Precision of floating point values. Default 'auto' enables automatic + detection of precision. + verbose : bool, default False + Write information to the screen. Attributes ---------- @@ -689,12 +698,11 @@ class UcnFile(BinaryLayerFile): def __init__( self, filename, - text="concentration", + text="concentration", # noqa ARG002 precision="auto", verbose=False, **kwargs, ): - self.text = text.encode() if precision == "auto": precision = get_headfile_precision(filename) if precision == "unknown": @@ -714,14 +722,13 @@ class HeadUFile(BinaryLayerFile): ---------- filename : str or PathLike Path of the head file - text : string - Name of the text string in the head file. Default is 'headu'. - precision : string - Precision of the floating point head data in the file. Accepted - values are 'auto', 'single' or 'double'. Default is 'auto', which - enables precision to be automatically detected. - verbose : bool - Toggle logging output. Default is False. + text : str + Ignored. + precision : {'auto', 'single', 'double'} + Precision of floating point values. Default 'auto' enables automatic + detection of precision. + verbose : bool, default False + Toggle logging output. Notes ----- @@ -752,7 +759,7 @@ class HeadUFile(BinaryLayerFile): def __init__( self, filename: Union[str, PathLike], - text="headu", + text="headu", # noqa ARG002 precision="auto", verbose=False, **kwargs, @@ -760,7 +767,6 @@ def __init__( """ Class constructor """ - self.text = text.encode() if precision == "auto": precision = get_headfile_precision(filename) if precision == "unknown": @@ -923,11 +929,11 @@ class CellBudgetFile: ---------- filename : str or PathLike Path of the cell budget file. - precision : string - Precision of floating point budget data in the file. Accepted - values are 'single' or 'double'. Default is 'single'. - verbose : bool - Toggle logging output. Default is False. + precision : {'auto', 'single', 'double'} + Precision of floating point values. Default 'auto' enables automatic + detection of precision. + verbose : bool, default False + Toggle logging output. Examples -------- @@ -2296,6 +2302,8 @@ def reverse(self, filename: Optional[PathLike] = None): this method must reverse not only the order but also the sign (direction) of the model's intercell flows. + Parameters + ---------- filename : str or PathLike, optional Path of the reversed binary cell budget file. """ diff --git a/flopy/utils/datafile.py b/flopy/utils/datafile.py index cf4960fb15..856d8730c1 100644 --- a/flopy/utils/datafile.py +++ b/flopy/utils/datafile.py @@ -213,8 +213,13 @@ def __init__(self, filename: Union[str, PathLike], precision, verbose, **kwargs) args = ",".join(kwargs.keys()) raise ValueError(f"LayerFile error: unrecognized kwargs: {args}") - # read through the file and build the pointer index - self._build_index() + try: + # read through the file and build the pointer index + self._build_index() + except EOFError: + raise ValueError( + f"cannot read file with {self.__class__.__name__}" + ) # now that we read the data and know nrow and ncol, # we can make a generic modelgrid if needed From 48ad5f0b1f8b52dfc160e97e640441adbb7d087b Mon Sep 17 00:00:00 2001 From: Bonelli Date: Thu, 19 Feb 2026 04:54:48 -0500 Subject: [PATCH 2/5] rework --- autotest/test_binaryfile.py | 122 +++++++++++---- flopy/export/vtk.py | 6 - flopy/utils/__init__.py | 9 +- flopy/utils/binaryfile/__init__.py | 236 +++++++++++++++++++++-------- flopy/utils/datafile.py | 4 +- 5 files changed, 282 insertions(+), 95 deletions(-) diff --git a/autotest/test_binaryfile.py b/autotest/test_binaryfile.py index 85ec9047f0..4e8642cc58 100644 --- a/autotest/test_binaryfile.py +++ b/autotest/test_binaryfile.py @@ -16,6 +16,7 @@ import flopy from flopy.utils import ( BinaryHeader, + BinaryLayerFile, CellBudgetFile, HeadFile, HeadUFile, @@ -148,20 +149,33 @@ def test_headfile_build_index(example_data_path): ) +def test_headfile_examples(example_data_path): + # HeadFile with default text='head' + pth = example_data_path / "mf6-freyberg/freyberg.hds" + with HeadFile(pth) as obj: + assert obj.precision == "double" + assert (obj.nlay, obj.nrow, obj.ncol) == (1, 40, 20) + assert obj.text == "head" + assert obj.text_bytes == b"HEAD".ljust(16) + assert len(obj) == 1 + + # HeadFile with explicit text='drawdown' for a drawdown file + pth = example_data_path / "mfusg_test/03A_conduit_unconfined/output/ex3A.ddn" + with HeadFile(pth, text="drawdown") as obj: + assert obj.precision == "single" + assert (obj.nlay, obj.nrow, obj.ncol) == (2, 100, 100) + assert obj.text == "drawdown" + assert obj.text_bytes == b"DRAWDOWN".rjust(16) + assert len(obj) == 2 + + # HeadFile with default text='head' raises on non-head file + with pytest.raises(ValueError, match="no records with text='head'"): + HeadFile(pth) + + @pytest.mark.parametrize( "pth, expected", [ - pytest.param( - "mf6-freyberg/freyberg.hds", - { - "precision": "double", - "nlay, nrow, ncol": (1, 40, 20), - "text": "head", - "text_bytes": b"HEAD".ljust(16), - "len(obj)": 1, - }, - id="freyberg.hds", - ), pytest.param( "mf6/create_tests/test_transport/expected_output/gwt_mst03.ucn", { @@ -197,8 +211,9 @@ def test_headfile_build_index(example_data_path): ), ], ) -def test_headfile_examples(example_data_path, pth, expected): - with HeadFile(example_data_path / pth) as obj: +def test_binarylayerfile_examples(example_data_path, pth, expected): + # BinaryLayerFile auto-detects text from file + with BinaryLayerFile(example_data_path / pth) as obj: assert obj.precision == expected["precision"] assert (obj.nlay, obj.nrow, obj.ncol) == expected["nlay, nrow, ncol"] assert obj.text == expected["text"] @@ -206,19 +221,74 @@ def test_headfile_examples(example_data_path, pth, expected): assert len(obj) == expected["len(obj)"] -@pytest.mark.parametrize( - "pth", - [ - "mt3d_test/mf96mt3d/P01/case1b/MT3D001.UCN", - "unstructured/headu.githds", - ], -) -def test_not_headfile(example_data_path, pth): - # These examples pass get_headfile_precision, but are not HeadFiles - with pytest.raises(ValueError, match="cannot read file with HeadFile"): - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - HeadFile(example_data_path / pth) +def _write_binary_layer_record(f, data, kstp=1, kper=1, totim=1.0, text="HEAD"): + """Write one single-precision binary layer record to open file f.""" + nrow, ncol = data.shape + text_bytes = text.encode("ascii").ljust(16)[:16] + header = np.array( + [(kstp, kper, totim, totim, text_bytes, ncol, nrow, 1)], + dtype=[ + ("kstp", ">>>>>> fb14357a (refactor(datafile): ignore "text" parameter, add attributes from file) for ki in kstpkper: d[ki] = hds.get_data(ki) diff --git a/flopy/utils/__init__.py b/flopy/utils/__init__.py index bd15a59b86..b07f3801e5 100644 --- a/flopy/utils/__init__.py +++ b/flopy/utils/__init__.py @@ -23,7 +23,14 @@ from .utl_import import import_optional_dependency # isort:skip from . import get_modflow as get_modflow_module -from .binaryfile import BinaryHeader, CellBudgetFile, HeadFile, HeadUFile, UcnFile +from .binaryfile import ( + BinaryHeader, + BinaryLayerFile, + CellBudgetFile, + HeadFile, + HeadUFile, + UcnFile, +) from .check import check from .flopy_io import read_fixed_var, write_fixed_var from .formattedfile import FormattedHeadFile diff --git a/flopy/utils/binaryfile/__init__.py b/flopy/utils/binaryfile/__init__.py index ca8446a6e1..1c367c8b7d 100644 --- a/flopy/utils/binaryfile/__init__.py +++ b/flopy/utils/binaryfile/__init__.py @@ -298,36 +298,85 @@ def get_headfile_precision(filename: Union[str, PathLike]): class BinaryLayerFile(LayerFile): """ - The BinaryLayerFile class is a parent class from which concrete - classes inherit. This class should not be instantiated directly. + Reads layered MODFLOW binary output files (head, drawdown, stage, etc.). + + This class can be instantiated directly and is the recommended entry + point for MF6 advanced package output (LAK stage, SFR flow, UZF depth, + MAW head, etc.) where the text label varies by package. + + Parameters + ---------- + filename : str or PathLike + Path of the binary output file. + text : str or None, optional + Text label to scope this instance to. If None (default), the label + is read from the first record in the file. If the file contains + multiple record types a warning is issued and the first type found + is used; pass ``text=`` explicitly to select a different type. + precision : {'auto', 'single', 'double'} + Floating-point precision. Default ``'auto'`` detects from the file. + verbose : bool, default False + Toggle logging output. Notes ----- - - The BinaryLayerFile class is built on a record array consisting of - headers, which are record arrays of the modflow header information - (kstp, kper, pertim, totim, text, nrow, ncol, ilay), and long ints - pointing to the 1st byte of data for the corresponding data arrays. + The class is built on a record array of headers (kstp, kper, pertim, + totim, text, nrow, ncol, ilay) and an integer array of byte offsets + pointing to the first byte of each data record. Only records whose + text label matches ``self.text`` appear in ``recordarray``; all records + in the file appear in the ``headers`` DataFrame. """ - def __init__(self, filename: Union[str, PathLike], precision, verbose, **kwargs): + def __init__( + self, + filename: Union[str, PathLike], + text: Optional[str] = None, + precision: str = "auto", + verbose: bool = False, + **kwargs, + ): + self._requested_text = text + if precision == "auto": + precision = get_headfile_precision(filename) + if precision == "unknown": + s = f"Error. Precision could not be determined for {filename}" + print(s) + raise Exception() + if not hasattr(self, "header_dtype"): + self.header_dtype = BinaryHeader.set_dtype( + bintype="Head", precision=precision + ) super().__init__(filename, precision, verbose, **kwargs) + @staticmethod + def _decode_text(text_bytes: bytes) -> str: + """Decode raw 16-byte text field to a normalised string. + + Raises EOFError on non-ASCII bytes so that LayerFile.__init__ can + convert it to a clear ValueError (wrong file format / precision). + """ + try: + return text_bytes.decode("ascii").strip().lower().replace(" ", "_") + except UnicodeDecodeError: + raise EOFError(f"non-ASCII text field: {text_bytes!r}") + def _build_index(self): """ Build the recordarray and iposarray, which maps the header information to the position in the binary file. + recordarray / iposarray contain only records whose text label matches + self.text (used by all query methods). The headers DataFrame contains + every record in the file regardless of text label. """ - header = self._get_header() - self.nrow = header["nrow"] - self.ncol = header["ncol"] - self.text_bytes = header["text"] + requested = self._requested_text + # target text in normalised form; None means auto-detect from file self.text = ( - self.text_bytes.decode("ascii").strip().lower().replace(" ", "_") + self._decode_text(requested.encode("ascii")) + if requested is not None + else None ) - if header["ilay"] > self.nlay: - self.nlay = header["ilay"] + self.text_bytes = None # set when first matching record is found if self.nrow < 0 or self.ncol < 0: raise ValueError("negative nrow, ncol") @@ -341,39 +390,79 @@ def _build_index(self): self.file.seek(0, 2) self.totalbytes = self.file.tell() self.file.seek(0, 0) + + all_headers = [] # every record → headers DataFrame + all_ipos = [] + text_types_seen: dict = {} # normalised text → count + warn_threshold = 10000000 ipos = 0 + while ipos < self.totalbytes: header = self._get_header() - self.recordarray.append(header) - if header["text"] != self.text_bytes: - warnings.warn( - "inconsistent text headers changing from " - f"{self.text_bytes!r} to {header['text']!r}", - UserWarning, - ) - if ipos == 0: - self.times.append(header["totim"]) - self.kstpkper.append((header["kstp"], header["kper"])) - else: + ipos_data = self.file.tell() # byte position of this record's data + + if header["nrow"] < 0 or header["ncol"] < 0: + raise Exception("negative nrow, ncol") + + header_text = self._decode_text(header["text"]) + text_types_seen[header_text] = text_types_seen.get(header_text, 0) + 1 + + # auto-detect: adopt the first record's text as the target + if self.text is None: + self.text = header_text + + all_headers.append(header) + all_ipos.append(ipos_data) + + if header_text == self.text: + if self.text_bytes is None: + # first matching record: capture bytes and grid dimensions + self.text_bytes = header["text"] + self.nrow = header["nrow"] + self.ncol = header["ncol"] + if self.nrow > 1 and self.nrow * self.ncol > warn_threshold: + warnings.warn( + f"Very large grid, ncol ({self.ncol}) * nrow" + f" ({self.nrow}) > {warn_threshold}" + ) + self.recordarray.append(header) + self.iposarray.append(ipos_data) totim = header["totim"] - if totim != self.times[-1]: + if not self.times or totim != self.times[-1]: self.times.append(totim) self.kstpkper.append((header["kstp"], header["kper"])) - ipos = self.file.tell() - self.iposarray.append(ipos) + databytes = self.get_databytes(header) - if ipos + databytes > self.totalbytes: - raise EOFError(f"attempting to seek {ipos + databytes}") + if ipos_data + databytes > self.totalbytes: + raise EOFError(f"attempting to seek {ipos_data + databytes}") self.file.seek(databytes, 1) ipos = self.file.tell() - # self.recordarray contains a recordarray of all the headers. + if len(text_types_seen) > 1 and self._requested_text is None: + other = sorted(t for t in text_types_seen if t != self.text) + warnings.warn( + f"file contains multiple record types: " + f"{sorted(text_types_seen)!r}; scoped to {self.text!r}. " + f"Use text= to access: {other!r}", + UserWarning, + stacklevel=2, + ) + + if not self.recordarray: + raise ValueError( + f"no records with text={self.text!r} found in file; " + f"file contains: {sorted(text_types_seen)!r}" + ) + + # convert to arrays self.recordarray = np.array(self.recordarray, dtype=self.header_dtype) self.iposarray = np.array(self.iposarray, dtype=np.int64) self.nlay = np.max(self.recordarray["ilay"]) - # provide headers as a pandas frame - self.headers = pd.DataFrame(self.recordarray, index=self.iposarray) + # headers DataFrame contains every record in the file + all_arr = np.array(all_headers, dtype=self.header_dtype) + all_ipos_arr = np.array(all_ipos, dtype=np.int64) + self.headers = pd.DataFrame(all_arr, index=all_ipos_arr) self.headers["text"] = ( self.headers["text"].str.decode("ascii", "strict").str.strip() ) @@ -409,6 +498,21 @@ def _get_header(self): header = binaryread(self.file, self.header_dtype, (1,)) return header[0] + @property + def unique_records(self) -> np.ndarray: + """ + Unique text record types present in the file. + + Returns + ------- + numpy.ndarray + Sorted array of unique, stripped text strings found across all + records in the file (e.g. ``['HEAD', 'DRAWDOWN']``). Useful for + discovering which record types are available before opening + additional instances with ``text=``. + """ + return np.sort(self.headers["text"].unique()) + def get_ts(self, idx): """ Get a time series from the binary file. @@ -519,8 +623,12 @@ class HeadFile(BinaryLayerFile): ---------- filename : str or PathLike Path of the head file. - text : str - Ignored. + text : str, default 'head' + Text label of the records to read. Defaults to ``'head'``; raises + an error if the file contains no records with that label. Pass a + different value (e.g. ``text='drawdown'``) to scope the instance + to a different record type, or use :class:`BinaryLayerFile` + directly for files whose label is not known in advance. precision : {'auto', 'single', 'double'} Precision of floating point head data in the value. Default 'auto' enables automatic detection of precision. @@ -537,6 +645,7 @@ class HeadFile(BinaryLayerFile): >>> ddnobj = bf.HeadFile('model.ddn', text='drawdown', precision='single') >>> ddnobj.headers + >>> ddnobj.unique_records >>> rec = ddnobj.get_data(totim=100.) """ @@ -544,9 +653,9 @@ class HeadFile(BinaryLayerFile): def __init__( self, filename: Union[str, PathLike], - text="head", # noqa ARG002 - precision="auto", - verbose=False, + text: str = "head", + precision: str = "auto", + verbose: bool = False, **kwargs, ): if precision == "auto": @@ -556,7 +665,9 @@ def __init__( f"Error. Precision could not be determined for {filename}" ) self.header_dtype = BinaryHeader.set_dtype(bintype="Head", precision=precision) - super().__init__(filename, precision, verbose, **kwargs) + super().__init__( + filename, text=text, precision=precision, verbose=verbose, **kwargs + ) def reverse(self, filename: Optional[PathLike] = None): """ @@ -643,9 +754,13 @@ def reverse_header(header): data.tofile(f) # if we rewrote the original file, reinitialize - if inplace: - move(target, filename) - super().__init__(filename, self.precision, self.verbose) + if filename == self.filename: + super().__init__( + self.filename, + text=self._requested_text, + precision=self.precision, + verbose=self.verbose, + ) class UcnFile(BinaryLayerFile): @@ -656,8 +771,9 @@ class UcnFile(BinaryLayerFile): ---------- filename : str or PathLike Path of the concentration file. - text : str - Ignored. + text : str, default 'concentration' + Text label of the records to read. Raises an error if the file + contains no records with that label. precision : {'auto', 'single', 'double'} Precision of floating point values. Default 'auto' enables automatic detection of precision. @@ -698,9 +814,9 @@ class UcnFile(BinaryLayerFile): def __init__( self, filename, - text="concentration", # noqa ARG002 - precision="auto", - verbose=False, + text: str = "concentration", + precision: str = "auto", + verbose: bool = False, **kwargs, ): if precision == "auto": @@ -708,8 +824,9 @@ def __init__( if precision == "unknown": raise ValueError(f"Error. Precision could not be determined for {filename}") self.header_dtype = BinaryHeader.set_dtype(bintype="Ucn", precision=precision) - super().__init__(filename, precision, verbose, **kwargs) - return + super().__init__( + filename, text=text, precision=precision, verbose=verbose, **kwargs + ) class HeadUFile(BinaryLayerFile): @@ -722,8 +839,10 @@ class HeadUFile(BinaryLayerFile): ---------- filename : str or PathLike Path of the head file - text : str - Ignored. + text : str, default 'headu' + Text label identifying the record type to read. Records not matching + this label are excluded from the query interface (times, kstpkper, + get_data, get_ts). Use BinaryLayerFile with text=None to auto-detect. precision : {'auto', 'single', 'double'} Precision of floating point values. Default 'auto' enables automatic detection of precision. @@ -759,14 +878,11 @@ class HeadUFile(BinaryLayerFile): def __init__( self, filename: Union[str, PathLike], - text="headu", # noqa ARG002 - precision="auto", - verbose=False, + text: str = "headu", + precision: str = "auto", + verbose: bool = False, **kwargs, ): - """ - Class constructor - """ if precision == "auto": precision = get_headfile_precision(filename) if precision == "unknown": @@ -774,7 +890,9 @@ def __init__( f"Error. Precision could not be determined for {filename}" ) self.header_dtype = BinaryHeader.set_dtype(bintype="Head", precision=precision) - super().__init__(filename, precision, verbose, **kwargs) + super().__init__( + filename, text=text, precision=precision, verbose=verbose, **kwargs + ) def _get_data_array(self, totim=0.0): """ diff --git a/flopy/utils/datafile.py b/flopy/utils/datafile.py index 856d8730c1..f803be6647 100644 --- a/flopy/utils/datafile.py +++ b/flopy/utils/datafile.py @@ -217,9 +217,7 @@ def __init__(self, filename: Union[str, PathLike], precision, verbose, **kwargs) # read through the file and build the pointer index self._build_index() except EOFError: - raise ValueError( - f"cannot read file with {self.__class__.__name__}" - ) + raise ValueError(f"cannot read file with {self.__class__.__name__}") # now that we read the data and know nrow and ncol, # we can make a generic modelgrid if needed From e73f99bb7720c8c71c197d7bd35beb934c2c775c Mon Sep 17 00:00:00 2001 From: Bonelli Date: Thu, 19 Feb 2026 06:21:21 -0500 Subject: [PATCH 3/5] use BinaryLayerFile in notebook --- .docs/Notebooks/mfusg_conduit_examples.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.docs/Notebooks/mfusg_conduit_examples.py b/.docs/Notebooks/mfusg_conduit_examples.py index bda7588b6e..93821a3156 100644 --- a/.docs/Notebooks/mfusg_conduit_examples.py +++ b/.docs/Notebooks/mfusg_conduit_examples.py @@ -104,7 +104,7 @@ assert success, pformat(buff) head_file = os.path.join(mf.model_ws, "ex3.clnhds") -headobj = flopy.utils.HeadFile(head_file) +headobj = flopy.utils.BinaryLayerFile(head_file) # + simtimes = headobj.get_times() @@ -298,7 +298,7 @@ # + head_file = os.path.join(mf.model_ws, f"{modelname}.clnhd") -headobj = flopy.utils.HeadFile(head_file) +headobj = flopy.utils.BinaryLayerFile(head_file) simtimes = headobj.get_times() nper = len(simtimes) @@ -394,7 +394,7 @@ # + head_file = os.path.join(mf.model_ws, f"{modelname}.clnhd") -headobj = flopy.utils.HeadFile(head_file) +headobj = flopy.utils.BinaryLayerFile(head_file) simtimes = headobj.get_times() nper = len(simtimes) @@ -489,7 +489,7 @@ # + head_file = os.path.join(mf.model_ws, f"{modelname}.clnhd") -headobj = flopy.utils.HeadFile(head_file) +headobj = flopy.utils.BinaryLayerFile(head_file) simtimes = headobj.get_times() nper = len(simtimes) @@ -583,7 +583,7 @@ # + head_file = os.path.join(mf.model_ws, f"{modelname}.clnhd") -headobj = flopy.utils.HeadFile(head_file) +headobj = flopy.utils.BinaryLayerFile(head_file) simtimes = headobj.get_times() nper = len(simtimes) From 34d63181e3e07d4abfacc3b164c4f5518b19a743 Mon Sep 17 00:00:00 2001 From: Bonelli Date: Thu, 19 Feb 2026 09:49:47 -0500 Subject: [PATCH 4/5] fix test --- autotest/test_subwt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/autotest/test_subwt.py b/autotest/test_subwt.py index 45c0e1f5aa..453fc08f7a 100644 --- a/autotest/test_subwt.py +++ b/autotest/test_subwt.py @@ -105,7 +105,7 @@ def test_subwt(function_tmpdir, ibound_path): hds_geo = HeadFile( function_tmpdir / f"{ml.name}.swt_geostatic_stress.hds", - text="stress", + text="geostatic stress", ).get_alldata() hds_eff = HeadFile( function_tmpdir / f"{ml.name}.swt_eff_stress.hds", From 0844df48737b21ad6a324e7f964289b336141b74 Mon Sep 17 00:00:00 2001 From: Bonelli Date: Thu, 19 Feb 2026 19:45:05 -0500 Subject: [PATCH 5/5] restore substring matching --- .docs/Notebooks/mfusg_conduit_examples.py | 10 +++++----- autotest/test_subwt.py | 2 +- flopy/utils/binaryfile/__init__.py | 15 ++++++++++++++- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/.docs/Notebooks/mfusg_conduit_examples.py b/.docs/Notebooks/mfusg_conduit_examples.py index 93821a3156..bda7588b6e 100644 --- a/.docs/Notebooks/mfusg_conduit_examples.py +++ b/.docs/Notebooks/mfusg_conduit_examples.py @@ -104,7 +104,7 @@ assert success, pformat(buff) head_file = os.path.join(mf.model_ws, "ex3.clnhds") -headobj = flopy.utils.BinaryLayerFile(head_file) +headobj = flopy.utils.HeadFile(head_file) # + simtimes = headobj.get_times() @@ -298,7 +298,7 @@ # + head_file = os.path.join(mf.model_ws, f"{modelname}.clnhd") -headobj = flopy.utils.BinaryLayerFile(head_file) +headobj = flopy.utils.HeadFile(head_file) simtimes = headobj.get_times() nper = len(simtimes) @@ -394,7 +394,7 @@ # + head_file = os.path.join(mf.model_ws, f"{modelname}.clnhd") -headobj = flopy.utils.BinaryLayerFile(head_file) +headobj = flopy.utils.HeadFile(head_file) simtimes = headobj.get_times() nper = len(simtimes) @@ -489,7 +489,7 @@ # + head_file = os.path.join(mf.model_ws, f"{modelname}.clnhd") -headobj = flopy.utils.BinaryLayerFile(head_file) +headobj = flopy.utils.HeadFile(head_file) simtimes = headobj.get_times() nper = len(simtimes) @@ -583,7 +583,7 @@ # + head_file = os.path.join(mf.model_ws, f"{modelname}.clnhd") -headobj = flopy.utils.BinaryLayerFile(head_file) +headobj = flopy.utils.HeadFile(head_file) simtimes = headobj.get_times() nper = len(simtimes) diff --git a/autotest/test_subwt.py b/autotest/test_subwt.py index 453fc08f7a..45c0e1f5aa 100644 --- a/autotest/test_subwt.py +++ b/autotest/test_subwt.py @@ -105,7 +105,7 @@ def test_subwt(function_tmpdir, ibound_path): hds_geo = HeadFile( function_tmpdir / f"{ml.name}.swt_geostatic_stress.hds", - text="geostatic stress", + text="stress", ).get_alldata() hds_eff = HeadFile( function_tmpdir / f"{ml.name}.swt_eff_stress.hds", diff --git a/flopy/utils/binaryfile/__init__.py b/flopy/utils/binaryfile/__init__.py index 1c367c8b7d..f9f46de7ac 100644 --- a/flopy/utils/binaryfile/__init__.py +++ b/flopy/utils/binaryfile/__init__.py @@ -394,6 +394,7 @@ def _build_index(self): all_headers = [] # every record → headers DataFrame all_ipos = [] text_types_seen: dict = {} # normalised text → count + text_types_matched: set = set() # text labels matching the substring filter warn_threshold = 10000000 ipos = 0 @@ -414,7 +415,9 @@ def _build_index(self): all_headers.append(header) all_ipos.append(ipos_data) - if header_text == self.text: + # substring match for backward compatibility + if self.text in header_text: + text_types_matched.add(header_text) if self.text_bytes is None: # first matching record: capture bytes and grid dimensions self.text_bytes = header["text"] @@ -448,6 +451,16 @@ def _build_index(self): stacklevel=2, ) + # warn if substring matched multiple distinct text labels (ambiguous) + if len(text_types_matched) > 1: + warnings.warn( + f"text={self.text!r} (substring match) matched multiple record types: " + f"{sorted(text_types_matched)!r}. For precise filtering, use the " + f"exact label or BinaryLayerFile with text=None to auto-detect.", + UserWarning, + stacklevel=2, + ) + if not self.recordarray: raise ValueError( f"no records with text={self.text!r} found in file; "