From 733612514cbadd27ea600b3d9ed0f9e1e0fe252f Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 23 Feb 2026 16:12:47 +0000 Subject: [PATCH 01/11] CU-869c87tt9: Add small bit to workflow to check what notebook patching does --- .github/workflows/medcat-v2-tutorials_main.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/medcat-v2-tutorials_main.yml b/.github/workflows/medcat-v2-tutorials_main.yml index a2b92fd4c..1e9897ac8 100644 --- a/.github/workflows/medcat-v2-tutorials_main.yml +++ b/.github/workflows/medcat-v2-tutorials_main.yml @@ -27,6 +27,9 @@ jobs: - name: Update install targets in notebooks run: | python .ci/patch_notebook_installs.py . + git status + echo "" + git diff - name: Install dependencies run: | From 99726e4bb1dce1ba344d58ab8dab4b8986e4d6de Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 23 Feb 2026 16:57:22 +0000 Subject: [PATCH 02/11] CU-869c87xpy: Add same debug output to migration and relcat workflow --- .github/workflows/medcat-v2-tutorials_main.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/medcat-v2-tutorials_main.yml b/.github/workflows/medcat-v2-tutorials_main.yml index 1e9897ac8..212fff7b7 100644 --- a/.github/workflows/medcat-v2-tutorials_main.yml +++ b/.github/workflows/medcat-v2-tutorials_main.yml @@ -73,6 +73,9 @@ jobs: - name: Update install targets in notebooks run: | python .ci/patch_notebook_installs.py . 
+ git status + echo "" + git diff - name: Install dependencies run: | From 307f674f88b0358fa412e6b33c027e1ab09705c4 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 23 Feb 2026 17:19:34 +0000 Subject: [PATCH 03/11] CU-869c87xpy: Fix install target changers --- .../.ci/patch_notebook_installs.py | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/medcat-v2-tutorials/.ci/patch_notebook_installs.py b/medcat-v2-tutorials/.ci/patch_notebook_installs.py index 11f88aa53..d33a950ec 100644 --- a/medcat-v2-tutorials/.ci/patch_notebook_installs.py +++ b/medcat-v2-tutorials/.ci/patch_notebook_installs.py @@ -4,15 +4,21 @@ from functools import partial -# rel_install_path = "../medcat-v2/" -# abs_install_path = str(pathlib.Path(rel_install_path).resolve()) +rel_install_path = "../medcat-v2/" +abs_install_path = str(pathlib.Path(rel_install_path).resolve()) # Matches either: -# 1. `! pip install medcat[extras]` +# 1. `! pip install medcat[extras]~=version` # 2. `! pip install medcat[extras] @ git+...` shell_pattern = re.compile( - r'(!\s*pip\s+install\s+)(\\["\']?)medcat(\[.*?\])' - r'(\s*@\s*git\+[^"\'\s]+)?\2' + r'(!\s*pip\s+install\s+)' # group 1: the install command + r'medcat' # literal package name + r'(\[.*?\])?' # group 2: optional extras e.g. [meta-cat,spacy] + r'(?:' + r'\s*@\s*git\+[^"\'\s]+' # either a git URL + r'|' + r'\s*[~=!<>][^"\'\s]*' # or a version specifier e.g. ~=2.4.0, ==2.4.0 + r')' ) req_txt_pattern = re.compile( r'^(medcat(\[.*?\])?)\s*@\s*git\+\S+', flags=re.MULTILINE @@ -20,13 +26,8 @@ def repl_nb(m, file_path: pathlib.Path): - # extras = m[3] - old_url = m[4] - if old_url and "medcat/v" in old_url: - print(f"[WARN] {file_path} refers to alpha/tagged release: " - f"{old_url.strip()}") - # to_write = f'{m[1]}\\"{abs_install_path}{extras}\\"' - to_write = '! pip install \\"pip\\"' + extras = m[2] or "" + to_write = f'! 
pip install \\"{abs_install_path}{extras}\\"' print(f"[PATCHED] {file_path}\n with: '{to_write}'") return to_write @@ -58,4 +59,4 @@ def main(path: str): print(f"Path {path} does not exist.") sys.exit(1) - main(path) + main(path) \ No newline at end of file From 040ac3611026a55f2a9639a18187f416b8142f64 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 23 Feb 2026 17:27:10 +0000 Subject: [PATCH 04/11] CU-869c87xpy: Apply notebook patch more robustly (i.e quotes) --- medcat-v2-tutorials/.ci/patch_notebook_installs.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/medcat-v2-tutorials/.ci/patch_notebook_installs.py b/medcat-v2-tutorials/.ci/patch_notebook_installs.py index d33a950ec..8a6976a74 100644 --- a/medcat-v2-tutorials/.ci/patch_notebook_installs.py +++ b/medcat-v2-tutorials/.ci/patch_notebook_installs.py @@ -12,13 +12,15 @@ # 2. `! pip install medcat[extras] @ git+...` shell_pattern = re.compile( r'(!\s*pip\s+install\s+)' # group 1: the install command - r'medcat' # literal package name - r'(\[.*?\])?' # group 2: optional extras e.g. [meta-cat,spacy] + r'(\\?"?)' # group 2: optional opening \" + r'medcat' + r'(\[.*?\])?' # group 3: optional extras r'(?:' - r'\s*@\s*git\+[^"\'\s]+' # either a git URL + r'\s*@\s*git\+[^"\'\s]+' r'|' - r'\s*[~=!<>][^"\'\s]*' # or a version specifier e.g. ~=2.4.0, ==2.4.0 + r'\s*[~=!<>][^"\'\\s]*' r')' + r'(\\?"?)' # group 4: optional closing \" ) req_txt_pattern = re.compile( r'^(medcat(\[.*?\])?)\s*@\s*git\+\S+', flags=re.MULTILINE @@ -26,7 +28,7 @@ def repl_nb(m, file_path: pathlib.Path): - extras = m[2] or "" + extras = m[3] or "" to_write = f'! 
pip install \\"{abs_install_path}{extras}\\"' print(f"[PATCHED] {file_path}\n with: '{to_write}'") return to_write From 0e8c07921aec4cdf782637949a7c5e880de10001 Mon Sep 17 00:00:00 2001 From: mart-r Date: Mon, 23 Feb 2026 18:07:24 +0000 Subject: [PATCH 05/11] CU-869c87xpy: Fix regex eating the ending quote in some cases --- medcat-v2-tutorials/.ci/patch_notebook_installs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/medcat-v2-tutorials/.ci/patch_notebook_installs.py b/medcat-v2-tutorials/.ci/patch_notebook_installs.py index 8a6976a74..0e16f9095 100644 --- a/medcat-v2-tutorials/.ci/patch_notebook_installs.py +++ b/medcat-v2-tutorials/.ci/patch_notebook_installs.py @@ -20,7 +20,8 @@ r'|' r'\s*[~=!<>][^"\'\\s]*' r')' - r'(\\?"?)' # group 4: optional closing \" + # only match \" (escaped quote), never a bare " + r'(\\")?' # group 4: optional closing \" ) req_txt_pattern = re.compile( r'^(medcat(\[.*?\])?)\s*@\s*git\+\S+', flags=re.MULTILINE From 81bbaa8bad25985e04b6fa33c7dd8ccfc2702395 Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 26 Feb 2026 11:07:34 +0000 Subject: [PATCH 06/11] CU-869c87xpy: Add failure option for notebook patching script --- .../.ci/patch_notebook_installs.py | 32 ++++++++++++++----- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/medcat-v2-tutorials/.ci/patch_notebook_installs.py b/medcat-v2-tutorials/.ci/patch_notebook_installs.py index 0e16f9095..4a201fd60 100644 --- a/medcat-v2-tutorials/.ci/patch_notebook_installs.py +++ b/medcat-v2-tutorials/.ci/patch_notebook_installs.py @@ -2,6 +2,7 @@ import pathlib import re from functools import partial +import argparse rel_install_path = "../medcat-v2/" @@ -36,7 +37,8 @@ def repl_nb(m, file_path: pathlib.Path): def do_patch(nb_path: pathlib.Path, - regex: re.Pattern = shell_pattern, repl_method=repl_nb): + regex: re.Pattern = shell_pattern, + repl_method=repl_nb) -> bool: nb_text = nb_path.read_text(encoding="utf-8") repl = partial(repl_method, file_path=nb_path) 
@@ -44,22 +46,36 @@ def do_patch(nb_path: pathlib.Path, if nb_text != new_text: nb_path.write_text(new_text, encoding="utf-8") + return True + return False -def main(path: str): +def main(path: str, expect_min_changes: int): + total_changes = 0 for nb_path in pathlib.Path(path).rglob("**/*.ipynb"): - do_patch(nb_path) + if do_patch(nb_path): + total_changes += 1 + if expect_min_changes >= 0 and total_changes < expect_min_changes: + print(f"Expected a minimum of {expect_min_changes} changes," + f"but only found {total_changes} changes. " + "This will force a non-zero exit status so GHA workflow " + "can fail") + sys.exit(1) if __name__ == "__main__": - if len(sys.argv) != 2: - print("Usage: python patch_notebook_installs.py ") - sys.exit(1) + parser = argparse.ArgumentParser() + parser.add_argument("path", help="The path to start looking at", + type=str) + parser.add_argument("--expect-min-changes", "-c", + help="Expect at lest this number of chagnes", + type=int, default=-1) + args = parser.parse_args() - path = sys.argv[1] + path = args.path if not pathlib.Path(path).exists(): print(f"Path {path} does not exist.") sys.exit(1) - main(path) \ No newline at end of file + main(path, args.expect_min_changes) From 555d0d0ff5e3aed6d28dc6007d8eaafc0153c4f2 Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 26 Feb 2026 11:09:26 +0000 Subject: [PATCH 07/11] CU-869c87xpy: Remove debug output from workflow --- .github/workflows/medcat-v2-tutorials_main.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/medcat-v2-tutorials_main.yml b/.github/workflows/medcat-v2-tutorials_main.yml index 212fff7b7..0dc849422 100644 --- a/.github/workflows/medcat-v2-tutorials_main.yml +++ b/.github/workflows/medcat-v2-tutorials_main.yml @@ -27,9 +27,6 @@ jobs: - name: Update install targets in notebooks run: | python .ci/patch_notebook_installs.py . 
- git status - echo "" - git diff - name: Install dependencies run: | From 5f1d40b89950600f745e24a8aba70c8a368fed25 Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 26 Feb 2026 11:10:10 +0000 Subject: [PATCH 08/11] CU-869c87xpy: Make workflow fail if/when there are not enough changes in the notebook patcher --- .github/workflows/medcat-v2-tutorials_main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/medcat-v2-tutorials_main.yml b/.github/workflows/medcat-v2-tutorials_main.yml index 0dc849422..8c09f160c 100644 --- a/.github/workflows/medcat-v2-tutorials_main.yml +++ b/.github/workflows/medcat-v2-tutorials_main.yml @@ -26,7 +26,7 @@ jobs: - name: Update install targets in notebooks run: | - python .ci/patch_notebook_installs.py . + python .ci/patch_notebook_installs.py . --expect-min-changes 4 - name: Install dependencies run: | From acaf99bf5ab7a0cafba00c31588357909842bd40 Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 26 Feb 2026 11:34:03 +0000 Subject: [PATCH 09/11] CU-869c87xpy: Remove another debug output and fix more patches --- .github/workflows/medcat-v2-tutorials_main.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/medcat-v2-tutorials_main.yml b/.github/workflows/medcat-v2-tutorials_main.yml index 8c09f160c..61f297dc5 100644 --- a/.github/workflows/medcat-v2-tutorials_main.yml +++ b/.github/workflows/medcat-v2-tutorials_main.yml @@ -69,10 +69,7 @@ jobs: - name: Update install targets in notebooks run: | - python .ci/patch_notebook_installs.py . - git status - echo "" - git diff + python .ci/patch_notebook_installs.py . 
--expect-min-changes 4 - name: Install dependencies run: | From b3eebf90ff1c10db3d830077ff2caff30f511f75 Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 26 Feb 2026 11:36:53 +0000 Subject: [PATCH 10/11] CU-869c87xpy: [TO REMOVE] Add debug output --- .github/workflows/medcat-v2-tutorials_main.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/medcat-v2-tutorials_main.yml b/.github/workflows/medcat-v2-tutorials_main.yml index 61f297dc5..b64fc247d 100644 --- a/.github/workflows/medcat-v2-tutorials_main.yml +++ b/.github/workflows/medcat-v2-tutorials_main.yml @@ -37,6 +37,11 @@ jobs: - name: Install IPython kernel run: | python -m ipykernel install --name smoketests --user + + - name: DEBUG OUTPUT + run: | + python -c "import medcat;print('ver:', medcat.__version__);print('has:', dir(medcat))" + python -c "import medcat.utils;print('utils has:', dir(medcat.utils))" - name: Smoke test tutorial # NOTE: these need to be run separately so that order is guaranteed run: | From 1d3a3ed10f69c4aaadb1ae3264b8ed7e85b1e597 Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 26 Feb 2026 12:14:04 +0000 Subject: [PATCH 11/11] Revert "CU-869c87xpy: [TO REMOVE] Add debug output" This reverts commit b3eebf90ff1c10db3d830077ff2caff30f511f75. 
--- .github/workflows/medcat-v2-tutorials_main.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/medcat-v2-tutorials_main.yml b/.github/workflows/medcat-v2-tutorials_main.yml index b64fc247d..61f297dc5 100644 --- a/.github/workflows/medcat-v2-tutorials_main.yml +++ b/.github/workflows/medcat-v2-tutorials_main.yml @@ -37,11 +37,6 @@ jobs: - name: Install IPython kernel run: | python -m ipykernel install --name smoketests --user - - - name: DEBUG OUTPUT - run: | - python -c "import medcat;print('ver:', medcat.__version__);print('has:', dir(medcat))" - python -c "import medcat.utils;print('utils has:', dir(medcat.utils))" - name: Smoke test tutorial # NOTE: these need to be run separately so that order is guaranteed run: |