Patch tutorial notebooks to install the local medcat-v2 checkout, and let CI
fail when too few notebooks were patched.

* Both jobs of the tutorials workflow now run patch_notebook_installs.py with
  `--expect-min-changes 4`.
* The script resolves "../medcat-v2/" to an absolute path and rewrites
  `! pip install medcat[extras]<version spec or @ git+URL>` to install from
  that path, keeping the extras.
* The version/URL part stops at a quote, a backslash or whitespace, so the
  closing \" (or an escaped newline) of the notebook's JSON string is never
  swallowed by the match.
* do_patch() reports whether a notebook's text changed; main() counts those
  notebooks and exits with status 1 when the count is below the expected
  minimum (a negative minimum, the default, disables the check).

NOTE(review): line structure, indentation and blank context lines were
reconstructed from a whitespace-collapsed copy using the hunk line counts;
confirm the context against the target files before applying. The `index`
hashes are those of the original patch.

diff --git a/.github/workflows/medcat-v2-tutorials_main.yml b/.github/workflows/medcat-v2-tutorials_main.yml
index a2b92fd4c..61f297dc5 100644
--- a/.github/workflows/medcat-v2-tutorials_main.yml
+++ b/.github/workflows/medcat-v2-tutorials_main.yml
@@ -26,7 +26,7 @@ jobs:
 
       - name: Update install targets in notebooks
         run: |
-          python .ci/patch_notebook_installs.py .
+          python .ci/patch_notebook_installs.py . --expect-min-changes 4
 
       - name: Install dependencies
         run: |
@@ -69,7 +69,7 @@ jobs:
 
       - name: Update install targets in notebooks
         run: |
-          python .ci/patch_notebook_installs.py .
+          python .ci/patch_notebook_installs.py . --expect-min-changes 4
 
       - name: Install dependencies
         run: |
diff --git a/medcat-v2-tutorials/.ci/patch_notebook_installs.py b/medcat-v2-tutorials/.ci/patch_notebook_installs.py
index 11f88aa53..4a201fd60 100644
--- a/medcat-v2-tutorials/.ci/patch_notebook_installs.py
+++ b/medcat-v2-tutorials/.ci/patch_notebook_installs.py
@@ -2,17 +2,27 @@
 import pathlib
 import re
 from functools import partial
+import argparse
 
 
-# rel_install_path = "../medcat-v2/"
-# abs_install_path = str(pathlib.Path(rel_install_path).resolve())
+rel_install_path = "../medcat-v2/"
+abs_install_path = str(pathlib.Path(rel_install_path).resolve())
 
 # Matches either:
-# 1. `! pip install medcat[extras]`
+# 1. `! pip install medcat[extras]~=version`
 # 2. `! pip install medcat[extras] @ git+...`
 shell_pattern = re.compile(
-    r'(!\s*pip\s+install\s+)(\\["\']?)medcat(\[.*?\])'
-    r'(\s*@\s*git\+[^"\'\s]+)?\2'
+    r'(!\s*pip\s+install\s+)'  # group 1: the install command
+    r'(\\")?'  # group 2: optional opening \" (never a bare " or \)
+    r'medcat'
+    r'(\[.*?\])?'  # group 3: optional extras
+    r'(?:'
+    r'\s*@\s*git\+[^"\'\\\s]+'  # URL: stop at quote, backslash, whitespace
+    r'|'
+    r'\s*[~=!<>][^"\'\\\s]*'  # version spec: same stop characters
+    r')'
+    # only match \" (escaped quote), never a bare "
+    r'(\\")?'  # group 4: optional closing \"
 )
 req_txt_pattern = re.compile(
     r'^(medcat(\[.*?\])?)\s*@\s*git\+\S+', flags=re.MULTILINE
@@ -20,19 +30,15 @@
 
 
 def repl_nb(m, file_path: pathlib.Path):
-    # extras = m[3]
-    old_url = m[4]
-    if old_url and "medcat/v" in old_url:
-        print(f"[WARN] {file_path} refers to alpha/tagged release: "
-              f"{old_url.strip()}")
-    # to_write = f'{m[1]}\\"{abs_install_path}{extras}\\"'
-    to_write = '! pip install \\"pip\\"'
+    extras = m[3] or ""
+    to_write = f'! pip install \\"{abs_install_path}{extras}\\"'
     print(f"[PATCHED] {file_path}\n with: '{to_write}'")
     return to_write
 
 
 def do_patch(nb_path: pathlib.Path,
-             regex: re.Pattern = shell_pattern, repl_method=repl_nb):
+             regex: re.Pattern = shell_pattern,
+             repl_method=repl_nb) -> bool:
     nb_text = nb_path.read_text(encoding="utf-8")
 
     repl = partial(repl_method, file_path=nb_path)
@@ -40,22 +46,36 @@ def do_patch(nb_path: pathlib.Path,
 
     if nb_text != new_text:
         nb_path.write_text(new_text, encoding="utf-8")
+        return True
+    return False
 
 
-def main(path: str):
+def main(path: str, expect_min_changes: int = -1):
+    total_changes = 0
     for nb_path in pathlib.Path(path).rglob("**/*.ipynb"):
-        do_patch(nb_path)
+        if do_patch(nb_path):
+            total_changes += 1
+    if expect_min_changes >= 0 and total_changes < expect_min_changes:
+        print(f"Expected a minimum of {expect_min_changes} changes, "
+              f"but only found {total_changes} changes. "
+              "This will force a non-zero exit status so GHA workflow "
+              "can fail")
+        sys.exit(1)
 
 
 if __name__ == "__main__":
-    if len(sys.argv) != 2:
-        print("Usage: python patch_notebook_installs.py ")
-        sys.exit(1)
+    parser = argparse.ArgumentParser()
+    parser.add_argument("path", help="The path to start looking at",
+                        type=str)
+    parser.add_argument("--expect-min-changes", "-c",
+                        help="Expect at least this number of changes",
+                        type=int, default=-1)
+    args = parser.parse_args()
 
-    path = sys.argv[1]
+    path = args.path
     if not pathlib.Path(path).exists():
         print(f"Path {path} does not exist.")
         sys.exit(1)
 
-    main(path)
+    main(path, args.expect_min_changes)
 