Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/medcat-v2-tutorials_main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:

- name: Update install targets in notebooks
run: |
python .ci/patch_notebook_installs.py .
python .ci/patch_notebook_installs.py . --expect-min-changes 4

- name: Install dependencies
run: |
Expand Down Expand Up @@ -69,7 +69,7 @@ jobs:

- name: Update install targets in notebooks
run: |
python .ci/patch_notebook_installs.py .
python .ci/patch_notebook_installs.py . --expect-min-changes 4

- name: Install dependencies
run: |
Expand Down
60 changes: 40 additions & 20 deletions medcat-v2-tutorials/.ci/patch_notebook_installs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,60 +2,80 @@
import pathlib
import re
from functools import partial
import argparse


# Notebooks are rewritten to install medcat from this local checkout.
# The relative path is resolved against the current working directory.
rel_install_path = "../medcat-v2/"
abs_install_path = str(pathlib.Path(rel_install_path).resolve())

# Matches either:
# 1. `! pip install medcat[extras]~=version`
# 2. `! pip install medcat[extras] @ git+...`
# A bare `! pip install medcat[extras]` (no specifier, no URL) is NOT matched.
#
# Both tail alternatives exclude quotes, the backslash and whitespace, so the
# match stops right before an escaped closing quote (`\"`) or the next
# shell argument instead of swallowing part of it.
shell_pattern = re.compile(
    r'(!\s*pip\s+install\s+)'      # group 1: the install command
    r'(\\?"?)'                     # group 2: optional opening \"
    r'medcat'
    r'(\[.*?\])?'                  # group 3: optional extras
    r'(?:'
    r'\s*@\s*git\+[^"\'\\\s]+'     # direct reference to a git URL
    r'|'
    r'\s*[~=!<>][^"\'\\\s]*'       # version specifier (~=, ==, >=, ...)
    r')'
    # only match \" (escaped quote), never a bare "
    r'(\\")?'                      # group 4: optional closing \"
)
# Matches a `medcat[extras] @ git+...` line in a requirements file;
# group 1 is the package name including any extras.
req_txt_pattern = re.compile(
    r'^(medcat(\[.*?\])?)\s*@\s*git\+\S+', flags=re.MULTILINE
)


def repl_nb(m, file_path: pathlib.Path):
    """Build the replacement text for one `shell_pattern` match.

    Args:
        m: The regex match; group 3 holds the optional extras
            (e.g. ``[spacy]``).
        file_path: The file being patched; used only for the log line.

    Returns:
        A `! pip install` command pointing at ``abs_install_path`` (plus any
        extras), wrapped in escaped quotes (``\\"``).
    """
    # Group 3 is None when the original command listed no extras.
    extras = m[3] or ""
    to_write = f'! pip install \\"{abs_install_path}{extras}\\"'
    print(f"[PATCHED] {file_path}\n with: '{to_write}'")
    return to_write


def do_patch(nb_path: pathlib.Path,
             regex: re.Pattern = shell_pattern,
             repl_method=repl_nb) -> bool:
    """Rewrite every match of *regex* in the file at *nb_path*.

    Args:
        nb_path: The file to patch; read and written as UTF-8.
        regex: The pattern whose matches get replaced.
        repl_method: Callable taking ``(match, file_path=...)`` and returning
            the replacement text.

    Returns:
        True if the file content changed and was written back,
        False if it was left untouched.
    """
    nb_text = nb_path.read_text(encoding="utf-8")

    repl = partial(repl_method, file_path=nb_path)
    new_text = regex.sub(repl, nb_text)

    # Only touch the file on disk when something was actually replaced.
    if nb_text == new_text:
        return False
    nb_path.write_text(new_text, encoding="utf-8")
    return True


def main(path: str, expect_min_changes: int = -1):
    """Patch all notebooks below *path* and optionally enforce a minimum.

    Args:
        path: Directory that is searched recursively for ``*.ipynb`` files.
        expect_min_changes: Minimum number of files that must have been
            changed. A negative value (the default) disables the check.

    Exits the process with status 1 when fewer files than
    *expect_min_changes* were changed.
    """
    # rglob already searches recursively, so no "**/" prefix is needed.
    total_changes = sum(
        1 for nb_path in pathlib.Path(path).rglob("*.ipynb")
        if do_patch(nb_path)
    )
    if expect_min_changes >= 0 and total_changes < expect_min_changes:
        print(f"Expected a minimum of {expect_min_changes} changes, "
              f"but only found {total_changes} changes. "
              "This will force a non-zero exit status so GHA workflow "
              "can fail")
        sys.exit(1)


if __name__ == "__main__":
    # Command-line entry point: parse arguments, validate the path, patch.
    parser = argparse.ArgumentParser()
    parser.add_argument("path", help="The path to start looking at",
                        type=str)
    # The default of -1 disables the minimum-changes check in main().
    parser.add_argument("--expect-min-changes", "-c",
                        help="Expect at least this number of changes",
                        type=int, default=-1)
    args = parser.parse_args()

    path = args.path

    if not pathlib.Path(path).exists():
        print(f"Path {path} does not exist.")
        sys.exit(1)

    main(path, args.expect_min_changes)