forked from docling-project/docling
-
Notifications
You must be signed in to change notification settings - Fork 2
72 lines (60 loc) · 2.66 KB
/
develop.yml
File metadata and controls
72 lines (60 loc) · 2.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# CI for pull requests targeting `develop` (and manual runs).
# For each Python version in the matrix it installs the preprocessor's
# dependencies, fetches runtime data (NLTK resources and the custom layout
# model), then byte-compiles, smoke-imports, tests and builds the package.
name: CI (PR → develop)

on:
  pull_request:
    branches: [develop]
  workflow_dispatch: {}

# Least privilege: the job only needs to read the checked-out repository.
permissions:
  contents: read

# A newer run for the same pull request (or the same ref, for manual runs)
# cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  pr-tests:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted so the versions stay strings and are not parsed as floats.
        python-version: ['3.12', '3.13']
    env:
      # The preprocessor directory is now the base of the project, so it is
      # safer to point PYTHONPATH there as well.
      PYTHONPATH: ${{ github.workspace }}/genon/preprocessor
      # DOCLING_ARTIFACTS_PATH is intentionally left unset so that automatic
      # download is used.
    steps:
      - uses: actions/checkout@v5

      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}

      - uses: astral-sh/setup-uv@v6

      # Make uv use the interpreter selected by the matrix.
      - run: uv python pin ${{ matrix.python-version }}

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y libreoffice poppler-utils tesseract-ocr tesseract-ocr-kor

      # From here on, every step runs inside genon/preprocessor, mirroring
      # the Dockerfile.
      - name: Sync deps (preprocessor)
        working-directory: genon/preprocessor
        run: |
          uv sync --dev

      # - name: Install additional Python packages
      #   working-directory: genon/preprocessor
      #   run: |
      #     uv pip install --upgrade "unstructured>=0.8.0,<0.10.0" \
      #       pdf2image>=1.16.0 \
      #       pdfminer.six>=20220524 \
      #       pytesseract>=0.3.10 \
      #       weasyprint>=60.0 \
      #       opencv-python>=4.8.0

      - name: Download NLTK data
        working-directory: genon/preprocessor
        run: |
          uv run python -c "import nltk; nltk.download('averaged_perceptron_tagger_eng'); nltk.download('punkt_tab')"

      - name: Download custom layout model from HuggingFace
        working-directory: genon/preprocessor
        # Same statements as the former `python -c` one-liner, fed on stdin so
        # they can be read and diffed line by line.
        run: |
          uv run python - <<'PY'
          from docling.models.layout_model import LayoutModel
          from docling.datamodel.layout_model_specs import MNCAI_CUSTOM_LAYOUT

          print('Downloading custom layout model...')
          model_path = LayoutModel.download_models(layout_model_config=MNCAI_CUSTOM_LAYOUT, progress=True)
          print(f'Model downloaded to: { model_path }')
          PY

      - name: Compile sources
        working-directory: genon/preprocessor
        # `-x` skips every path matching the regex `olefile`.
        # NOTE(review): `.` presumably also covers the `.venv` created by
        # `uv sync`; confirm whether it should be excluded as well.
        run: uv run python -m compileall -q . -x 'olefile'

      - name: Import smoke
        working-directory: genon/preprocessor
        run: uv run python -c "import importlib; importlib.import_module('facade.attachment_processor')"

      - name: Pytest (unit+smoke+regression)
        working-directory: genon/preprocessor
        run: uv run pytest tests -m "unit or smoke or regression" -v -ra --durations=10

      - name: Build package
        working-directory: genon/preprocessor
        run: uv build