-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathDockerfile.lithops.ocr.gpu
More file actions
96 lines (77 loc) · 2.64 KB
/
Dockerfile.lithops.ocr.gpu
File metadata and controls
96 lines (77 loc) · 2.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# Dockerfile for Comic Analysis OCR Lithops Runtime (GPU-ENABLED)
# Optimized for AWS Batch, Azure Container Instances, GCP Compute with GPU
# Includes Tesseract, EasyOCR, and PaddleOCR with GPU acceleration
# Base image: NVIDIA CUDA 11.8 runtime with cuDNN 8 on Ubuntu 22.04.
FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04

# Image metadata, declared in a single LABEL instruction.
LABEL maintainer="Comic Analysis Team" \
      description="Comic Analysis OCR Lithops Runtime - GPU-enabled with Tesseract, EasyOCR, PaddleOCR" \
      runtime_type="gpu"
# Install Python 3.10 (interpreter, headers, pip), build tooling, the shared
# libraries listed below, and Tesseract with its English language data.
#
# DEBIAN_FRONTEND=noninteractive is set inline for the install command only:
# it keeps any package that asks a debconf question (tzdata is the usual
# culprit) from stalling the build, without leaving the variable in the
# runtime environment as an ENV instruction would.
#
# Packages are listed alphabetically to keep diffs small. The apt package
# lists are removed in the same layer so they never reach the image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
        libgl1-mesa-glx \
        libglib2.0-0 \
        libgomp1 \
        libsm6 \
        libtesseract-dev \
        libxext6 \
        libxrender-dev \
        python3-pip \
        python3.10 \
        python3.10-dev \
        tesseract-ocr \
        tesseract-ocr-eng \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Register /usr/bin/python3.10 as the `python` alternative and /usr/bin/pip3
# as the `pip` alternative, each at priority 1. `set -e` aborts the layer if
# either registration fails.
RUN set -e; \
    update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1; \
    update-alternatives --install /usr/bin/pip pip /usr/bin/pip3 1
# Every following instruction, and the container itself, runs from /function.
WORKDIR /function

# Bring the packaging toolchain up to date before installing any wheels.
RUN pip install --upgrade --no-cache-dir \
        pip \
        setuptools \
        wheel
# PyTorch and torchvision, pinned, pulled from the PyTorch cu118 wheel index
# so the builds match the CUDA 11.8 base image.
RUN pip install --no-cache-dir \
        --index-url https://download.pytorch.org/whl/cu118 \
        torch==2.1.0 \
        torchvision==0.16.0
# Install the OCR engines' Python packages: pytesseract (wrapper around the
# system Tesseract), EasyOCR, PaddleOCR, and the GPU build of PaddlePaddle.
#
# CUDA-specific PaddlePaddle builds (the ".postNNN" versions) are distributed
# from PaddlePaddle's own wheel page rather than PyPI, so that page is added
# via --find-links; the remaining packages still resolve from PyPI as before.
# NOTE(review): confirm that 2.6.0.post118 is actually published on that page;
# if the CUDA 11.8 build is shipped under a different version string, adjust
# the pin accordingly.
RUN pip install --no-cache-dir \
        --find-links https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html \
        easyocr==1.7.0 \
        paddleocr==2.7.0.3 \
        paddlepaddle-gpu==2.6.0.post118 \
        pytesseract==0.3.10
# The remaining Python dependencies are installed in separate layers, one per
# concern, each with exact version pins.

# Imaging: Pillow and the headless OpenCV build.
RUN pip install --no-cache-dir \
        opencv-python-headless==4.8.1.78 \
        Pillow==10.1.0

# Numerics.
RUN pip install --no-cache-dir \
        numpy==1.24.3 \
        scipy==1.11.4

# Lithops itself.
RUN pip install --no-cache-dir \
        lithops==3.3.1

# HTTP client used by the VLM-based OCR methods.
RUN pip install --no-cache-dir requests==2.31.0

# Small utilities: YAML parsing and progress bars.
RUN pip install --no-cache-dir \
        pyyaml==6.0.1 \
        tqdm==4.66.1
# Copy the OCR sources from the build context into /function/ocr.
COPY src/version1/ocr /function/ocr

# Put /function on the module search path so /function/ocr can be imported.
# The ${VAR:+...} form appends the previous PYTHONPATH only when one is set;
# a plain "/function:$PYTHONPATH" would expand to "/function:" when it is
# unset, and that trailing empty entry makes Python add the current working
# directory to sys.path.
ENV PYTHONPATH=/function${PYTHONPATH:+:${PYTHONPATH}}
# Runtime environment, set in one instruction:
#   - model/cache locations for Transformers, Hugging Face, Torch and EasyOCR,
#     all placed under /tmp
#   - CUDA_VISIBLE_DEVICES=0 to expose only GPU index 0 to CUDA
ENV TRANSFORMERS_CACHE=/tmp/transformers_cache \
    HF_HOME=/tmp/hf_home \
    TORCH_HOME=/tmp/torch_home \
    EASYOCR_MODULE_PATH=/tmp/easyocr \
    CUDA_VISIBLE_DEVICES=0

# Create the cache directories named above so they exist in the image.
RUN mkdir -p \
        /tmp/transformers_cache \
        /tmp/hf_home \
        /tmp/torch_home \
        /tmp/easyocr
# Optional: bake the EasyOCR English model into the image at build time by
# uncommenting the line below.
# NOTE(review): the call passes gpu=True; confirm this works on the build host,
# which may not have a GPU available.
# RUN python -c "import easyocr; reader = easyocr.Reader(['en'], gpu=True)"
# Default command (a CMD, not an ENTRYPOINT): prints a readiness message and
# exits. Any command supplied when the container is started replaces it.
# NOTE(review): presumably Lithops provides the real command at run time - confirm.
CMD ["python", "-c", "print('Comic Analysis OCR Lithops GPU Runtime Ready')"]