diff --git a/.github/workflows/build-windows-executable-app.yaml b/.github/workflows/build-windows-executable-app.yaml index 72eac499..70641a93 100644 --- a/.github/workflows/build-windows-executable-app.yaml +++ b/.github/workflows/build-windows-executable-app.yaml @@ -59,12 +59,14 @@ jobs: repository: t0mdavid-m/OpenMS ref: FVdeploy path: 'OpenMS' - - - name: Install Qt + + - name: Install Qt (Windows) uses: jurplel/install-qt-action@v4 with: - version: '6.8.3' + version: '6.8.3' ## Note this version is build with win64_msvc2022_64 and should always match what we use arch: 'win64_msvc2022_64' + cache: 'false' + archives: 'qtsvg qtimageformats qtbase' # https://github.com/marketplace/actions/visual-studio-shell - name: Set up Visual Studio shell @@ -112,6 +114,12 @@ jobs: rm contrib_build-Windows.tar.gz ls + - name: Add contrib to PATH + shell: bash + run: | + # Add contrib library path for runtime DLL resolution + echo "${{ github.workspace }}/OpenMS/contrib/lib" >> $GITHUB_PATH + - name: Setup ccache cache uses: actions/cache@v3 with: @@ -145,9 +153,9 @@ jobs: shell: bash run: | mkdir $GITHUB_WORKSPACE/OpenMS/bld/ + bash OpenMS/tools/ci/capture-env.sh -v $GITHUB_WORKSPACE/OpenMS/bld/CMakeCache.txt ctest --output-on-failure -V -S $GITHUB_WORKSPACE/OpenMS/tools/ci/cibuild.cmake env: - #OS_PREFIX_PATH: "${{ env.Qt5_DIR }}/lib/cmake;${{ env.Qt5_DIR }}" OPENMS_CONTRIB_LIBS: "${{ github.workspace }}/OpenMS/contrib" CI_PROVIDER: "GitHub-Actions" CMAKE_GENERATOR: "Ninja" @@ -157,6 +165,7 @@ jobs: ENABLE_TOPP_TESTING: "ON" ENABLE_CLASS_TESTING: "ON" WITH_GUI: "OFF" + WITH_PARQUET: "OFF" ADDRESS_SANITIZER: "Off" BUILD_TYPE: "Release" OPENMP: "On" @@ -199,7 +208,7 @@ jobs: build-executable: runs-on: windows-2022 needs: [build-openms, build-vue-js-component] - + steps: - name: Checkout uses: actions/checkout@v3 @@ -259,12 +268,6 @@ jobs: cp $PYTHON_DIR/DLLs/tcl86t.dll $EMBED_DIR/ cp $PYTHON_DIR/DLLs/tk86t.dll $EMBED_DIR/ - - name: Install pip - run: | - curl -O 
https://bootstrap.pypa.io/get-pip.py - ./python-${{ env.PYTHON_VERSION }}/python get-pip.py --no-warn-script-location - rm get-pip.py - - name: Uncomment 'import site' in python311._pth file run: | sed -i 's/#import site/import site/' python-${{ env.PYTHON_VERSION }}/python311._pth @@ -284,6 +287,15 @@ jobs: - name: Create .bat file run: | echo '@echo off' > ${{ env.APP_NAME }}.bat + echo 'setlocal EnableDelayedExpansion' > ${{ env.APP_NAME }}.bat + echo '' >> ${{ env.APP_NAME }}.bat + echo 'REM Set OpenMS data path for TOPP tools' >> ${{ env.APP_NAME }}.bat + echo 'set OPENMS_DATA_PATH=%~dp0share\OpenMS' >> ${{ env.APP_NAME }}.bat + echo '' >> ${{ env.APP_NAME }}.bat + echo 'REM Add each subfolder in share\OpenMS\THIRDPARTY to PATH' >> ${{ env.APP_NAME }}.bat + echo 'for /D %%D in ("%OPENMS_DATA_PATH%\THIRDPARTY\*") do (' >> ${{ env.APP_NAME }}.bat + echo ' set "PATH=!PATH!;%%D"' >> ${{ env.APP_NAME }}.bat + echo ')' >> ${{ env.APP_NAME }}.bat echo '' >> ${{ env.APP_NAME }}.bat echo 'REM Create .streamlit directory in user''s home if it doesn''t exist' >> ${{ env.APP_NAME }}.bat echo 'if not exist "%USERPROFILE%\.streamlit" mkdir "%USERPROFILE%\.streamlit"' >> ${{ env.APP_NAME }}.bat @@ -375,7 +387,7 @@ jobs: - + diff --git a/.github/workflows/test-win-exe-w-embed-py.yaml b/.github/workflows/test-win-exe-w-embed-py.yaml index 3af5f523..57a588ba 100644 --- a/.github/workflows/test-win-exe-w-embed-py.yaml +++ b/.github/workflows/test-win-exe-w-embed-py.yaml @@ -17,6 +17,11 @@ jobs: - name: Checkout code uses: actions/checkout@v4 + - name: Set up Python (regular distribution) + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} # Use the same version as the embeddable version + - name: Download python embeddable version run: | mkdir python-${{ env.PYTHON_VERSION }} @@ -24,22 +29,19 @@ jobs: unzip python-${{ env.PYTHON_VERSION }}-embed-amd64.zip -d python-${{ env.PYTHON_VERSION }} rm python-${{ env.PYTHON_VERSION }}-embed-amd64.zip - - 
name: Install pip - run: | - curl -O https://bootstrap.pypa.io/get-pip.py - ./python-${{ env.PYTHON_VERSION }}/python get-pip.py --no-warn-script-location - rm get-pip.py - - name: Uncomment 'import site' in python311._pth file run: | sed -i 's/#import site/import site/' python-${{ env.PYTHON_VERSION }}/python311._pth - + - name: Print content of python311._pth file run: | cat python-${{ env.PYTHON_VERSION }}/python311._pth - name: Install Required Packages - run: .\python-${{ env.PYTHON_VERSION }}\python -m pip install -r requirements.txt --no-warn-script-location + # Use system Python (which has development headers) to compile packages, + # installing into the embeddable Python's site-packages directory. + # The embeddable Python lacks Python.h headers needed for native extensions. + run: python -m pip install -r requirements.txt --target python-${{ env.PYTHON_VERSION }}/Lib/site-packages --upgrade --no-warn-script-location - name: Create .bat file run: | diff --git a/.gitignore b/.gitignore index d65d045e..7899d8a4 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,4 @@ run_app.bat python* gdpr_consent/node_modules/ *~ +.streamlit/secrets.toml diff --git a/.streamlit/config.toml b/.streamlit/config.toml index a68c7cb8..97cf35ad 100644 --- a/.streamlit/config.toml +++ b/.streamlit/config.toml @@ -5,8 +5,12 @@ gatherUsageStats = false developmentMode = false [server] +address = "0.0.0.0" maxUploadSize = 2000 #MB port = 8501 # should be same as configured in deployment repo +enableCORS = false +enableXsrfProtection = false + [theme] # The preset Streamlit theme that your custom theme inherits from. One of "light" or "dark". diff --git a/.streamlit/secrets.toml.example b/.streamlit/secrets.toml.example new file mode 100644 index 00000000..9cc1f4b7 --- /dev/null +++ b/.streamlit/secrets.toml.example @@ -0,0 +1,8 @@ +# Streamlit Secrets Configuration +# Copy this file to secrets.toml and fill in your values. 
+# IMPORTANT: Never commit secrets.toml to version control! + +[admin] +# Password required to save workspaces as demo workspaces (online mode only) +# Set a strong, unique password here +password = "your-secure-admin-password-here" diff --git a/Dockerfile b/Dockerfile index 1ef20918..e89f7b81 100644 --- a/Dockerfile +++ b/Dockerfile @@ -140,6 +140,14 @@ RUN rm -rf openms-build # Prepare and run streamlit app. FROM compile-openms AS run-app + +# Install Redis server for job queue and nginx for load balancing +RUN apt-get update && apt-get install -y --no-install-recommends redis-server nginx \ + && rm -rf /var/lib/apt/lists/* + +# Create Redis data directory +RUN mkdir -p /var/lib/redis && chown redis:redis /var/lib/redis + # Create workdir and copy over all streamlit related files/folders. # note: specifying folder with slash as suffix and repeating the folder name seems important to preserve directory structure @@ -157,15 +165,79 @@ COPY src/ /app/src COPY app.py /app/app.py COPY settings.json /app/settings.json COPY default-parameters.json /app/default-parameters.json +COPY presets.json /app/presets.json # add cron job to the crontab RUN echo "0 3 * * * /root/miniforge3/envs/streamlit-env/bin/python /app/clean-up-workspaces.py >> /app/clean-up-workspaces.log 2>&1" | crontab - -# create entrypoint script to start cron service and launch streamlit app -RUN echo "#!/bin/bash" > /app/entrypoint.sh && \ - echo "source /root/miniforge3/bin/activate streamlit-env" >> /app/entrypoint.sh && \ - echo "service cron start" >> /app/entrypoint.sh && \ - echo "streamlit run app.py" >> /app/entrypoint.sh +# Set default worker count (can be overridden via environment variable) +ENV RQ_WORKER_COUNT=1 +ENV REDIS_URL=redis://localhost:6379/0 + +# Number of Streamlit server instances for load balancing (default: 1 = no load balancer) +# Set to >1 to enable nginx load balancer with multiple Streamlit instances +ENV STREAMLIT_SERVER_COUNT=1 + +# create entrypoint script to start cron, 
Redis, RQ workers, and Streamlit +RUN echo -e '#!/bin/bash\n\ +set -e\n\ +source /root/miniforge3/bin/activate streamlit-env\n\ +\n\ +# Start cron for workspace cleanup\n\ +service cron start\n\ +\n\ +# Start Redis server in background\n\ +echo "Starting Redis server..."\n\ +redis-server --daemonize yes --dir /var/lib/redis --appendonly no\n\ +\n\ +# Wait for Redis to be ready\n\ +until redis-cli ping > /dev/null 2>&1; do\n\ + echo "Waiting for Redis..."\n\ + sleep 1\n\ +done\n\ +echo "Redis is ready"\n\ +\n\ +# Start RQ worker(s) in background\n\ +WORKER_COUNT=${RQ_WORKER_COUNT:-1}\n\ +echo "Starting $WORKER_COUNT RQ worker(s)..."\n\ +for i in $(seq 1 $WORKER_COUNT); do\n\ + rq worker openms-workflows --url $REDIS_URL --name worker-$i &\n\ +done\n\ +\n\ +# Load balancer setup\n\ +SERVER_COUNT=${STREAMLIT_SERVER_COUNT:-1}\n\ +\n\ +if [ "$SERVER_COUNT" -gt 1 ]; then\n\ + echo "Starting $SERVER_COUNT Streamlit instances with nginx load balancer..."\n\ +\n\ + # Generate nginx upstream block\n\ + UPSTREAM_SERVERS=""\n\ + BASE_PORT=8510\n\ + for i in $(seq 0 $((SERVER_COUNT - 1))); do\n\ + PORT=$((BASE_PORT + i))\n\ + UPSTREAM_SERVERS="${UPSTREAM_SERVERS} server 127.0.0.1:${PORT};\\n"\n\ + done\n\ +\n\ + # Write nginx config\n\ + mkdir -p /etc/nginx\n\ + echo -e "worker_processes auto;\\npid /run/nginx.pid;\\n\\nevents {\\n worker_connections 1024;\\n}\\n\\nhttp {\\n client_max_body_size 0;\\n\\n map \\$cookie_stroute \\$route_key {\\n \\x22\\x22 \\$request_id;\\n default \\$cookie_stroute;\\n }\\n\\n upstream streamlit_backend {\\n hash \\$route_key consistent;\\n${UPSTREAM_SERVERS} }\\n\\n map \\$http_upgrade \\$connection_upgrade {\\n default upgrade;\\n \\x27\\x27 close;\\n }\\n\\n server {\\n listen 0.0.0.0:8501;\\n\\n location / {\\n proxy_pass http://streamlit_backend;\\n proxy_http_version 1.1;\\n proxy_set_header Upgrade \\$http_upgrade;\\n proxy_set_header Connection \\$connection_upgrade;\\n proxy_set_header Host \\$host;\\n proxy_set_header X-Real-IP 
\\$remote_addr;\\n proxy_set_header X-Forwarded-For \\$proxy_add_x_forwarded_for;\\n proxy_set_header X-Forwarded-Proto \\$scheme;\\n proxy_read_timeout 86400;\\n proxy_send_timeout 86400;\\n proxy_buffering off;\\n add_header Set-Cookie \\x22stroute=\\$route_key; Path=/; HttpOnly; SameSite=Lax\\x22 always;\\n }\\n }\\n}" > /etc/nginx/nginx.conf\n\ +\n\ + # Start Streamlit instances on internal ports\n\ + for i in $(seq 0 $((SERVER_COUNT - 1))); do\n\ + PORT=$((BASE_PORT + i))\n\ + echo "Starting Streamlit instance on port $PORT..."\n\ + streamlit run app.py --server.port $PORT --server.address 0.0.0.0 &\n\ + done\n\ +\n\ + sleep 2\n\ + echo "Starting nginx load balancer on port 8501..."\n\ + exec /usr/sbin/nginx -g "daemon off;"\n\ +else\n\ + # Single instance mode (default) - run Streamlit directly on port 8501\n\ + echo "Starting Streamlit app..."\n\ + exec streamlit run app.py --server.address 0.0.0.0\n\ +fi\n\ +' > /app/entrypoint.sh # make the script executable RUN chmod +x /app/entrypoint.sh diff --git a/docker-compose.yml b/docker-compose.yml index 20098ba8..e0a3e1cf 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,6 +12,9 @@ services: - 8501:8501 volumes: - workspaces-streamlit-template:/workspaces-streamlit-template - command: streamlit run openms-streamlit-template/app.py + environment: + # Number of Streamlit server instances (default: 1 = no load balancer). + # Set to >1 to enable nginx load balancing across multiple Streamlit instances. 
+ - STREAMLIT_SERVER_COUNT=1 volumes: workspaces-streamlit-template: diff --git a/docs/REDIS_QUEUE_IMPLEMENTATION_PLAN.md b/docs/REDIS_QUEUE_IMPLEMENTATION_PLAN.md new file mode 100644 index 00000000..4cefc037 --- /dev/null +++ b/docs/REDIS_QUEUE_IMPLEMENTATION_PLAN.md @@ -0,0 +1,1609 @@ +# Redis Queue Implementation Plan for Online Mode + +## Overview + +This document outlines the implementation plan for introducing a Redis-based job queueing system to the OpenMS Streamlit Template's **online mode only**. This system will replace the current `multiprocessing.Process` approach with a more robust, scalable queue architecture suitable for production deployments. + +**Important:** The existing multiprocessing system remains completely unchanged for offline/local deployments (including the Windows installer). Redis queue is purely additive and only activates in online Docker deployments. + +--- + +## Design Principles + +### Plug & Play Architecture + +The Redis queue system is designed with minimal changes to existing code: + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ WorkflowManager │ +│ │ +│ start_workflow() │ +│ │ │ +│ ▼ │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ if online_mode AND redis_available: │ │ +│ │ → Submit to Redis Queue (new code) │ │ +│ │ else: │ │ +│ │ → multiprocessing.Process (existing code, unchanged)│ │ +│ └─────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +**Key Design Decisions:** +1. **Zero changes to local mode**: Windows installer and local development work exactly as before +2. **Graceful fallback**: If Redis is unavailable, automatically uses multiprocessing +3. **Feature flag**: Can be disabled via `queue_settings.enabled = false` +4. 
**Same execution logic**: The `execution()` method code is identical - only the process spawning differs + +### Offline Mode (Windows Installer) Compatibility + +The Windows installer built from GitHub Actions uses local mode with no Redis: + +| Mode | Queue System | Process Model | Use Case | +|------|--------------|---------------|----------| +| **Local** (`online_deployment: false`) | None | `multiprocessing.Process` | Windows installer, local dev | +| **Online** (`online_deployment: true`) | Redis + RQ | RQ Worker | Docker deployment | + +**No code changes required for offline mode.** The detection happens automatically: + +```python +# In WorkflowManager +def start_workflow(self): + if self._is_online_mode() and self._queue_manager.is_available: + self._start_workflow_queued() # Redis queue + else: + self._start_workflow_local() # Existing multiprocessing (unchanged) +``` + +--- + +## Current Architecture + +### How Workflows Execute Today + +``` +┌─────────────────────┐ ┌──────────────────────────┐ +│ Streamlit UI │ │ Detached Process │ +│ (Browser) │ │ (Same Container) │ +├─────────────────────┤ ├──────────────────────────┤ +│ 1. User clicks │─────→│ multiprocessing.Process │ +│ "Start Workflow" │ │ │ +│ 2. Monitor log file │ │ • Runs workflow_process()│ +│ 3. Poll for PID │ │ • Executes TOPP tools │ +│ removal │ │ • Logs to files │ +└─────────────────────┘ │ • Deletes PID on done │ + └──────────────────────────┘ +``` + +**Key Files:** +- `/src/workflow/WorkflowManager.py:25-38` - Process spawning +- `/src/workflow/StreamlitUI.py:989-1057` - Execution UI/monitoring +- `/src/workflow/CommandExecutor.py:28-61` - Command execution + +**Limitations of Current Approach:** +1. No job persistence across container restarts +2. No visibility into queue depth or worker health +3. Limited scalability (single container) +4. No job retry mechanism on failure +5. No priority queuing +6. 
Difficult to add job timeouts + +--- + +## Proposed Architecture + +### Single-Container Redis Queue System + +All components run within the same Docker container, ensuring identical environments for the web app and worker processes. + +``` +┌────────────────────────────────────────────────────────────┐ +│ Docker Container │ +│ │ +│ ┌─────────────────┐ ┌─────────────┐ ┌────────────┐ │ +│ │ Streamlit App │───→│ Redis Server│←───│ RQ Worker │ │ +│ │ (Main Process) │ │ (localhost) │ │ (Background)│ │ +│ └─────────────────┘ └─────────────┘ └────────────┘ │ +│ │ ↑ │ │ +│ │ Submit jobs │ Poll jobs │ │ +│ └──────────────────────┴──────────────────┘ │ +│ │ +│ All processes share: pyOpenMS, TOPP tools, Python env │ +└────────────────────────────────────────────────────────────┘ +``` + +### Technology Stack + +| Component | Technology | Rationale | +|-----------|------------|-----------| +| Message Broker | **Redis** (embedded) | Fast, simple, runs as background process | +| Task Queue | **RQ (Redis Queue)** | Lightweight, Python-native, simpler than Celery | +| Job Monitoring | **rq-dashboard** (optional) | Can run in same container if needed | + +**Why Single Container?** +- **Environment consistency**: Worker has identical pyOpenMS/TOPP installation +- **Simpler deployment**: One image, one container, no orchestration complexity +- **No networking issues**: All communication via localhost +- **Easier debugging**: All logs in one place +- **Lower resource overhead**: No container-to-container communication + +**Why RQ over Celery?** +- Simpler configuration (fewer moving parts) +- Lower memory footprint +- Native Python job serialization +- Perfect for single-container deployment +- Easier to debug and maintain + +--- + +## Implementation Plan + +### Phase 1: Infrastructure Setup (Single Container) + +#### 1.1 Update Dockerfile + +**File:** `/Dockerfile` + +Add Redis server installation and modify the entrypoint to start all services. 
+ +```dockerfile +# === Add to the run-app stage (around line 130) === + +# Install Redis server +RUN apt-get update && apt-get install -y --no-install-recommends \ + redis-server \ + && rm -rf /var/lib/apt/lists/* + +# Install Python Redis client and RQ +RUN pip install rq redis + +# Create Redis data directory +RUN mkdir -p /var/lib/redis && chown redis:redis /var/lib/redis + +# === Replace the entrypoint script section (around line 160-170) === + +# Create entrypoint script that starts all services +RUN echo '#!/bin/bash\n\ +set -e\n\ +\n\ +# Start cron for workspace cleanup\n\ +service cron start\n\ +\n\ +# Start Redis server in background\n\ +echo "Starting Redis server..."\n\ +redis-server --daemonize yes --dir /var/lib/redis --appendonly yes\n\ +\n\ +# Wait for Redis to be ready\n\ +until redis-cli ping > /dev/null 2>&1; do\n\ + echo "Waiting for Redis..."\n\ + sleep 1\n\ +done\n\ +echo "Redis is ready"\n\ +\n\ +# Start RQ worker(s) in background\n\ +echo "Starting RQ worker..."\n\ +cd /openms-streamlit-template\n\ +rq worker openms-workflows --url redis://localhost:6379/0 &\n\ +\n\ +# Optionally start RQ dashboard (uncomment if needed)\n\ +# rq-dashboard --redis-url redis://localhost:6379/0 --port 9181 &\n\ +\n\ +# Start Streamlit (foreground - main process)\n\ +echo "Starting Streamlit app..."\n\ +exec streamlit run app.py\n\ +' > /entrypoint.sh && chmod +x /entrypoint.sh + +ENTRYPOINT ["/entrypoint.sh"] +``` + +#### 1.2 Update Requirements + +**File:** `/requirements.txt` (additions) + +``` +rq>=1.16.0 +redis>=5.0.0 +rq-dashboard>=0.6.0 # Optional: for web-based queue monitoring +``` + +#### 1.3 Docker Compose (Minimal Changes) + +**File:** `/docker-compose.yml` + +The docker-compose.yml requires minimal changes - just add environment variable: + +```yaml +services: + openms-streamlit-template: + build: + context: . 
+ dockerfile: Dockerfile + args: + GITHUB_TOKEN: $GITHUB_TOKEN + image: openms_streamlit_template + container_name: openms-streamlit-template + restart: always + ports: + - 8501:8501 + # - 9181:9181 # Uncomment to expose RQ dashboard + volumes: + - workspaces-streamlit-template:/workspaces-streamlit-template + environment: + - REDIS_URL=redis://localhost:6379/0 + # command is handled by entrypoint.sh + +volumes: + workspaces-streamlit-template: +``` + +#### 1.4 Alternative: Supervisor for Process Management (Optional) + +For more robust process management, use `supervisord`: + +**File:** `/supervisord.conf` + +```ini +[supervisord] +nodaemon=true +user=root + +[program:redis] +command=redis-server --dir /var/lib/redis --appendonly yes +autostart=true +autorestart=true +priority=10 +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 + +[program:rq-worker] +command=rq worker openms-workflows --url redis://localhost:6379/0 +directory=/openms-streamlit-template +autostart=true +autorestart=true +priority=20 +startsecs=5 +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 + +[program:streamlit] +command=streamlit run app.py +directory=/openms-streamlit-template +autostart=true +autorestart=true +priority=30 +startsecs=10 +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 +``` + +Then update Dockerfile: +```dockerfile +RUN apt-get update && apt-get install -y supervisor +COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf +CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] +``` + +--- + +### Phase 2: Core Queue Implementation + +#### 2.1 Create Queue Manager Module + +**New File:** `/src/workflow/QueueManager.py` + +```python +""" +Redis Queue Manager for Online Mode Workflow Execution + +This module provides job queueing functionality for online deployments, 
+replacing the multiprocessing approach with Redis-backed job queues. +""" + +import os +from typing import Optional, Callable, Any +from dataclasses import dataclass +from enum import Enum +from redis import Redis +from rq import Queue, Worker +from rq.job import Job +import json +from pathlib import Path +import streamlit as st + + +class JobStatus(Enum): + """Job status enumeration matching RQ states""" + QUEUED = "queued" + STARTED = "started" + FINISHED = "finished" + FAILED = "failed" + DEFERRED = "deferred" + CANCELED = "canceled" + + +@dataclass +class JobInfo: + """Container for job information""" + job_id: str + status: JobStatus + progress: float # 0.0 to 1.0 + current_step: str + result: Optional[Any] = None + error: Optional[str] = None + enqueued_at: Optional[str] = None + started_at: Optional[str] = None + ended_at: Optional[str] = None + + +class QueueManager: + """ + Manages Redis Queue operations for workflow execution. + + Only active in online mode. Falls back to direct execution in local mode. + Redis runs on localhost within the same container. 
+ """ + + QUEUE_NAME = "openms-workflows" + # Redis runs locally in the same container + REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379/0") + + def __init__(self): + self._redis: Optional[Redis] = None + self._queue: Optional[Queue] = None + self._is_online = self._check_online_mode() + + if self._is_online: + self._init_redis() + + def _check_online_mode(self) -> bool: + """Check if running in online mode""" + if "settings" in st.session_state: + return st.session_state.settings.get("online_deployment", False) + + # Fallback: check settings file + try: + with open("settings.json", "r") as f: + settings = json.load(f) + return settings.get("online_deployment", False) + except Exception: + return False + + def _init_redis(self) -> None: + """Initialize Redis connection and queue""" + try: + self._redis = Redis.from_url(self.REDIS_URL) + self._redis.ping() # Test connection + self._queue = Queue(self.QUEUE_NAME, connection=self._redis) + except Exception as e: + st.error(f"Failed to connect to Redis: {e}") + self._redis = None + self._queue = None + + @property + def is_available(self) -> bool: + """Check if queue system is available""" + return self._is_online and self._queue is not None + + def submit_job( + self, + func: Callable, + args: tuple = (), + kwargs: dict = None, + job_id: Optional[str] = None, + timeout: int = 3600, # 1 hour default + result_ttl: int = 86400, # 24 hours + description: str = "" + ) -> Optional[str]: + """ + Submit a job to the queue. 
+ + Args: + func: The function to execute + args: Positional arguments for the function + kwargs: Keyword arguments for the function + job_id: Optional custom job ID (defaults to UUID) + timeout: Job timeout in seconds + result_ttl: How long to keep results + description: Human-readable job description + + Returns: + Job ID if successful, None otherwise + """ + if not self.is_available: + return None + + kwargs = kwargs or {} + + try: + job = self._queue.enqueue( + func, + args=args, + kwargs=kwargs, + job_id=job_id, + job_timeout=timeout, + result_ttl=result_ttl, + description=description, + meta={"description": description} + ) + return job.id + except Exception as e: + st.error(f"Failed to submit job: {e}") + return None + + def get_job_info(self, job_id: str) -> Optional[JobInfo]: + """ + Get information about a job. + + Args: + job_id: The job ID to query + + Returns: + JobInfo object or None if not found + """ + if not self.is_available: + return None + + try: + job = Job.fetch(job_id, connection=self._redis) + + # Map RQ status to our enum + status_map = { + "queued": JobStatus.QUEUED, + "started": JobStatus.STARTED, + "finished": JobStatus.FINISHED, + "failed": JobStatus.FAILED, + "deferred": JobStatus.DEFERRED, + "canceled": JobStatus.CANCELED, + } + + status = status_map.get(job.get_status(), JobStatus.QUEUED) + + # Get progress from job meta + meta = job.meta or {} + progress = meta.get("progress", 0.0) + current_step = meta.get("current_step", "") + + return JobInfo( + job_id=job.id, + status=status, + progress=progress, + current_step=current_step, + result=job.result if status == JobStatus.FINISHED else None, + error=str(job.exc_info) if job.exc_info else None, + enqueued_at=str(job.enqueued_at) if job.enqueued_at else None, + started_at=str(job.started_at) if job.started_at else None, + ended_at=str(job.ended_at) if job.ended_at else None, + ) + except Exception: + return None + + def cancel_job(self, job_id: str) -> bool: + """ + Cancel a queued or 
running job. + + Args: + job_id: The job ID to cancel + + Returns: + True if successfully canceled + """ + if not self.is_available: + return False + + try: + job = Job.fetch(job_id, connection=self._redis) + job.cancel() + return True + except Exception: + return False + + def get_queue_stats(self) -> dict: + """ + Get queue statistics. + + Returns: + Dictionary with queue stats + """ + if not self.is_available: + return {} + + try: + return { + "queued": len(self._queue), + "started": len(self._queue.started_job_registry), + "finished": len(self._queue.finished_job_registry), + "failed": len(self._queue.failed_job_registry), + "workers": Worker.count(queue=self._queue), + } + except Exception: + return {} + + def update_job_progress( + self, + job: Job, + progress: float, + current_step: str = "" + ) -> None: + """ + Update job progress (call from within worker). + + Args: + job: The current RQ job object + progress: Progress value 0.0 to 1.0 + current_step: Description of current step + """ + job.meta["progress"] = min(max(progress, 0.0), 1.0) + job.meta["current_step"] = current_step + job.save_meta() + + def store_job_id(self, workflow_dir: Path, job_id: str) -> None: + """Store job ID in workflow directory for recovery""" + job_file = workflow_dir / ".job_id" + job_file.write_text(job_id) + + def load_job_id(self, workflow_dir: Path) -> Optional[str]: + """Load job ID from workflow directory""" + job_file = workflow_dir / ".job_id" + if job_file.exists(): + return job_file.read_text().strip() + return None + + def clear_job_id(self, workflow_dir: Path) -> None: + """Clear stored job ID""" + job_file = workflow_dir / ".job_id" + if job_file.exists(): + job_file.unlink() +``` + +#### 2.2 Create Worker Tasks Module + +**New File:** `/src/workflow/tasks.py` + +```python +""" +Worker tasks for Redis Queue execution. + +These functions are executed by RQ workers and should not import Streamlit. 
+""" + +import sys +import json +from pathlib import Path +from typing import Any +from rq import get_current_job + +# Add src to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +from src.workflow.CommandExecutor import CommandExecutor +from src.workflow.FileManager import FileManager +from src.workflow.ParameterManager import ParameterManager +from src.workflow.Logger import Logger + + +def execute_workflow( + workflow_dir: str, + workflow_class: str, + workflow_module: str, +) -> dict: + """ + Execute a workflow in the worker process. + + Args: + workflow_dir: Path to the workflow directory + workflow_class: Name of the Workflow class + workflow_module: Module path containing the Workflow class + + Returns: + Dictionary with execution results + """ + job = get_current_job() + workflow_path = Path(workflow_dir) + + try: + # Update progress + _update_progress(job, 0.0, "Initializing workflow...") + + # Load the workflow class dynamically + import importlib + module = importlib.import_module(workflow_module) + WorkflowClass = getattr(module, workflow_class) + + # Initialize workflow components (non-Streamlit mode) + workflow_path = Path(workflow_dir) + + # Create a minimal workflow instance for execution + # The workflow will read params from the saved params.json + params_file = workflow_path / "params.json" + if params_file.exists(): + with open(params_file, "r") as f: + params = json.load(f) + else: + params = {} + + # Initialize executor and logger + logger = Logger(workflow_path) + file_manager = FileManager(workflow_path, params) + executor = CommandExecutor(workflow_path, logger) + + # Create workflow instance with components + workflow = WorkflowClass.__new__(WorkflowClass) + workflow.workflow_dir = workflow_path + workflow.params = params + workflow.logger = logger + workflow.file_manager = file_manager + workflow.executor = executor + + # Inject progress callback + workflow._job = job + workflow._update_progress = lambda p, 
s: _update_progress(job, p, s) + + _update_progress(job, 0.1, "Starting workflow execution...") + + # Execute the workflow + workflow.execution() + + _update_progress(job, 1.0, "Workflow completed") + + return { + "success": True, + "workflow_dir": str(workflow_path), + "message": "Workflow completed successfully" + } + + except Exception as e: + import traceback + error_msg = f"Workflow failed: {str(e)}\n{traceback.format_exc()}" + + # Log error to workflow logs + try: + log_file = workflow_path / "logs" / "all.log" + log_file.parent.mkdir(parents=True, exist_ok=True) + with open(log_file, "a") as f: + f.write(f"\n\nERROR: {error_msg}\n") + except Exception: + pass + + return { + "success": False, + "workflow_dir": str(workflow_path), + "error": error_msg + } + + +def _update_progress(job, progress: float, step: str) -> None: + """Update job progress metadata""" + if job: + job.meta["progress"] = progress + job.meta["current_step"] = step + job.save_meta() +``` + +--- + +### Phase 3: Integration with Existing Code + +#### 3.1 Modify WorkflowManager + +**File:** `/src/workflow/WorkflowManager.py` + +Add queue support while maintaining backward compatibility: + +```python +""" +Modified WorkflowManager with Redis Queue support for online mode. 
+""" + +import multiprocessing +from pathlib import Path +from typing import Optional +import json +import streamlit as st + +from src.workflow.StreamlitUI import StreamlitUI +from src.workflow.CommandExecutor import CommandExecutor +from src.workflow.FileManager import FileManager +from src.workflow.ParameterManager import ParameterManager +from src.workflow.Logger import Logger + + +class WorkflowManager(StreamlitUI): + """ + Base class for workflow management with dual execution modes: + - Online mode: Uses Redis Queue for job execution + - Local mode: Uses multiprocessing (existing behavior) + """ + + def __init__( + self, + name: str, + st_session_state: dict + ) -> None: + self.name = name + self.params = st_session_state + + # Initialize paths + self.workflow_dir = Path( + st.session_state.workspace, + self.name.replace(" ", "-").lower() + ) + self.workflow_dir.mkdir(parents=True, exist_ok=True) + + # Initialize components + self.logger = Logger(self.workflow_dir) + self.file_manager = FileManager(self.workflow_dir, self.params) + self.executor = CommandExecutor(self.workflow_dir, self.logger) + self.parameter_manager = ParameterManager(self.workflow_dir, self.params) + + # Initialize StreamlitUI + super().__init__( + self.workflow_dir, + self.logger, + self.executor, + self.parameter_manager, + self.file_manager + ) + + # Initialize queue manager for online mode + self._queue_manager: Optional['QueueManager'] = None + if self._is_online_mode(): + self._init_queue_manager() + + def _is_online_mode(self) -> bool: + """Check if running in online deployment mode""" + return st.session_state.get("settings", {}).get("online_deployment", False) + + def _init_queue_manager(self) -> None: + """Initialize queue manager for online mode""" + try: + from src.workflow.QueueManager import QueueManager + self._queue_manager = QueueManager() + except ImportError: + pass # Queue not available, will use fallback + + def start_workflow(self) -> None: + """ + Starts workflow 
execution. + + Online mode: Submits to Redis queue + Local mode: Spawns multiprocessing.Process (existing behavior) + """ + # Save current parameters before execution + self.parameter_manager.save_parameters() + + if self._queue_manager and self._queue_manager.is_available: + self._start_workflow_queued() + else: + self._start_workflow_local() + + def _start_workflow_queued(self) -> None: + """Submit workflow to Redis queue (online mode)""" + from src.workflow.tasks import execute_workflow + + # Generate job ID based on workspace + job_id = f"workflow-{self.workflow_dir.name}-{Path(st.session_state.workspace).name}" + + # Submit job + submitted_id = self._queue_manager.submit_job( + func=execute_workflow, + kwargs={ + "workflow_dir": str(self.workflow_dir), + "workflow_class": self.__class__.__name__, + "workflow_module": self.__class__.__module__, + }, + job_id=job_id, + timeout=7200, # 2 hour timeout + description=f"Workflow: {self.name}" + ) + + if submitted_id: + # Store job ID for status checking + self._queue_manager.store_job_id(self.workflow_dir, submitted_id) + st.success(f"Workflow submitted to queue (Job ID: {submitted_id})") + else: + st.error("Failed to submit workflow to queue") + + def _start_workflow_local(self) -> None: + """Start workflow as local process (existing behavior)""" + workflow_process = multiprocessing.Process(target=self.workflow_process) + workflow_process.start() + + # Create PID directory and file + self.executor.pid_dir.mkdir(parents=True, exist_ok=True) + Path(self.executor.pid_dir, str(workflow_process.pid)).touch() + + def workflow_process(self) -> None: + """ + Main workflow execution method. + Override in subclass to define workflow logic. + """ + self.logger.log("Starting workflow...") + self.execution() + self.logger.log("WORKFLOW FINISHED") + + def get_workflow_status(self) -> dict: + """ + Get current workflow execution status. 
+ + Returns: + Dictionary with status information + """ + if self._queue_manager and self._queue_manager.is_available: + job_id = self._queue_manager.load_job_id(self.workflow_dir) + if job_id: + job_info = self._queue_manager.get_job_info(job_id) + if job_info: + return { + "running": job_info.status.value in ["queued", "started"], + "status": job_info.status.value, + "progress": job_info.progress, + "current_step": job_info.current_step, + "job_id": job_id, + } + + # Fallback: check PID files (local mode) + pid_dir = self.executor.pid_dir + if pid_dir.exists() and list(pid_dir.iterdir()): + return { + "running": True, + "status": "running", + "progress": None, + "current_step": None, + "job_id": None, + } + + return { + "running": False, + "status": "idle", + "progress": None, + "current_step": None, + "job_id": None, + } + + def stop_workflow(self) -> bool: + """ + Stop a running workflow. + + Returns: + True if successfully stopped + """ + if self._queue_manager and self._queue_manager.is_available: + job_id = self._queue_manager.load_job_id(self.workflow_dir) + if job_id: + success = self._queue_manager.cancel_job(job_id) + if success: + self._queue_manager.clear_job_id(self.workflow_dir) + return success + + # Fallback: kill local process + return self._stop_local_workflow() + + def _stop_local_workflow(self) -> bool: + """Stop locally running workflow process""" + import os + import signal + + pid_dir = self.executor.pid_dir + if not pid_dir.exists(): + return False + + for pid_file in pid_dir.iterdir(): + try: + pid = int(pid_file.name) + os.kill(pid, signal.SIGTERM) + pid_file.unlink() + except (ValueError, ProcessLookupError, PermissionError): + pid_file.unlink() # Clean up stale PID file + + return True + + # Abstract methods to override + def upload(self) -> None: + """Override to define file upload UI""" + pass + + def configure(self) -> None: + """Override to define parameter configuration UI""" + pass + + def execution(self) -> None: + """Override to 
define workflow execution logic""" + pass + + def results(self) -> None: + """Override to define results display""" + pass +``` + +#### 3.2 Update StreamlitUI Execution Section + +**File:** `/src/workflow/StreamlitUI.py` + +Modify the `show_execution_section()` method to show queue status: + +```python +def show_execution_section(self) -> None: + """ + Display workflow execution section with queue status for online mode. + """ + st.header("Workflow Execution") + + # Get workflow status + status = self.get_workflow_status() if hasattr(self, 'get_workflow_status') else {} + is_running = status.get("running", False) + + # Show queue status for online mode + if status.get("job_id"): + self._show_queue_status(status) + + # Execution controls + col1, col2 = st.columns(2) + + with col1: + if is_running: + if st.button("Stop Workflow", type="secondary", use_container_width=True): + if hasattr(self, 'stop_workflow'): + self.stop_workflow() + st.rerun() + else: + if st.button("Start Workflow", type="primary", use_container_width=True): + if hasattr(self, 'start_workflow'): + self.start_workflow() + st.rerun() + + with col2: + log_level = st.selectbox( + "Log Level", + ["minimal", "commands and run times", "all"], + key="log-level-select" + ) + + # Show logs + self._show_logs(log_level, is_running) + + +def _show_queue_status(self, status: dict) -> None: + """Display queue job status""" + job_status = status.get("status", "unknown") + progress = status.get("progress") + current_step = status.get("current_step", "") + + # Status indicator + status_colors = { + "queued": "🟡", + "started": "🔵", + "finished": "🟢", + "failed": "🔴", + } + + status_icon = status_colors.get(job_status, "⚪") + st.markdown(f"**Job Status:** {status_icon} {job_status.capitalize()}") + + # Progress bar + if progress is not None and job_status == "started": + st.progress(progress, text=current_step or "Processing...") + + # Job ID + with st.expander("Job Details"): + st.code(f"Job ID: 
{status.get('job_id', 'N/A')}") +``` + +--- + +### Phase 4: Configuration & Environment + +#### 4.1 Update Settings Schema + +**File:** `/settings.json` (additions) + +```json +{ + "online_deployment": false, + "queue_settings": { + "enabled": true, + "redis_url": "redis://localhost:6379/0", + "default_timeout": 7200, + "max_retries": 3, + "result_ttl": 86400 + } +} +``` + +#### 4.2 Environment Variables + +These are set automatically in the container (localhost since same container): + +``` +REDIS_URL=redis://localhost:6379/0 +RQ_QUEUE_NAME=openms-workflows +RQ_WORKER_TIMEOUT=7200 +``` + +--- + +### Phase 5: Monitoring & Operations + +#### 5.1 Queue Health Check Endpoint + +**New File:** `/src/workflow/health.py` + +```python +"""Health check utilities for queue monitoring""" + +import os +from redis import Redis + + +def check_redis_health() -> dict: + """Check Redis connection health""" + redis_url = os.environ.get("REDIS_URL", "redis://localhost:6379/0") + + try: + redis = Redis.from_url(redis_url) + redis.ping() + info = redis.info() + + return { + "status": "healthy", + "connected_clients": info.get("connected_clients", 0), + "used_memory": info.get("used_memory_human", "unknown"), + "uptime_days": info.get("uptime_in_days", 0), + } + except Exception as e: + return { + "status": "unhealthy", + "error": str(e), + } + + +def check_worker_health() -> dict: + """Check RQ worker health""" + from rq import Worker, Queue + + redis_url = os.environ.get("REDIS_URL", "redis://localhost:6379/0") + + try: + redis = Redis.from_url(redis_url) + queue = Queue("openms-workflows", connection=redis) + workers = Worker.all(connection=redis) + + return { + "status": "healthy", + "worker_count": len(workers), + "queue_length": len(queue), + "workers": [ + { + "name": w.name, + "state": w.get_state(), + "current_job": w.get_current_job_id(), + } + for w in workers + ] + } + except Exception as e: + return { + "status": "unhealthy", + "error": str(e), + } +``` + +#### 5.2 Admin 
Dashboard Page (Optional) + +**New File:** `/content/admin_queue.py` + +```python +"""Queue administration page for online deployments""" + +import streamlit as st +from src.common.common import page_setup + +page_setup() + +# Only show in online mode +if not st.session_state.settings.get("online_deployment", False): + st.warning("Queue administration is only available in online mode.") + st.stop() + +st.title("Queue Administration") + +from src.workflow.health import check_redis_health, check_worker_health + +# Redis Health +st.subheader("Redis Status") +redis_health = check_redis_health() +if redis_health["status"] == "healthy": + st.success("Redis: Connected") + col1, col2, col3 = st.columns(3) + col1.metric("Clients", redis_health.get("connected_clients", 0)) + col2.metric("Memory", redis_health.get("used_memory", "N/A")) + col3.metric("Uptime (days)", redis_health.get("uptime_days", 0)) +else: + st.error(f"Redis: {redis_health.get('error', 'Disconnected')}") + +# Worker Health +st.subheader("Worker Status") +worker_health = check_worker_health() +if worker_health["status"] == "healthy": + st.success(f"Workers: {worker_health.get('worker_count', 0)} active") + st.metric("Queue Length", worker_health.get("queue_length", 0)) + + if worker_health.get("workers"): + st.write("**Active Workers:**") + for worker in worker_health["workers"]: + state_emoji = "🟢" if worker["state"] == "busy" else "🟡" + st.write(f"{state_emoji} {worker['name']} - {worker['state']}") +else: + st.error(f"Workers: {worker_health.get('error', 'No workers')}") + +# Link to RQ Dashboard +st.subheader("Detailed Monitoring") +st.markdown("[Open RQ Dashboard](http://localhost:9181)") +``` + +--- + +## File Summary + +### New Files to Create + +| File | Purpose | +|------|---------| +| `/src/workflow/QueueManager.py` | Redis queue interaction layer | +| `/src/workflow/tasks.py` | Worker task definitions | +| `/src/workflow/health.py` | Health check utilities | +| `/content/admin_queue.py` | Admin 
dashboard page (optional) | +| `/supervisord.conf` | Process manager config (optional) | + +### Files to Modify + +| File | Changes | +|------|---------| +| `/Dockerfile` | Install Redis server, RQ, update entrypoint | +| `/docker-compose.yml` | Minor: add REDIS_URL env var | +| `/requirements.txt` | Add `rq`, `redis` packages | +| `/src/workflow/WorkflowManager.py` | Add queue submission logic | +| `/src/workflow/StreamlitUI.py` | Add queue status display | +| `/settings.json` | Add queue configuration section | + +--- + +## Configuring Worker Count + +### Why Multiple Workers? + +Each RQ worker can process **one job at a time**. With a single worker, users must wait for the previous workflow to complete before theirs can start. Multiple workers allow parallel execution. + +| Workers | Concurrent Jobs | Use Case | +|---------|-----------------|----------| +| 1 | 1 | Development, low-traffic deployments | +| 2-3 | 2-3 | Small team, moderate usage | +| 4-8 | 4-8 | Production, high traffic | + +### Configuration Methods + +#### Method 1: Environment Variable (Recommended) + +Set `RQ_WORKER_COUNT` in docker-compose.yml or the entrypoint: + +```yaml +# docker-compose.yml +environment: + - REDIS_URL=redis://localhost:6379/0 + - RQ_WORKER_COUNT=3 # Number of workers to start +``` + +Update entrypoint.sh to read this variable: + +```bash +#!/bin/bash +# ... Redis startup ... + +# Start RQ workers based on environment variable +WORKER_COUNT=${RQ_WORKER_COUNT:-1} +echo "Starting $WORKER_COUNT RQ worker(s)..." 
+ +for i in $(seq 1 $WORKER_COUNT); do + rq worker openms-workflows --url redis://localhost:6379/0 --name worker-$i & +done + +# Start Streamlit +exec streamlit run app.py +``` + +#### Method 2: Supervisord Configuration + +For more robust process management with automatic restart: + +```ini +# supervisord.conf +[program:rq-worker] +command=rq worker openms-workflows --url redis://localhost:6379/0 +directory=/openms-streamlit-template +numprocs=%(ENV_RQ_WORKER_COUNT)s # Read from environment +process_name=worker-%(process_num)02d +autostart=true +autorestart=true +startsecs=5 +``` + +#### Method 3: Settings File + +Add to settings.json for runtime configuration: + +```json +{ + "queue_settings": { + "worker_count": 2 + } +} +``` + +### Resource Considerations + +Each worker consumes memory for: +- Python interpreter (~100-200MB base) +- pyOpenMS/TOPP tools during execution (~500MB-2GB depending on workflow) +- Input/output file processing + +**Recommended formula:** +``` +max_workers = (available_memory - 2GB) / 1.5GB +``` + +Example: 8GB container → max 4 workers + +--- + +## User Experience: Queue Status Display + +### What Users See When Queued + +When a user starts a workflow and it enters the queue, they need clear feedback: + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Workflow Execution │ +├─────────────────────────────────────────────────────────────┤ +│ │ +│ 🟡 Status: Queued │ +│ │ +│ Your workflow is #3 in the queue │ +│ Estimated wait: ~5-10 minutes │ +│ │ +│ ┌─────────────────────────────────────────────────────┐ │ +│ │ Queue Position ███░░░░░░░░░░░░░░░░░ 3 of 5 │ │ +│ └─────────────────────────────────────────────────────┘ │ +│ │ +│ [Cancel Workflow] │ +│ │ +└─────────────────────────────────────────────────────────────┘ +``` + +### Status States and UI Feedback + +| Status | Icon | Message | UI Elements | +|--------|------|---------|-------------| +| **Queued** | 🟡 | "Your workflow is #N in queue" | Position indicator, 
cancel button | +| **Starting** | 🔵 | "Workflow is starting..." | Spinner | +| **Running** | 🔵 | "Workflow in progress" | Progress bar, log viewer, stop button | +| **Completed** | 🟢 | "Workflow completed successfully" | View results button | +| **Failed** | 🔴 | "Workflow failed" | Error details, retry button | +| **Cancelled** | ⚪ | "Workflow was cancelled" | Restart button | + +### Implementation: Queue Status UI + +**File:** `/src/workflow/StreamlitUI.py` (additions) + +```python +def _show_queue_status(self, status: dict) -> None: + """Display detailed queue status to user""" + job_status = status.get("status", "unknown") + + # Status icons and colors + status_display = { + "queued": ("🟡", "Queued", "warning"), + "started": ("🔵", "Running", "info"), + "finished": ("🟢", "Completed", "success"), + "failed": ("🔴", "Failed", "error"), + "canceled": ("⚪", "Cancelled", "secondary"), + } + + icon, label, color = status_display.get(job_status, ("⚪", "Unknown", "secondary")) + + # Main status display + st.markdown(f"### {icon} Status: {label}") + + # Queue-specific information + if job_status == "queued": + queue_position = status.get("queue_position", "?") + queue_length = status.get("queue_length", "?") + + st.info(f"Your workflow is **#{queue_position}** in the queue ({queue_length} total)") + + # Visual queue indicator + if isinstance(queue_position, int) and isinstance(queue_length, int): + progress = 1 - (queue_position / max(queue_length, 1)) + st.progress(progress, text=f"Position {queue_position} of {queue_length}") + + # Estimate wait time (rough: 5 min per job ahead) + if isinstance(queue_position, int): + wait_min = (queue_position - 1) * 5 + if wait_min > 0: + st.caption(f"Estimated wait: ~{wait_min}-{wait_min + 10} minutes") + + # Running status with progress + elif job_status == "started": + progress = status.get("progress", 0) + current_step = status.get("current_step", "Processing...") + + st.progress(progress, text=current_step) + + # Expandable job 
details + with st.expander("Job Details", expanded=False): + st.code(f"""Job ID: {status.get('job_id', 'N/A')} +Submitted: {status.get('enqueued_at', 'N/A')} +Started: {status.get('started_at', 'N/A')} +Workers Active: {status.get('active_workers', 'N/A')}""") +``` + +--- + +## Sidebar Metrics (Online Mode) + +### Metrics to Display + +In online mode, enhance the existing CPU/RAM sidebar with queue metrics: + +``` +┌─────────────────────────┐ +│ System Status │ +├─────────────────────────┤ +│ CPU ████░░░░ 45% │ +│ RAM ██████░░ 72% │ +├─────────────────────────┤ +│ Queue Status │ +├─────────────────────────┤ +│ Workers 2/3 busy │ +│ Queued 5 jobs │ +│ Running 2 jobs │ +└─────────────────────────┘ +``` + +### Implementation: Sidebar Queue Metrics + +**File:** `/src/common/common.py` (additions to `render_sidebar()`) + +```python +def render_queue_metrics() -> None: + """Display queue metrics in sidebar (online mode only)""" + if not st.session_state.settings.get("online_deployment", False): + return + + try: + from src.workflow.QueueManager import QueueManager + qm = QueueManager() + + if not qm.is_available: + return + + stats = qm.get_queue_stats() + if not stats: + return + + st.sidebar.markdown("---") + st.sidebar.markdown("**Queue Status**") + + # Worker status + total_workers = stats.get("workers", 0) + busy_workers = stats.get("started", 0) + + col1, col2 = st.sidebar.columns(2) + col1.metric("Workers", f"{busy_workers}/{total_workers}", + delta=None, + help="Active workers / Total workers") + + # Queue depth + queued = stats.get("queued", 0) + col2.metric("Queued", queued, + delta=None, + help="Jobs waiting in queue") + + # Visual indicator + if total_workers > 0: + utilization = busy_workers / total_workers + st.sidebar.progress(utilization, text=f"{int(utilization*100)}% utilized") + + # Warning if queue is backing up + if queued > total_workers * 2: + st.sidebar.warning(f"High queue depth: {queued} jobs waiting") + + except Exception: + pass # Silently 
fail if queue not available + + +def render_sidebar() -> None: + """Existing sidebar render function - add queue metrics""" + # ... existing sidebar code ... + + # Add queue metrics for online mode + render_queue_metrics() +``` + +### Available Metrics + +| Metric | Description | Source | +|--------|-------------|--------| +| **Workers Total** | Number of RQ workers running | `Worker.count()` | +| **Workers Busy** | Workers currently processing | `started_job_registry` | +| **Queue Depth** | Jobs waiting to be processed | `len(queue)` | +| **Jobs Running** | Jobs currently being processed | `started_job_registry` | +| **Jobs Completed** | Recent completed jobs | `finished_job_registry` | +| **Jobs Failed** | Recent failed jobs | `failed_job_registry` | +| **Avg Wait Time** | Average time in queue | Calculated from job metadata | +| **Avg Run Time** | Average execution time | Calculated from job metadata | + +### Extended Metrics (Optional) + +For more detailed monitoring, add a dedicated metrics endpoint: + +```python +def get_detailed_queue_metrics() -> dict: + """Get comprehensive queue metrics""" + from rq import Queue, Worker + from redis import Redis + + redis = Redis.from_url(os.environ.get("REDIS_URL", "redis://localhost:6379/0")) + queue = Queue("openms-workflows", connection=redis) + workers = Worker.all(connection=redis) + + return { + # Capacity + "total_workers": len(workers), + "idle_workers": len([w for w in workers if w.get_state() == "idle"]), + "busy_workers": len([w for w in workers if w.get_state() == "busy"]), + + # Queue state + "queued_jobs": len(queue), + "started_jobs": len(queue.started_job_registry), + "finished_jobs_24h": len(queue.finished_job_registry), + "failed_jobs_24h": len(queue.failed_job_registry), + + # Performance (if tracking) + "avg_wait_time_sec": _calculate_avg_wait_time(queue), + "avg_run_time_sec": _calculate_avg_run_time(queue), + + # Health + "redis_connected": redis.ping(), + "redis_memory_mb": 
redis.info().get("used_memory_human", "N/A"), + } +``` + +--- + +## Deployment Considerations + +### Scaling Workers (Within Container) + +You can run multiple RQ workers within the same container by modifying the entrypoint: + +```bash +# Start multiple workers (in entrypoint.sh) +WORKER_COUNT=${RQ_WORKER_COUNT:-1} +for i in $(seq 1 $WORKER_COUNT); do + rq worker openms-workflows --url redis://localhost:6379/0 --name worker-$i & +done +``` + +Or with supervisord, add multiple worker programs: + +```ini +[program:rq-worker] +command=rq worker openms-workflows --url redis://localhost:6379/0 +numprocs=%(ENV_RQ_WORKER_COUNT)s +process_name=%(program_name)s-%(process_num)02d +``` + +### Redis Persistence + +Redis data is persisted using AOF (Append Only File): +```bash +redis-server --appendonly yes --dir /var/lib/redis +``` + +For container restarts, mount the Redis data directory: +```yaml +volumes: + - redis-data:/var/lib/redis +``` + +### Resource Limits + +```yaml +# In docker-compose.yml +openms-streamlit-template: + deploy: + resources: + limits: + cpus: '4' + memory: 8G +``` + +### Monitoring + +- **RQ Dashboard**: Enable in entrypoint, access at port 9181 +- **Redis CLI**: `docker exec -it openms-streamlit-template redis-cli` +- **Worker Status**: `docker exec -it openms-streamlit-template rq info` +- **All Logs**: `docker logs openms-streamlit-template` + +--- + +## Migration Path + +### Phase 1: Infrastructure +- Update Dockerfile to install Redis server and RQ +- Create entrypoint script to start all services +- Update requirements.txt +- Build and verify container starts correctly with all services + +### Phase 2: Core Implementation +- Implement QueueManager class +- Implement worker tasks module +- Add health check utilities + +### Phase 3: Integration +- Modify WorkflowManager to use queue in online mode +- Update StreamlitUI for queue status display +- Test execution flow end-to-end + +### Phase 4: Testing & Polish +- Comprehensive testing across all 
scenarios +- Verify local mode still works unchanged +- Documentation updates + +--- + +## Rollback Plan + +If issues arise, the system can fall back to local execution: + +1. Set `queue_settings.enabled = false` in settings.json +2. Or remove REDIS_URL environment variable +3. The WorkflowManager will automatically use multiprocessing fallback + +The entrypoint can also be modified to skip Redis/RQ startup entirely if needed. + +--- + +## Future Enhancements + +1. **Priority Queues**: Separate queues for different workflow types +2. **Job Scheduling**: Delayed job execution +3. **Email Notifications**: Notify users when long jobs complete +4. **Job Dependencies**: Chain workflows together +5. **Resource Quotas**: Limit jobs per user/workspace +6. **Multi-Container Scaling**: If needed later, extract workers to separate containers + +--- + +## Appendix: Testing Checklist + +- [ ] Container starts with Redis, RQ worker, and Streamlit all running +- [ ] `redis-cli ping` returns PONG inside container +- [ ] `rq info` shows worker registered +- [ ] Job submission from Streamlit succeeds +- [ ] Job status updates in real-time +- [ ] Job completion triggers correct callbacks +- [ ] Job cancellation works +- [ ] Failed jobs are handled gracefully +- [ ] Local mode (non-Docker) still works with multiprocessing fallback +- [ ] Workspace cleanup cron still functions correctly +- [ ] Logs are written correctly from worker +- [ ] Multiple concurrent jobs execute properly +- [ ] Container restart recovers Redis state (if persistence enabled) diff --git a/example-data/workspaces/example_demo/mzML-files/.gitkeep b/example-data/workspaces/example_demo/mzML-files/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/example-data/workspaces/example_demo/params.json b/example-data/workspaces/example_demo/params.json new file mode 100644 index 00000000..7c084f8b --- /dev/null +++ b/example-data/workspaces/example_demo/params.json @@ -0,0 +1,10 @@ +{ + 
"example-workflow-selected-mzML-files": [], + "image-format": "svg", + "2D-map-intensity-cutoff": 5000, + + "example-x-dimension": 10, + "example-y-dimension": 5, + + "controllo": false +} diff --git a/gdpr_consent/dist/bundle.js b/gdpr_consent/dist/bundle.js index 2d2d8142..86144573 100644 --- a/gdpr_consent/dist/bundle.js +++ b/gdpr_consent/dist/bundle.js @@ -235,7 +235,7 @@ eval("__webpack_require__.r(__webpack_exports__);\n/* harmony export */ __webpac /***/ ((__unused_webpack_module, __webpack_exports__, __webpack_require__) => { "use strict"; -eval("__webpack_require__.r(__webpack_exports__);\n/* harmony import */ var streamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! streamlit-component-lib */ \"./node_modules/streamlit-component-lib/dist/index.js\");\nvar __awaiter = (undefined && undefined.__awaiter) || function (thisArg, _arguments, P, generator) {\n function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }\n return new (P || (P = Promise))(function (resolve, reject) {\n function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }\n function rejected(value) { try { step(generator[\"throw\"](value)); } catch (e) { reject(e); } }\n function step(result) { result.done ? 
resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }\n step((generator = generator.apply(thisArg, _arguments || [])).next());\n });\n};\nvar __generator = (undefined && undefined.__generator) || function (thisArg, body) {\n var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;\n return g = { next: verb(0), \"throw\": verb(1), \"return\": verb(2) }, typeof Symbol === \"function\" && (g[Symbol.iterator] = function() { return this; }), g;\n function verb(n) { return function (v) { return step([n, v]); }; }\n function step(op) {\n if (f) throw new TypeError(\"Generator is already executing.\");\n while (g && (g = 0, op[0] && (_ = 0)), _) try {\n if (f = 1, y && (t = op[0] & 2 ? y[\"return\"] : op[0] ? y[\"throw\"] || ((t = y[\"return\"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;\n if (y = 0, t) op = [op[0] & 2, t.value];\n switch (op[0]) {\n case 0: case 1: t = op; break;\n case 4: _.label++; return { value: op[1], done: false };\n case 5: _.label++; y = op[1]; op = [0]; continue;\n case 7: op = _.ops.pop(); _.trys.pop(); continue;\n default:\n if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }\n if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }\n if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }\n if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }\n if (t[2]) _.ops.pop();\n _.trys.pop(); continue;\n }\n op = body.call(thisArg, _);\n } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }\n if (op[0] & 5) throw op[1]; return { value: op[0] ? 
op[1] : void 0, done: true };\n }\n};\n\n// Defines the configuration for Klaro\nvar klaroConfig = {\n mustConsent: true,\n acceptAll: true,\n services: []\n};\n// This will make klaroConfig globally accessible\nwindow.klaroConfig = klaroConfig;\n// Function to safely access the Klaro manager\nfunction getKlaroManager() {\n var _a;\n return ((_a = window.klaro) === null || _a === void 0 ? void 0 : _a.getManager) ? window.klaro.getManager() : null;\n}\n// Waits until Klaro Manager is available\nfunction waitForKlaroManager() {\n return __awaiter(this, arguments, void 0, function (maxWaitTime, interval) {\n var startTime, klaroManager;\n if (maxWaitTime === void 0) { maxWaitTime = 5000; }\n if (interval === void 0) { interval = 100; }\n return __generator(this, function (_a) {\n switch (_a.label) {\n case 0:\n startTime = Date.now();\n _a.label = 1;\n case 1:\n if (!(Date.now() - startTime < maxWaitTime)) return [3 /*break*/, 3];\n klaroManager = getKlaroManager();\n if (klaroManager) {\n return [2 /*return*/, klaroManager];\n }\n return [4 /*yield*/, new Promise(function (resolve) { return setTimeout(resolve, interval); })];\n case 2:\n _a.sent();\n return [3 /*break*/, 1];\n case 3: throw new Error(\"Klaro manager did not become available within the allowed time.\");\n }\n });\n });\n}\n// Helper function to handle unknown errors\nfunction handleError(error) {\n if (error instanceof Error) {\n console.error(\"Error:\", error.message);\n }\n else {\n console.error(\"Unknown error:\", error);\n }\n}\n// Tracking was accepted\nfunction callback() {\n return __awaiter(this, void 0, void 0, function () {\n var manager, return_vals, _i, _a, service, error_1;\n return __generator(this, function (_b) {\n switch (_b.label) {\n case 0:\n _b.trys.push([0, 2, , 3]);\n return [4 /*yield*/, waitForKlaroManager()];\n case 1:\n manager = _b.sent();\n if (manager.confirmed) {\n return_vals = {};\n for (_i = 0, _a = klaroConfig.services; _i < _a.length; _i++) {\n service = _a[_i];\n 
return_vals[service.name] = manager.getConsent(service.name);\n }\n streamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.setComponentValue(return_vals);\n }\n return [3 /*break*/, 3];\n case 2:\n error_1 = _b.sent();\n handleError(error_1);\n return [3 /*break*/, 3];\n case 3: return [2 /*return*/];\n }\n });\n });\n}\n// Stores if the component has been rendered before\nvar rendered = false;\nfunction onRender(event) {\n // Klaro does not work if embedded multiple times\n if (rendered) {\n return;\n }\n rendered = true;\n var data = event.detail;\n if (data.args['google_analytics']) {\n klaroConfig.services.push({\n name: 'google-analytics',\n cookies: [\n /^_ga(_.*)?/ // we delete the Google Analytics cookies if the user declines its use\n ],\n purposes: ['analytics'],\n onAccept: callback,\n onDecline: callback,\n });\n }\n if (data.args['piwik_pro']) {\n klaroConfig.services.push({\n name: 'piwik-pro',\n purposes: ['analytics'],\n onAccept: callback,\n onDecline: callback,\n });\n }\n // Create a new script element\n var script = document.createElement('script');\n // Set the necessary attributes\n script.defer = true;\n script.type = 'application/javascript';\n script.src = 'https://cdn.kiprotect.com/klaro/v0.7/klaro.js';\n // Set the klaro config\n script.setAttribute('data-config', 'klaroConfig');\n // Append the script to the head or body\n document.head.appendChild(script);\n}\n// Attach our `onRender` handler to Streamlit's render event.\nstreamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.events.addEventListener(streamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.RENDER_EVENT, onRender);\n// Tell Streamlit we're ready to start receiving data. 
We won't get our\n// first RENDER_EVENT until we call this function.\nstreamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.setComponentReady();\n// Finally, tell Streamlit to update the initial height.\nstreamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.setFrameHeight(1000);\n\n\n//# sourceURL=webpack://gdpr_consent/./src/main.ts?"); +eval("__webpack_require__.r(__webpack_exports__);\n/* harmony import */ var streamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__ = __webpack_require__(/*! streamlit-component-lib */ \"./node_modules/streamlit-component-lib/dist/index.js\");\nvar __awaiter = (undefined && undefined.__awaiter) || function (thisArg, _arguments, P, generator) {\n function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }\n return new (P || (P = Promise))(function (resolve, reject) {\n function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }\n function rejected(value) { try { step(generator[\"throw\"](value)); } catch (e) { reject(e); } }\n function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }\n step((generator = generator.apply(thisArg, _arguments || [])).next());\n });\n};\nvar __generator = (undefined && undefined.__generator) || function (thisArg, body) {\n var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;\n return g = { next: verb(0), \"throw\": verb(1), \"return\": verb(2) }, typeof Symbol === \"function\" && (g[Symbol.iterator] = function() { return this; }), g;\n function verb(n) { return function (v) { return step([n, v]); }; }\n function step(op) {\n if (f) throw new TypeError(\"Generator is already executing.\");\n while (g && (g = 0, op[0] && (_ = 0)), _) try {\n if (f = 1, y && (t = op[0] & 2 ? y[\"return\"] : op[0] ? 
y[\"throw\"] || ((t = y[\"return\"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;\n if (y = 0, t) op = [op[0] & 2, t.value];\n switch (op[0]) {\n case 0: case 1: t = op; break;\n case 4: _.label++; return { value: op[1], done: false };\n case 5: _.label++; y = op[1]; op = [0]; continue;\n case 7: op = _.ops.pop(); _.trys.pop(); continue;\n default:\n if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }\n if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }\n if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }\n if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }\n if (t[2]) _.ops.pop();\n _.trys.pop(); continue;\n }\n op = body.call(thisArg, _);\n } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }\n if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };\n }\n};\n\n// Defines the configuration for Klaro\nvar klaroConfig = {\n mustConsent: true,\n acceptAll: true,\n services: []\n};\n// This will make klaroConfig globally accessible\nwindow.klaroConfig = klaroConfig;\n// Function to safely access the Klaro manager\nfunction getKlaroManager() {\n var _a;\n return ((_a = window.klaro) === null || _a === void 0 ? void 0 : _a.getManager) ? 
window.klaro.getManager() : null;\n}\n// Waits until Klaro Manager is available\nfunction waitForKlaroManager() {\n return __awaiter(this, arguments, void 0, function (maxWaitTime, interval) {\n var startTime, klaroManager;\n if (maxWaitTime === void 0) { maxWaitTime = 5000; }\n if (interval === void 0) { interval = 100; }\n return __generator(this, function (_a) {\n switch (_a.label) {\n case 0:\n startTime = Date.now();\n _a.label = 1;\n case 1:\n if (!(Date.now() - startTime < maxWaitTime)) return [3 /*break*/, 3];\n klaroManager = getKlaroManager();\n if (klaroManager) {\n return [2 /*return*/, klaroManager];\n }\n return [4 /*yield*/, new Promise(function (resolve) { return setTimeout(resolve, interval); })];\n case 2:\n _a.sent();\n return [3 /*break*/, 1];\n case 3: throw new Error(\"Klaro manager did not become available within the allowed time.\");\n }\n });\n });\n}\n// Helper function to handle unknown errors\nfunction handleError(error) {\n if (error instanceof Error) {\n console.error(\"Error:\", error.message);\n }\n else {\n console.error(\"Unknown error:\", error);\n }\n}\n// Tracking was accepted\nfunction callback() {\n return __awaiter(this, void 0, void 0, function () {\n var manager, return_vals, _i, _a, service, error_1;\n return __generator(this, function (_b) {\n switch (_b.label) {\n case 0:\n _b.trys.push([0, 2, , 3]);\n return [4 /*yield*/, waitForKlaroManager()];\n case 1:\n manager = _b.sent();\n if (manager.confirmed) {\n return_vals = {};\n for (_i = 0, _a = klaroConfig.services; _i < _a.length; _i++) {\n service = _a[_i];\n return_vals[service.name] = manager.getConsent(service.name);\n }\n streamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.setComponentValue(return_vals);\n }\n return [3 /*break*/, 3];\n case 2:\n error_1 = _b.sent();\n handleError(error_1);\n return [3 /*break*/, 3];\n case 3: return [2 /*return*/];\n }\n });\n });\n}\n// Stores if the component has been rendered before\nvar rendered = false;\nfunction 
onRender(event) {\n // Klaro does not work if embedded multiple times\n if (rendered) {\n return;\n }\n rendered = true;\n var data = event.detail;\n if (data.args['google_analytics']) {\n klaroConfig.services.push({\n name: 'google-analytics',\n cookies: [\n /^_ga(_.*)?/ // we delete the Google Analytics cookies if the user declines its use\n ],\n purposes: ['analytics'],\n onAccept: callback,\n onDecline: callback,\n });\n }\n if (data.args['piwik_pro']) {\n klaroConfig.services.push({\n name: 'piwik-pro',\n purposes: ['analytics'],\n onAccept: callback,\n onDecline: callback,\n });\n }\n if (data.args['matomo']) {\n klaroConfig.services.push({\n name: 'matomo',\n purposes: ['analytics'],\n onAccept: callback,\n onDecline: callback,\n });\n }\n // Create a new script element\n var script = document.createElement('script');\n // Set the necessary attributes\n script.defer = true;\n script.type = 'application/javascript';\n script.src = 'https://cdn.kiprotect.com/klaro/v0.7/klaro.js';\n // Set the klaro config\n script.setAttribute('data-config', 'klaroConfig');\n // Append the script to the head or body\n document.head.appendChild(script);\n}\n// Attach our `onRender` handler to Streamlit's render event.\nstreamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.events.addEventListener(streamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.RENDER_EVENT, onRender);\n// Tell Streamlit we're ready to start receiving data. 
We won't get our\n// first RENDER_EVENT until we call this function.\nstreamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.setComponentReady();\n// Finally, tell Streamlit to update the initial height.\nstreamlit_component_lib__WEBPACK_IMPORTED_MODULE_0__.Streamlit.setFrameHeight(1000);\n\n\n//# sourceURL=webpack://gdpr_consent/./src/main.ts?"); /***/ }), diff --git a/gdpr_consent/src/main.ts b/gdpr_consent/src/main.ts index f7219ff1..059fef89 100644 --- a/gdpr_consent/src/main.ts +++ b/gdpr_consent/src/main.ts @@ -114,6 +114,16 @@ function onRender(event: Event): void { } ) } + if (data.args['matomo']) { + klaroConfig.services.push( + { + name: 'matomo', + purposes: ['analytics'], + onAccept: callback, + onDecline: callback, + } + ) + } // Create a new script element var script = document.createElement('script') diff --git a/hooks/hook-analytics.py b/hooks/hook-analytics.py index 6b8b2dab..c47f0c23 100644 --- a/hooks/hook-analytics.py +++ b/hooks/hook-analytics.py @@ -56,6 +56,21 @@ def piwik_pro_body(piwik_tag): """ +def matomo_head(matomo_url, matomo_tag): + return f""" + + + + """ + + if __name__ == '__main__': # Load configuration @@ -79,6 +94,12 @@ def piwik_pro_body(piwik_tag): piwik_tag = settings['analytics']['piwik-pro']['tag'] index = patch_body(index, piwik_pro_body(piwik_tag)) + # Configure matomo tag manager + if settings['analytics']['matomo']['enabled']: + matomo_url = settings['analytics']['matomo']['url'] + matomo_tag = settings['analytics']['matomo']['tag'] + index = patch_head(index, matomo_head(matomo_url, matomo_tag)) + # Save index.html with open(index_path, 'w') as f: f.write(index) \ No newline at end of file diff --git a/presets.json b/presets.json new file mode 100644 index 00000000..1099493d --- /dev/null +++ b/presets.json @@ -0,0 +1,45 @@ +{ + "topp-workflow": { + "High Sensitivity": { + "_description": "Optimized for detecting low-abundance features with higher noise tolerance", + "FeatureFinderMetabo": { + 
"algorithm:common:noise_threshold_int": 500.0, + "algorithm:common:chrom_peak_snr": 2.0, + "algorithm:mtd:mass_error_ppm": 15.0 + } + }, + "High Specificity": { + "_description": "Strict parameters for high-confidence feature detection", + "FeatureFinderMetabo": { + "algorithm:common:noise_threshold_int": 5000.0, + "algorithm:common:chrom_peak_snr": 5.0, + "algorithm:mtd:mass_error_ppm": 5.0 + } + }, + "Fast Analysis": { + "_description": "Faster processing with relaxed parameters for quick exploration", + "FeatureFinderMetabo": { + "algorithm:common:noise_threshold_int": 2000.0, + "algorithm:ffm:isotope_filtering_model": "none" + }, + "FeatureLinkerUnlabeledKD": { + "algorithm:link:rt_tol": 60.0, + "algorithm:link:mz_tol": 15.0 + } + }, + "Metabolomics Default": { + "_description": "Balanced parameters for general metabolomics analysis", + "FeatureFinderMetabo": { + "algorithm:common:noise_threshold_int": 1000.0, + "algorithm:common:chrom_peak_snr": 3.0, + "algorithm:mtd:mass_error_ppm": 10.0, + "algorithm:ffm:charge_lower_bound": 1, + "algorithm:ffm:charge_upper_bound": 3 + }, + "FeatureLinkerUnlabeledKD": { + "algorithm:link:rt_tol": 30.0, + "algorithm:link:mz_tol": 10.0 + } + } + } +} diff --git a/requirements.txt b/requirements.txt index 3237a816..7f91951b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,9 +16,9 @@ cachetools==5.5.2 # via streamlit captcha==0.7.1 # via src (pyproject.toml) -certifi==2025.1.31 +certifi==2025.8.3 # via requests -charset-normalizer==3.4.1 +charset-normalizer==3.4.3 # via requests click==8.1.8 # via streamlit @@ -26,11 +26,11 @@ contourpy==1.3.1 # via matplotlib cycler==0.12.1 # via matplotlib -fonttools==4.56.0 +fonttools==4.59.2 # via matplotlib gitdb==4.0.12 # via gitpython -gitpython==3.1.44 +gitpython==3.1.45 # via streamlit idna==3.10 # via requests @@ -48,7 +48,7 @@ markupsafe==3.0.2 # via jinja2 matplotlib==3.10.1 # via pyopenms -narwhals==1.32.0 +narwhals==2.5.0 # via altair numpy>=2.0 # via @@ -59,7 +59,7 @@ 
numpy>=2.0 # pyopenms # src (pyproject.toml) # streamlit -packaging==24.2 +packaging==25.0 # via # altair # matplotlib @@ -77,7 +77,7 @@ pillow==11.1.0 # streamlit plotly==5.22.0 # via src (pyproject.toml) -protobuf==5.29.4 +protobuf==6.32.0 # via streamlit psutil==7.0.0 # via src (pyproject.toml) @@ -111,7 +111,7 @@ six==1.17.0 # via python-dateutil smmap==5.0.2 # via gitdb -streamlit==1.49.0 +streamlit==1.49.1 # via # src (pyproject.toml) # streamlit-js-eval @@ -139,3 +139,7 @@ watchdog==6.0.0 xlsxwriter scipy>=1.15 polars>=1.0.0 + +# Redis Queue dependencies (for online mode) +redis>=5.0.0 +rq>=1.16.0 diff --git a/settings.json b/settings.json index aa0875b3..23b2f554 100644 --- a/settings.json +++ b/settings.json @@ -9,12 +9,31 @@ "tag": "" }, "piwik-pro": { + "enabled": false, + "tag": "" + }, + "matomo": { "enabled": true, - "tag": "57690c44-d635-43b0-ab43-f8bd3064ca06" + "url": "https://cdn.matomo.cloud/openms.matomo.cloud", + "tag": "yDGK8bfY" } }, "online_deployment": false, "enable_workspaces": true, "test": true, - "workspaces_dir": ".." + "workspaces_dir": "..", + "queue_settings": { + "default_timeout": 7200, + "result_ttl": 86400 + }, + "demo_workspaces": { + "enabled": true, + "source_dirs": [ + "example-data/workspaces" + ] + }, + "max_threads": { + "local": 4, + "online": 2 + } } \ No newline at end of file diff --git a/src/common/admin.py b/src/common/admin.py new file mode 100644 index 00000000..746fe2ec --- /dev/null +++ b/src/common/admin.py @@ -0,0 +1,151 @@ +""" +Admin utilities for the Streamlit template. + +Provides functionality for admin-only operations like saving workspaces as demos. +""" + +import hmac +import shutil +from pathlib import Path + +import streamlit as st + + +def is_admin_configured() -> bool: + """ + Check if admin password is configured in Streamlit secrets. + + Returns: + bool: True if admin password is configured, False otherwise. 
+ """ + try: + return bool(st.secrets.get("admin", {}).get("password")) + except (FileNotFoundError, KeyError): + return False + + +def verify_admin_password(password: str) -> bool: + """ + Verify the provided password against the configured admin password. + + Uses constant-time comparison to prevent timing attacks. + + Args: + password: The password to verify. + + Returns: + bool: True if password matches, False otherwise. + """ + if not is_admin_configured(): + return False + + try: + stored_password = st.secrets["admin"]["password"] + # Use constant-time comparison for security + return hmac.compare_digest(password, stored_password) + except (FileNotFoundError, KeyError): + return False + + +def get_demo_target_dir() -> Path: + """ + Get the directory where demo workspaces are stored. + + Returns: + Path: The demo workspaces directory. + """ + return Path("example-data/workspaces") + + +def demo_exists(demo_name: str) -> bool: + """ + Check if a demo workspace with the given name already exists. + + Args: + demo_name: Name of the demo to check. + + Returns: + bool: True if demo exists, False otherwise. + """ + target_dir = get_demo_target_dir() + demo_path = target_dir / demo_name + return demo_path.exists() + + +def _remove_directory_with_symlinks(path: Path) -> None: + """ + Remove a directory that may contain symlinks. + + Handles symlinks properly by removing them without following. + + Args: + path: Path to the directory to remove. + """ + if not path.exists(): + return + + for item in path.rglob("*"): + if item.is_symlink(): + item.unlink() + + # Now remove the rest normally + if path.exists(): + shutil.rmtree(path) + + +def save_workspace_as_demo(workspace_path: Path, demo_name: str) -> tuple[bool, str]: + """ + Save the current workspace as a demo workspace. + + Copies all files from the workspace to the demo directory, following symlinks + to copy actual file contents rather than symlink references. 
+ + Args: + workspace_path: Path to the source workspace. + demo_name: Name for the new demo workspace. + + Returns: + tuple[bool, str]: (success, message) tuple indicating result. + """ + # Deferred import to avoid circular dependency with common.py + from src.common.common import is_safe_workspace_name + + # Validate demo name + if not demo_name: + return False, "Demo name cannot be empty." + + if not is_safe_workspace_name(demo_name): + return False, "Invalid demo name. Avoid path separators and special characters." + + # Validate source workspace exists + if not workspace_path.exists(): + return False, "Source workspace does not exist." + + # Get target directory + target_dir = get_demo_target_dir() + demo_path = target_dir / demo_name + + try: + # Ensure parent directory exists + target_dir.mkdir(parents=True, exist_ok=True) + + # Remove existing demo if it exists (handles symlinks properly) + if demo_path.exists(): + _remove_directory_with_symlinks(demo_path) + + # Copy workspace to demo directory, following symlinks to get actual files + shutil.copytree( + workspace_path, + demo_path, + symlinks=False, # Follow symlinks, copy actual files + dirs_exist_ok=False + ) + + return True, f"Workspace saved as demo '{demo_name}' successfully." + + except PermissionError: + return False, "Permission denied. Cannot write to demo directory." 
+ except OSError as e: + return False, f"Failed to save demo: {str(e)}" + except Exception as e: + return False, f"Unexpected error: {str(e)}" diff --git a/src/common/captcha_.py b/src/common/captcha_.py index 2da672d1..498b1336 100644 --- a/src/common/captcha_.py +++ b/src/common/captcha_.py @@ -208,12 +208,13 @@ def captcha_control(): # Check if consent for tracking was given ga = st.session_state.settings['analytics']['google-analytics']['enabled'] pp = st.session_state.settings['analytics']['piwik-pro']['enabled'] - if (ga or pp) and (st.session_state.tracking_consent is None): + mt = st.session_state.settings['analytics']['matomo']['enabled'] + if (ga or pp or mt) and (st.session_state.tracking_consent is None): consent_component = st_components.declare_component("gdpr_consent", path=Path("gdpr_consent")) with st.spinner(): # Ask for consent st.session_state.tracking_consent = consent_component( - google_analytics=ga, piwik_pro=pp + google_analytics=ga, piwik_pro=pp, matomo=mt ) if st.session_state.tracking_consent is None: # No response by user yet @@ -244,21 +245,25 @@ def captcha_control(): st.image(data) c1, c2 = st.columns([70, 30]) capta2_text = st.empty() - capta2_text = c1.text_input("Enter captcha text", max_chars=5) + capta2_text = c1.text_input("Enter captcha text", max_chars=5, key="captcha_input") c2.markdown("##") if c2.form_submit_button("Verify the code", type="primary"): capta2_text = capta2_text.replace(" ", "") # if the captcha is correct, the controllo session state is set to True if st.session_state["Captcha"].lower() == capta2_text.lower().strip(): del st.session_state["Captcha"] + if "captcha_input" in st.session_state: + del st.session_state["captcha_input"] col1.empty() st.session_state["controllo"] = True st.rerun() else: # if the captcha is wrong, the controllo session state is set to False and the captcha is regenerated - st.error("🚨 Captch is wrong") + st.error("🚨 CAPTCHA is wrong") del st.session_state["Captcha"] del 
st.session_state["controllo"] + if "captcha_input" in st.session_state: + del st.session_state["captcha_input"] st.rerun() else: # wait for the button click diff --git a/src/common/common.py b/src/common/common.py index f42f3e1d..99410ef5 100644 --- a/src/common/common.py +++ b/src/common/common.py @@ -21,11 +21,168 @@ TK_AVAILABLE = False from src.common.captcha_ import captcha_control +from src.common.admin import ( + is_admin_configured, + verify_admin_password, + demo_exists, + save_workspace_as_demo, +) # Detect system platform OS_PLATFORM = sys.platform +def is_safe_workspace_name(name: str) -> bool: + """ + Check if a workspace name is safe (no path traversal characters). + + Args: + name: The workspace name to validate. + + Returns: + bool: True if safe, False if contains path separators or parent references. + """ + if not name: + return False + # Reject path separators and parent directory references + return "/" not in name and "\\" not in name and name not in ("..", ".") + + +def get_demo_source_dirs() -> list[Path]: + """ + Get list of demo workspace source directories from settings. + + Supports both legacy 'source_dir' (string) and new 'source_dirs' (array) formats. + Non-existent directories are silently skipped. + + Returns: + list[Path]: List of existing source directory paths. 
+ """ + settings = st.session_state.get("settings", {}) + demo_config = settings.get("demo_workspaces", {}) + + if not demo_config.get("enabled", False): + return [] + + # Support both source_dirs (array) and source_dir (string) for backward compatibility + if "source_dirs" in demo_config: + dirs = demo_config["source_dirs"] + if isinstance(dirs, str): + dirs = [dirs] + elif "source_dir" in demo_config: + dirs = [demo_config["source_dir"]] + else: + dirs = ["example-data/workspaces"] + + # Return only existing directories + return [Path(d) for d in dirs if Path(d).exists()] + + +def get_available_demo_workspaces() -> list[str]: + """ + Get a list of available demo workspaces from all configured source directories. + + When the same demo name exists in multiple directories, the first occurrence wins. + + Returns: + list[str]: List of unique demo workspace names. + """ + seen = set() + demos = [] + + for source_dir in get_demo_source_dirs(): + for p in source_dir.iterdir(): + if p.is_dir() and p.name not in seen: + seen.add(p.name) + demos.append(p.name) + + return demos + + +def find_demo_workspace_path(demo_name: str) -> Path | None: + """ + Find the source path for a demo workspace by searching all configured directories. + + Directories are searched in order; the first match is returned. + + Args: + demo_name: Name of the demo workspace to find. + + Returns: + Path to the demo workspace, or None if not found or name is unsafe. + """ + # Validate against path traversal attacks + if not is_safe_workspace_name(demo_name): + return None + + for source_dir in get_demo_source_dirs(): + demo_path = source_dir / demo_name + if demo_path.exists() and demo_path.is_dir(): + return demo_path + return None + + +def _symlink_tree(source: Path, target: Path) -> None: + """ + Recursively create directory structure and symlink files from source to target. 
+ + Creates real directories but symlinks individual files, allowing users to + add new files to workspace directories without affecting the original. + params.json and .ini files are copied instead of symlinked so they can be + modified independently. + + Args: + source: Source directory path. + target: Target directory path. + """ + target.mkdir(parents=True, exist_ok=True) + for item in source.iterdir(): + target_item = target / item.name + if item.is_dir(): + _symlink_tree(item, target_item) + elif item.name == "params.json" or item.suffix == ".ini": + # Copy config files so they can be modified independently + shutil.copy2(item, target_item) + else: + # Create symlink to the source file + target_item.symlink_to(item.resolve()) + + +def copy_demo_workspace(demo_name: str, target_path: Path) -> bool: + """ + Copy a demo workspace to the target path. + + On Linux, creates symlinks to demo files instead of copying them. + On other platforms, copies files normally. + + Searches all configured source directories for the demo (first match wins). + + Args: + demo_name: Name of the demo workspace to copy. + target_path: Destination path for the workspace. + + Returns: + bool: True if copy was successful, False otherwise. 
+ """ + demo_path = find_demo_workspace_path(demo_name) + + if demo_path is None: + return False + + try: + if target_path.exists(): + shutil.rmtree(target_path) + + # Use symlinks on Linux for efficiency + if OS_PLATFORM == "linux": + _symlink_tree(demo_path, target_path) + else: + shutil.copytree(demo_path, target_path) + return True + except Exception: + return False + + @st.fragment(run_every=5) def monitor_hardware(): cpu_progress = psutil.cpu_percent(interval=None) / 100 @@ -40,6 +197,50 @@ def monitor_hardware(): st.caption(f"Last fetched at: {time.strftime('%H:%M:%S')}") +@st.fragment(run_every=5) +def monitor_queue(): + """Display queue metrics in sidebar (online mode only)""" + try: + from src.workflow.health import get_queue_metrics + + metrics = get_queue_metrics() + if not metrics.get("available", False): + return + + st.markdown("---") + st.markdown("**Queue Status**") + + total_workers = metrics.get("total_workers", 0) + busy_workers = metrics.get("busy_workers", 0) + queued_jobs = metrics.get("queued_jobs", 0) + + col1, col2 = st.columns(2) + col1.metric( + "Workers", + f"{busy_workers}/{total_workers}", + help="Busy workers / Total workers" + ) + col2.metric( + "Queued", + queued_jobs, + help="Jobs waiting in queue" + ) + + # Utilization progress bar + if total_workers > 0: + utilization = busy_workers / total_workers + st.progress(utilization, text=f"{int(utilization * 100)}% utilized") + + # Warning if queue is backing up + if queued_jobs > total_workers * 2 and total_workers > 0: + st.warning(f"High queue depth: {queued_jobs} jobs waiting") + + st.caption(f"Last fetched at: {time.strftime('%H:%M:%S')}") + + except Exception: + pass # Silently fail if queue not available + + def load_params(default: bool = False) -> dict[str, Any]: """ Load parameters from a JSON file and return a dictionary containing them. 
@@ -204,6 +405,23 @@ def page_setup(page: str = "") -> dict[str, Any]: width=1, height=1, ) + if (st.session_state.settings["analytics"]["matomo"]["enabled"]) and ( + st.session_state.tracking_consent["matomo"] == True + ): + html( + """ + + + + + + """, + width=1, + height=1, + ) # Determine the workspace for the current session if ("workspace" not in st.session_state) or ( @@ -237,10 +455,30 @@ def page_setup(page: str = "") -> dict[str, Any]: # Check if workspace logic is enabled if st.session_state.settings["enable_workspaces"]: + # Get available demo workspaces using helper function + available_demos = get_available_demo_workspaces() + if "workspace" in st.query_params: - st.session_state.workspace = Path( - workspaces_dir, st.query_params.workspace - ) + requested_workspace = st.query_params.workspace + + # Validate workspace name against path traversal + if not is_safe_workspace_name(requested_workspace): + # Invalid workspace name - fall back to new UUID workspace + workspace_id = str(uuid.uuid1()) + st.session_state.workspace = Path(workspaces_dir, workspace_id) + st.query_params.workspace = workspace_id + # Check if the requested workspace is a demo workspace (online mode) + elif st.session_state.location == "online" and requested_workspace in available_demos: + # Create a new UUID workspace and copy demo contents + workspace_id = str(uuid.uuid1()) + st.session_state.workspace = Path(workspaces_dir, workspace_id) + st.query_params.workspace = workspace_id + # Copy demo workspace contents using helper function + copy_demo_workspace(requested_workspace, st.session_state.workspace) + else: + st.session_state.workspace = Path( + workspaces_dir, requested_workspace + ) elif st.session_state.location == "online": workspace_id = str(uuid.uuid1()) st.session_state.workspace = Path(workspaces_dir, workspace_id) @@ -253,6 +491,12 @@ def page_setup(page: str = "") -> dict[str, Any]: # Use default workspace when workspace feature is disabled 
st.session_state.workspace = Path(workspaces_dir, "default") + # For local mode with workspaces disabled, copy demo workspaces if they don't exist + for demo_name in get_available_demo_workspaces(): + target = Path(workspaces_dir, demo_name) + if not target.exists(): + copy_demo_workspace(demo_name, target) + if st.session_state.location != "online": # not any captcha so, controllo should be true st.session_state["controllo"] = True @@ -396,8 +640,134 @@ def change_workspace(): st.query_params.workspace = "default" st.rerun() + # Demo workspace loader for online mode + if st.session_state.location == "online": + available_demos = get_available_demo_workspaces() + if available_demos: + with st.expander("🎮 **Demo Data**"): + st.caption("Load example data to explore the app") + selected_demo = st.selectbox( + "Select demo dataset", + available_demos, + key="selected-demo-workspace" + ) + if st.button("Load Demo Data"): + demo_path = find_demo_workspace_path(selected_demo) + if demo_path: + # Link or copy demo files to current workspace + for item in demo_path.iterdir(): + target = st.session_state.workspace / item.name + if item.is_dir(): + if target.exists(): + shutil.rmtree(target) + # Use symlinks on Linux for efficiency + if OS_PLATFORM == "linux": + _symlink_tree(item, target) + else: + shutil.copytree(item, target) + else: + if target.exists(): + target.unlink() + # Copy config files so they can be modified independently + if OS_PLATFORM == "linux" and item.name != "params.json" and item.suffix != ".ini": + target.symlink_to(item.resolve()) + else: + shutil.copy2(item, target) + st.success(f"Demo data '{selected_demo}' loaded!") + time.sleep(1) + st.rerun() + + # Save as Demo section (online mode only) + with st.expander("💾 **Save as Demo**"): + st.caption("Save current workspace as a demo for others to use") + + demo_name_input = st.text_input( + "Demo name", + key="save-demo-name", + placeholder="e.g., workshop-2024", + help="Name for the demo workspace (no 
spaces or special characters)" + ) + + # Check if demo already exists + demo_name_clean = demo_name_input.strip() if demo_name_input else "" + existing_demo = demo_exists(demo_name_clean) if demo_name_clean else False + + if existing_demo: + st.warning(f"Demo '{demo_name_clean}' already exists and will be overwritten.") + confirm_overwrite = st.checkbox( + "Confirm overwrite", + key="confirm-demo-overwrite" + ) + else: + confirm_overwrite = True # No confirmation needed for new demos + + if st.button("Save as Demo", key="save-demo-btn", disabled=not demo_name_clean): + if not is_admin_configured(): + st.error( + "Admin not configured. Create `.streamlit/secrets.toml` with " + "an `[admin]` section containing `password = \"your-password\"`" + ) + elif existing_demo and not confirm_overwrite: + st.error("Please confirm overwrite to continue.") + else: + # Show password dialog + st.session_state["show_admin_password_dialog"] = True + + # Password dialog (shown after clicking Save as Demo) + if st.session_state.get("show_admin_password_dialog", False): + admin_password = st.text_input( + "Admin password", + type="password", + key="admin-password-input", + help="Enter the admin password to save this workspace as a demo" + ) + + col1, col2 = st.columns(2) + with col1: + if st.button("Confirm", key="confirm-save-demo"): + if verify_admin_password(admin_password): + success, message = save_workspace_as_demo( + st.session_state.workspace, + demo_name_clean + ) + if success: + st.success(message) + st.session_state["show_admin_password_dialog"] = False + time.sleep(1) + st.rerun() + else: + st.error(message) + else: + st.error("Invalid admin password.") + + with col2: + if st.button("Cancel", key="cancel-save-demo"): + st.session_state["show_admin_password_dialog"] = False + st.rerun() + + # All pages have settings, workflow indicator and logo + with st.expander("⚙️ **Settings**"): + img_formats = ["svg", "png", "jpeg", "webp"] + st.selectbox( + "image export format", + 
img_formats, + img_formats.index(params["image-format"]), + key="image-format", + ) + st.markdown("## Spectrum Plotting") + st.selectbox("Bin Peaks", ["auto", True, False], key="spectrum_bin_peaks") + if st.session_state["spectrum_bin_peaks"] == True: + st.number_input( + "Number of Bins (m/z)", 1, 10000, 50, key="spectrum_num_bins" + ) + else: + st.session_state["spectrum_num_bins"] = 50 + with st.expander("📊 **Resource Utilization**"): monitor_hardware() + # Show queue metrics in online mode + if st.session_state.settings.get("online_deployment", False): + monitor_queue() # Display OpenMS WebApp Template Version from settings.json with st.container(): diff --git a/src/fileupload.py b/src/fileupload.py index ef35218e..01760912 100644 --- a/src/fileupload.py +++ b/src/fileupload.py @@ -3,7 +3,7 @@ import streamlit as st -from src.common.common import reset_directory +from src.common.common import reset_directory, OS_PLATFORM @st.cache_data @@ -73,6 +73,8 @@ def load_example_mzML_files() -> None: """ Copies example mzML files to the mzML directory. + On Linux, creates symlinks to example files instead of copying them. 
+ Args: None @@ -80,9 +82,15 @@ def load_example_mzML_files() -> None: None """ mzML_dir = Path(st.session_state.workspace, "mzML-files") - # Copy files from example-data/mzML to workspace mzML directory, add to selected files + # Copy or symlink files from example-data/mzML to workspace mzML directory for f in Path("example-data", "mzML").glob("*.mzML"): - shutil.copy(f, mzML_dir) + target = mzML_dir / f.name + if OS_PLATFORM == "linux": + if target.exists(): + target.unlink() + target.symlink_to(f.resolve()) + else: + shutil.copy(f, mzML_dir) st.success("Example mzML files loaded!") diff --git a/src/workflow/CommandExecutor.py b/src/workflow/CommandExecutor.py index 4c469f71..b1c00793 100644 --- a/src/workflow/CommandExecutor.py +++ b/src/workflow/CommandExecutor.py @@ -9,6 +9,7 @@ import sys import importlib.util import json +import streamlit as st class CommandExecutor: """ @@ -25,30 +26,71 @@ def __init__(self, workflow_dir: Path, logger: Logger, parameter_manager: Parame self.logger = logger self.parameter_manager = parameter_manager + def _get_max_threads(self) -> int: + """ + Get max threads for current deployment mode. + + In local mode, reads from parameter manager (persisted params.json). + In online mode, uses the configured value directly from settings. + + Returns: + int: Maximum number of threads to use for parallel processing (minimum 1). + """ + settings = st.session_state.get("settings", {}) + max_threads_config = settings.get("max_threads", {"local": 4, "online": 2}) + + if settings.get("online_deployment", False): + value = max_threads_config.get("online", 2) + else: + default = max_threads_config.get("local", 4) + params = self.parameter_manager.get_parameters_from_json() + value = params.get("max_threads", default) + + return max(1, int(value)) + def run_multiple_commands( self, commands: list[str] - ) -> None: + ) -> bool: """ Executes multiple shell commands concurrently in separate threads. 
This method leverages threading to run each command in parallel, improving - efficiency for batch command execution. Execution time and command results are - logged if specified. + efficiency for batch command execution. The number of concurrent commands + is limited by the max_threads setting, which is distributed between + parallel command execution and per-tool thread allocation. Args: commands (list[str]): A list where each element is a list representing a command and its arguments. + + Returns: + bool: True if all commands succeeded, False if any failed. """ + # Get thread settings and calculate distribution + max_threads = self._get_max_threads() + num_commands = len(commands) + parallel_commands = min(num_commands, max_threads) + # Log the start of command execution - self.logger.log(f"Running {len(commands)} commands in parallel...", 1) + self.logger.log(f"Running {num_commands} commands (max {parallel_commands} parallel, {max_threads} total threads)...", 1) start_time = time.time() + results = [] + lock = threading.Lock() + semaphore = threading.Semaphore(parallel_commands) + + def run_and_track(cmd): + with semaphore: + success = self.run_command(cmd) + with lock: + results.append(success) + # Initialize a list to keep track of threads threads = [] # Start a new thread for each command for cmd in commands: - thread = threading.Thread(target=self.run_command, args=(cmd,)) + thread = threading.Thread(target=run_and_track, args=(cmd,)) thread.start() threads.append(thread) @@ -58,9 +100,11 @@ def run_multiple_commands( # Calculate and log the total execution time end_time = time.time() - self.logger.log(f"Total time to run {len(commands)} commands: {end_time - start_time:.2f} seconds", 1) + self.logger.log(f"Total time to run {num_commands} commands: {end_time - start_time:.2f} seconds", 1) + + return all(results) - def run_command(self, command: list[str]) -> None: + def run_command(self, command: list[str]) -> bool: """ Executes a specified shell command 
and logs its execution details. @@ -92,33 +136,45 @@ def run_command(self, command: list[str]) -> None: # User can close the Streamlit app and return to a running workflow later pid_file_path = self.pid_dir / str(child_pid) pid_file_path.touch() - + + # Buffer for stderr - will only be written to minimal log if process fails + stderr_buffer: list[str] = [] + # Real-time output capture - self._stream_output(process) - + self._stream_output(process, stderr_buffer) + # Wait for process completion process.wait() - + # Cleanup PID file pid_file_path.unlink() end_time = time.time() execution_time = end_time - start_time - + # Log completion self.logger.log(f"Process finished:\n"+' '.join(command)+f"\nTotal time to run command: {execution_time:.2f} seconds", 1) - + # Check for errors if process.returncode != 0: - self.logger.log(f"ERRORS OCCURRED: Process exited with code {process.returncode}", 2) + # Write buffered stderr to minimal log only on failure + for line in stderr_buffer: + self.logger.log(f"STDERR: {line}", 0) + self.logger.log(f"ERROR: Command failed with exit code {process.returncode}: {command[0]}", 0) + return False + return True - def _stream_output(self, process: subprocess.Popen) -> None: + def _stream_output(self, process: subprocess.Popen, stderr_buffer: list[str]) -> None: """ Streams stdout and stderr from a running process in real-time to the logger. This method runs in the workflow process, not the GUI thread, so it's safe to block. - + + Stderr is buffered and only logged to the detailed log (level 2) during execution. + The caller is responsible for writing buffered stderr to minimal log if the process fails. 
+ Args: process: The subprocess.Popen object to stream from + stderr_buffer: A list to accumulate stderr lines for conditional logging """ def read_stdout(): """Read stdout in real-time""" @@ -132,32 +188,35 @@ def read_stdout(): self.logger.log(f"Error reading stdout: {e}", 2) finally: process.stdout.close() - + def read_stderr(): - """Read stderr in real-time""" + """Read stderr in real-time, buffering for conditional minimal log output""" try: for line in iter(process.stderr.readline, ''): if line: - self.logger.log(f"STDERR: {line.rstrip()}", 2) + stderr_line = line.rstrip() + stderr_buffer.append(stderr_line) + # Log to detailed log only during execution + self.logger.log(f"STDERR: {stderr_line}", 2) if process.poll() is not None: break except Exception as e: self.logger.log(f"Error reading stderr: {e}", 2) finally: process.stderr.close() - + # Start threads to read stdout and stderr simultaneously stdout_thread = threading.Thread(target=read_stdout, daemon=True) stderr_thread = threading.Thread(target=read_stderr, daemon=True) - + stdout_thread.start() stderr_thread.start() - + # Wait for both threads to complete stdout_thread.join() stderr_thread.join() - def run_topp(self, tool: str, input_output: dict, custom_params: dict = {}) -> None: + def run_topp(self, tool: str, input_output: dict, custom_params: dict = {}) -> bool: """ Constructs and executes commands for the specified tool OpenMS TOPP tool based on the given input and output configurations. Ensures that all input/output file lists @@ -176,6 +235,9 @@ def run_topp(self, tool: str, input_output: dict, custom_params: dict = {}) -> N input_output (dict): A dictionary specifying the input/output parameter names (as key) and their corresponding file paths (as value). custom_params (dict): A dictionary of custom parameters to pass to the tool. + Returns: + bool: True if all commands succeeded, False if any failed. 
+ Raises: ValueError: If the lengths of input/output file lists are inconsistent, except for single string inputs. @@ -192,6 +254,11 @@ def run_topp(self, tool: str, input_output: dict, custom_params: dict = {}) -> N else: n_processes = max(io_lengths) + # Calculate threads per command based on max_threads setting + max_threads = self._get_max_threads() + parallel_commands = min(n_processes, max_threads) + threads_per_command = max(1, max_threads // parallel_commands) + commands = [] # Load parameters for non-defaults @@ -217,25 +284,33 @@ def run_topp(self, tool: str, input_output: dict, custom_params: dict = {}) -> N # Add non-default TOPP tool parameters if tool in params.keys(): for k, v in params[tool].items(): - if isinstance(v, str): - if "\n" in v: - command += [f"-{k}"] + v.split("\n") - elif v == 'true': - command += [f"-{k}"] - elif v == 'false': - pass - else: - command += [f"-{k}", v] + # Boolean flag handling: 'true' -> flag only, 'false' -> skip entirely + if isinstance(v, str) and v == 'true': + command += [f"-{k}"] + elif isinstance(v, str) and v == 'false': + pass + elif v == "" or v is None: + # Empty string or None: flag only (no value) + command += [f"-{k}"] else: - command += [f"-{k}", str(v)] + command += [f"-{k}"] + # Note: 0 and 0.0 are valid values, so use explicit check above + if isinstance(v, str) and "\n" in v: + command += v.split("\n") + else: + command += [str(v)] # Add custom parameters for k, v in custom_params.items(): command += [f"-{k}"] - if v: + # Skip only empty strings (pass flag with no value) + # Note: 0 and 0.0 are valid values, so use explicit check + if v != "" and v is not None: if isinstance(v, list): command += [str(x) for x in v] else: command += [str(v)] + # Add threads parameter for TOPP tools + command += ["-threads", str(threads_per_command)] commands.append(command) # check if a ini file has been written, if yes use it (contains custom defaults) @@ -245,9 +320,9 @@ def run_topp(self, tool: str, input_output: 
dict, custom_params: dict = {}) -> N # Run command(s) if len(commands) == 1: - self.run_command(commands[0]) + return self.run_command(commands[0]) elif len(commands) > 1: - self.run_multiple_commands(commands) + return self.run_multiple_commands(commands) else: raise Exception("No commands to execute.") diff --git a/src/workflow/ParameterManager.py b/src/workflow/ParameterManager.py index 85a6e06c..3b761af1 100644 --- a/src/workflow/ParameterManager.py +++ b/src/workflow/ParameterManager.py @@ -1,6 +1,7 @@ import pyopenms as poms import json import shutil +import subprocess import streamlit as st from pathlib import Path @@ -16,14 +17,36 @@ class ParameterManager: params_file (Path): Path to the JSON file where parameters are saved. param_prefix (str): Prefix for general parameter keys in Streamlit's session state. topp_param_prefix (str): Prefix for TOPP tool parameter keys in Streamlit's session state. + workflow_name (str): Name of the workflow, used for loading presets. """ # Methods related to parameter handling - def __init__(self, workflow_dir: Path): + def __init__(self, workflow_dir: Path, workflow_name: str = None): self.ini_dir = Path(workflow_dir, "ini") self.ini_dir.mkdir(parents=True, exist_ok=True) self.params_file = Path(workflow_dir, "params.json") self.param_prefix = f"{workflow_dir.stem}-param-" self.topp_param_prefix = f"{workflow_dir.stem}-TOPP-" + # Store workflow name for preset loading; default to directory stem if not provided + self.workflow_name = workflow_name or workflow_dir.stem + + def create_ini(self, tool: str) -> bool: + """ + Create an ini file for a TOPP tool if it doesn't exist. 
+ + Args: + tool: Name of the TOPP tool (e.g., "CometAdapter") + + Returns: + True if ini file exists (created or already existed), False if creation failed + """ + ini_path = Path(self.ini_dir, tool + ".ini") + if ini_path.exists(): + return True + try: + subprocess.call([tool, "-write_ini", str(ini_path)]) + except FileNotFoundError: + return False + return ini_path.exists() def save_parameters(self) -> None: """ @@ -54,22 +77,31 @@ def save_parameters(self) -> None: ) # for each TOPP tool, open the ini file for tool in current_topp_tools: + if not self.create_ini(tool): + # Could not create ini file - skip this tool + continue + ini_path = Path(self.ini_dir, f"{tool}.ini") if tool not in json_params: json_params[tool] = {} # load the param object param = poms.Param() - poms.ParamXMLFile().load(str(Path(self.ini_dir, f"{tool}.ini")), param) + poms.ParamXMLFile().load(str(ini_path), param) # get all session state param keys and values for this tool for key, value in st.session_state.items(): if key.startswith(f"{self.topp_param_prefix}{tool}:1:"): + # Skip display keys used by multiselect widgets + if key.endswith("_display"): + continue # get ini_key ini_key = key.replace(self.topp_param_prefix, "").encode() # get ini (default) value by ini_key ini_value = param.getValue(ini_key) - # check if value is different from default + is_list_param = isinstance(ini_value, list) + # check if value is different from default OR is an empty list parameter if ( - (ini_value != value) + (ini_value != value) or (key.split(":1:")[1] in json_params[tool]) + or (is_list_param and not value) # Always save empty list params ): if isinstance(value, str): value = value.strip() @@ -100,10 +132,161 @@ def get_parameters_from_json(self) -> dict: st.error("**ERROR**: Attempting to load an invalid JSON parameter file. Reset to defaults.") return {} + def get_topp_parameters(self, tool: str) -> dict: + """ + Get all parameters for a TOPP tool, merging defaults with user values. 
+ + Args: + tool: Name of the TOPP tool (e.g., "CometAdapter") + + Returns: + Dict with parameter names as keys (without tool prefix) and their values. + Returns empty dict if ini file doesn't exist. + """ + ini_path = Path(self.ini_dir, f"{tool}.ini") + if not ini_path.exists(): + return {} + + # Load defaults from ini file + param = poms.Param() + poms.ParamXMLFile().load(str(ini_path), param) + + # Build dict from ini (extract short key names) + prefix = f"{tool}:1:" + full_params = {} + for key in param.keys(): + key_str = key.decode() if isinstance(key, bytes) else str(key) + if prefix in key_str: + short_key = key_str.split(prefix, 1)[1] + full_params[short_key] = param.getValue(key) + + # Override with user-modified values from JSON + user_params = self.get_parameters_from_json().get(tool, {}) + full_params.update(user_params) + + return full_params + def reset_to_default_parameters(self) -> None: """ Resets the parameters to their default values by deleting the custom parameters JSON file. """ # Delete custom params json file - self.params_file.unlink(missing_ok=True) \ No newline at end of file + self.params_file.unlink(missing_ok=True) + + def load_presets(self) -> dict: + """ + Load preset definitions from presets.json file. + + Returns: + dict: Dictionary of presets for the current workflow, or empty dict if + presets.json doesn't exist or has no presets for this workflow. + """ + presets_file = Path("presets.json") + if not presets_file.exists(): + return {} + + try: + with open(presets_file, "r", encoding="utf-8") as f: + all_presets = json.load(f) + except (json.JSONDecodeError, IOError): + return {} + + # Normalize workflow name to match preset keys (lowercase with hyphens) + workflow_key = self.workflow_name.replace(" ", "-").lower() + return all_presets.get(workflow_key, {}) + + def get_preset_names(self) -> list: + """ + Get list of available preset names for the current workflow. 
+ + Returns: + list: List of preset names (strings), excluding special keys like _description. + """ + presets = self.load_presets() + return [name for name in presets.keys() if not name.startswith("_")] + + def get_preset_description(self, preset_name: str) -> str: + """ + Get the description for a specific preset. + + Args: + preset_name: Name of the preset + + Returns: + str: Description text for the preset, or empty string if not found. + """ + presets = self.load_presets() + preset = presets.get(preset_name, {}) + return preset.get("_description", "") + + def apply_preset(self, preset_name: str) -> bool: + """ + Apply a preset by updating params.json and clearing relevant session_state keys. + + Uses the "delete-then-rerun" pattern: instead of overwriting session_state + values (which widgets may not reflect immediately due to fragment caching), + we delete the keys so widgets re-initialize fresh from params.json on rerun. + + Args: + preset_name: Name of the preset to apply + + Returns: + bool: True if preset was applied successfully, False otherwise. 
+ """ + presets = self.load_presets() + preset = presets.get(preset_name) + if not preset: + return False + + # Load existing parameters + current_params = self.get_parameters_from_json() + + # Collect keys to delete from session_state + keys_to_delete = [] + + for key, value in preset.items(): + # Skip description key + if key == "_description": + continue + + if key == "_general": + # Handle general workflow parameters + for param_name, param_value in value.items(): + session_key = f"{self.param_prefix}{param_name}" + keys_to_delete.append(session_key) + current_params[param_name] = param_value + elif isinstance(value, dict) and not key.startswith("_"): + # Handle TOPP tool parameters + tool_name = key + if tool_name not in current_params: + current_params[tool_name] = {} + for param_name, param_value in value.items(): + session_key = f"{self.topp_param_prefix}{tool_name}:1:{param_name}" + keys_to_delete.append(session_key) + current_params[tool_name][param_name] = param_value + + # Delete affected keys from session_state so widgets re-initialize fresh + for session_key in keys_to_delete: + if session_key in st.session_state: + del st.session_state[session_key] + + # Save updated parameters to file + with open(self.params_file, "w", encoding="utf-8") as f: + json.dump(current_params, f, indent=4) + + return True + + def clear_parameter_session_state(self) -> None: + """ + Clear all parameter-related keys from session_state. + + This forces widgets to re-initialize from params.json or defaults + on the next rerun, rather than using potentially stale session_state values. 
+ """ + keys_to_delete = [ + key for key in list(st.session_state.keys()) + if key.startswith(self.param_prefix) or key.startswith(self.topp_param_prefix) + ] + for key in keys_to_delete: + del st.session_state[key] \ No newline at end of file diff --git a/src/workflow/QueueManager.py b/src/workflow/QueueManager.py new file mode 100644 index 00000000..68878bce --- /dev/null +++ b/src/workflow/QueueManager.py @@ -0,0 +1,284 @@ +""" +Redis Queue Manager for Online Mode Workflow Execution + +This module provides job queueing functionality for online deployments, +replacing the multiprocessing approach with Redis-backed job queues. +Only activates when running in online mode with Redis available. +""" + +import os +import json +from typing import Optional, Callable, Any +from dataclasses import dataclass +from enum import Enum +from pathlib import Path + + +class JobStatus(Enum): + """Job status enumeration matching RQ states""" + QUEUED = "queued" + STARTED = "started" + FINISHED = "finished" + FAILED = "failed" + DEFERRED = "deferred" + CANCELED = "canceled" + + +@dataclass +class JobInfo: + """Container for job information""" + job_id: str + status: JobStatus + progress: float # 0.0 to 1.0 + current_step: str + queue_position: Optional[int] = None + queue_length: Optional[int] = None + result: Optional[Any] = None + error: Optional[str] = None + enqueued_at: Optional[str] = None + started_at: Optional[str] = None + ended_at: Optional[str] = None + + +class QueueManager: + """ + Manages Redis Queue operations for workflow execution. + + Only active in online mode. Falls back to direct execution in local mode. + Redis runs on localhost within the same container. 
+ """ + + QUEUE_NAME = "openms-workflows" + # Redis runs locally in the same container + REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379/0") + + def __init__(self): + self._redis = None + self._queue = None + self._is_online = self._check_online_mode() + self._init_attempted = False + + if self._is_online: + self._init_redis() + + def _check_online_mode(self) -> bool: + """Check if running in online mode""" + # Check environment variable first (set in Docker) + if os.environ.get("REDIS_URL"): + return True + + # Fallback: check settings file + try: + with open("settings.json", "r") as f: + settings = json.load(f) + return settings.get("online_deployment", False) + except Exception: + return False + + def _init_redis(self) -> None: + """Initialize Redis connection and queue""" + if self._init_attempted: + return + self._init_attempted = True + + try: + from redis import Redis + from rq import Queue + + self._redis = Redis.from_url(self.REDIS_URL) + self._redis.ping() # Test connection + self._queue = Queue(self.QUEUE_NAME, connection=self._redis) + except ImportError: + # Redis/RQ packages not installed + self._redis = None + self._queue = None + except Exception: + # Redis server not available + self._redis = None + self._queue = None + + @property + def is_available(self) -> bool: + """Check if queue system is available""" + return self._is_online and self._queue is not None + + def submit_job( + self, + func: Callable, + args: tuple = (), + kwargs: dict = None, + job_id: Optional[str] = None, + timeout: int = 7200, # 2 hour default + result_ttl: int = 86400, # 24 hours + description: str = "" + ) -> Optional[str]: + """ + Submit a job to the queue. 
+ + Args: + func: The function to execute + args: Positional arguments for the function + kwargs: Keyword arguments for the function + job_id: Optional custom job ID (defaults to UUID) + timeout: Job timeout in seconds + result_ttl: How long to keep results + description: Human-readable job description + + Returns: + Job ID if successful, None otherwise + """ + if not self.is_available: + return None + + kwargs = kwargs or {} + + try: + job = self._queue.enqueue( + func, + args=args, + kwargs=kwargs, + job_id=job_id, + job_timeout=timeout, + result_ttl=result_ttl, + description=description, + meta={"description": description, "progress": 0.0, "current_step": ""} + ) + return job.id + except Exception: + return None + + def get_job_info(self, job_id: str) -> Optional[JobInfo]: + """ + Get information about a job. + + Args: + job_id: The job ID to query + + Returns: + JobInfo object or None if not found + """ + if not self.is_available: + return None + + try: + from rq.job import Job + + job = Job.fetch(job_id, connection=self._redis) + + # Map RQ status to our enum + status_map = { + "queued": JobStatus.QUEUED, + "started": JobStatus.STARTED, + "finished": JobStatus.FINISHED, + "failed": JobStatus.FAILED, + "deferred": JobStatus.DEFERRED, + "canceled": JobStatus.CANCELED, + } + + status = status_map.get(job.get_status(), JobStatus.QUEUED) + + # Get progress from job meta + meta = job.meta or {} + progress = meta.get("progress", 0.0) + current_step = meta.get("current_step", "") + + # Calculate queue position if queued + queue_position = None + queue_length = None + if status == JobStatus.QUEUED: + queue_position = self._get_job_position(job_id) + queue_length = len(self._queue) + + return JobInfo( + job_id=job.id, + status=status, + progress=progress, + current_step=current_step, + queue_position=queue_position, + queue_length=queue_length, + result=job.result if status == JobStatus.FINISHED else None, + error=str(job.exc_info) if job.exc_info else None, + 
enqueued_at=str(job.enqueued_at) if job.enqueued_at else None, + started_at=str(job.started_at) if job.started_at else None, + ended_at=str(job.ended_at) if job.ended_at else None, + ) + except Exception: + return None + + def _get_job_position(self, job_id: str) -> Optional[int]: + """Get position of a job in the queue (1-indexed)""" + try: + job_ids = self._queue.job_ids + if job_id in job_ids: + return job_ids.index(job_id) + 1 + return None + except Exception: + return None + + def cancel_job(self, job_id: str) -> bool: + """ + Cancel a queued or running job. + + Args: + job_id: The job ID to cancel + + Returns: + True if successfully canceled + """ + if not self.is_available: + return False + + try: + from rq.job import Job + + job = Job.fetch(job_id, connection=self._redis) + job.cancel() + return True + except Exception: + return False + + def get_queue_stats(self) -> dict: + """ + Get queue statistics. + + Returns: + Dictionary with queue stats + """ + if not self.is_available: + return {} + + try: + from rq import Worker + + workers = Worker.all(connection=self._redis) + busy_workers = len([w for w in workers if w.get_state() == "busy"]) + + return { + "queued": len(self._queue), + "started": len(self._queue.started_job_registry), + "finished": len(self._queue.finished_job_registry), + "failed": len(self._queue.failed_job_registry), + "workers": len(workers), + "busy_workers": busy_workers, + "idle_workers": len(workers) - busy_workers, + } + except Exception: + return {} + + def store_job_id(self, workflow_dir: Path, job_id: str) -> None: + """Store job ID in workflow directory for recovery""" + job_file = Path(workflow_dir) / ".job_id" + job_file.write_text(job_id) + + def load_job_id(self, workflow_dir: Path) -> Optional[str]: + """Load job ID from workflow directory""" + job_file = Path(workflow_dir) / ".job_id" + if job_file.exists(): + return job_file.read_text().strip() + return None + + def clear_job_id(self, workflow_dir: Path) -> None: + """Clear 
stored job ID""" + job_file = Path(workflow_dir) / ".job_id" + if job_file.exists(): + job_file.unlink() diff --git a/src/workflow/StreamlitUI.py b/src/workflow/StreamlitUI.py index bea9139b..1945d51d 100644 --- a/src/workflow/StreamlitUI.py +++ b/src/workflow/StreamlitUI.py @@ -3,7 +3,7 @@ from pathlib import Path import shutil import subprocess -from typing import Any, Union, List +from typing import Any, Union, List, Literal, Callable import json import os import re @@ -13,7 +13,6 @@ from io import BytesIO import zipfile from datetime import datetime -from streamlit_js_eval import streamlit_js_eval from src.common.common import ( OS_PLATFORM, @@ -306,13 +305,13 @@ def upload_widget( elif not fallback: st.warning(f"No **{name}** files!") - @st.fragment def select_input_file( self, key: str, name: str = "", multiple: bool = False, display_file_path: bool = False, + reactive: bool = False, ) -> None: """ Presents a widget for selecting input files from those that have been uploaded. @@ -323,7 +322,22 @@ def select_input_file( name (str, optional): The display name for the selection widget. Defaults to the key if not provided. multiple (bool, optional): If True, allows multiple files to be selected. display_file_path (bool, optional): If True, displays the full file path in the selection widget. + reactive (bool, optional): If True, widget changes trigger the parent + section to re-render, enabling conditional UI based on this widget's + value. Use for widgets that control visibility of other UI elements. + Default is False (widget changes are isolated for performance). 
""" + if reactive: + self._select_input_file_impl(key, name, multiple, display_file_path, reactive) + else: + self._select_input_file_fragmented(key, name, multiple, display_file_path, reactive) + + @st.fragment + def _select_input_file_fragmented(self, key, name, multiple, display_file_path, reactive): + self._select_input_file_impl(key, name, multiple, display_file_path, reactive) + + def _select_input_file_impl(self, key, name, multiple, display_file_path, reactive): + """Internal implementation of select_input_file - contains all the widget logic.""" if not name: name = f"**{key}**" path = Path(self.workflow_dir, "input-files", key) @@ -352,9 +366,9 @@ def select_input_file( widget_type=widget_type, options=options, display_file_path=display_file_path, + reactive=reactive, ) - @st.fragment def input_widget( self, key: str, @@ -367,6 +381,8 @@ def input_widget( max_value: Union[int, float] = None, step_size: Union[int, float] = 1, display_file_path: bool = False, + on_change: Callable = None, + reactive: bool = False, ) -> None: """ Creates and displays a Streamlit widget for user input based on specified @@ -388,7 +404,42 @@ def input_widget( max_value (Union[int, float], optional): Maximum value for number/slider widgets. step_size (Union[int, float], optional): Step size for number/slider widgets. display_file_path (bool, optional): Whether to display the full file path for file options. + reactive (bool, optional): If True, widget changes trigger the parent + section to re-render, enabling conditional UI based on this widget's + value. Use for widgets that control visibility of other UI elements. + Default is False (widget changes are isolated for performance). 
""" + if reactive: + # Render directly in parent context - changes trigger parent rerun + self._input_widget_impl( + key, default, name, help, widget_type, options, + min_value, max_value, step_size, display_file_path, on_change + ) + else: + # Render in isolated fragment - changes don't affect parent + self._input_widget_fragmented( + key, default, name, help, widget_type, options, + min_value, max_value, step_size, display_file_path, on_change + ) + + @st.fragment + def _input_widget_fragmented( + self, key, default, name, help, widget_type, + options, min_value, max_value, step_size, + display_file_path, on_change + ): + self._input_widget_impl( + key, default, name, help, widget_type, + options, min_value, max_value, step_size, + display_file_path, on_change + ) + + def _input_widget_impl( + self, key, default, name, help, widget_type, + options, min_value, max_value, step_size, + display_file_path, on_change + ): + """Internal implementation of input_widget - contains all the widget logic.""" def format_files(input: Any) -> List[str]: if not display_file_path and Path(input).exists(): @@ -410,10 +461,10 @@ def format_files(input: Any) -> List[str]: key = f"{self.parameter_manager.param_prefix}{key}" if widget_type == "text": - st.text_input(name, value=value, key=key, help=help) + st.text_input(name, value=value, key=key, help=help, on_change=on_change) elif widget_type == "textarea": - st.text_area(name, value=value, key=key, help=help) + st.text_area(name, value=value, key=key, help=help, on_change=on_change) elif widget_type == "number": number_type = float if isinstance(value, float) else int @@ -432,10 +483,11 @@ def format_files(input: Any) -> List[str]: format=None, key=key, help=help, + on_change=on_change, ) elif widget_type == "checkbox": - st.checkbox(name, value=value, key=key, help=help) + st.checkbox(name, value=value, key=key, help=help, on_change=on_change) elif widget_type == "selectbox": if options is not None: @@ -446,6 +498,7 @@ def 
format_files(input: Any) -> List[str]: key=key, format_func=format_files, help=help, + on_change=on_change, ) else: st.warning(f"Select widget '{name}' requires options parameter") @@ -459,6 +512,7 @@ def format_files(input: Any) -> List[str]: key=key, format_func=format_files, help=help, + on_change=on_change, ) else: st.warning(f"Select widget '{name}' requires options parameter") @@ -480,6 +534,7 @@ def format_files(input: Any) -> List[str]: key=key, format=None, help=help, + on_change=on_change, ) else: st.warning( @@ -487,47 +542,66 @@ def format_files(input: Any) -> List[str]: ) elif widget_type == "password": - st.text_input(name, value=value, type="password", key=key, help=help) + st.text_input(name, value=value, type="password", key=key, help=help, on_change=on_change) elif widget_type == "auto": # Auto-determine widget type based on value if isinstance(value, bool): - st.checkbox(name, value=value, key=key, help=help) + st.checkbox(name, value=value, key=key, help=help, on_change=on_change) elif isinstance(value, (int, float)): - self.input_widget( + self._input_widget_impl( key, value, - widget_type="number", name=name, + help=help, + widget_type="number", + options=None, min_value=min_value, max_value=max_value, step_size=step_size, - help=help, + display_file_path=False, + on_change=on_change, ) elif (isinstance(value, str) or value == None) and options is not None: - self.input_widget( + self._input_widget_impl( key, value, - widget_type="selectbox", name=name, - options=options, help=help, + widget_type="selectbox", + options=options, + min_value=None, + max_value=None, + step_size=1, + display_file_path=False, + on_change=on_change, ) elif isinstance(value, list) and options is not None: - self.input_widget( + self._input_widget_impl( key, value, - widget_type="multiselect", name=name, - options=options, help=help, + widget_type="multiselect", + options=options, + min_value=None, + max_value=None, + step_size=1, + display_file_path=False, + 
on_change=on_change, ) elif isinstance(value, bool): - self.input_widget( - key, value, widget_type="checkbox", name=name, help=help + self._input_widget_impl( + key, value, name=name, help=help, widget_type="checkbox", + options=None, min_value=None, max_value=None, step_size=1, + display_file_path=False, on_change=on_change ) else: - self.input_widget(key, value, widget_type="text", name=name, help=help) + self._input_widget_impl( + key, value, name=name, help=help, widget_type="text", + options=None, min_value=None, max_value=None, step_size=1, + display_file_path=False, on_change=on_change + ) else: st.error(f"Unsupported widget type '{widget_type}'") @@ -569,12 +643,11 @@ def input_TOPP( # write defaults ini files ini_file_path = Path(self.parameter_manager.ini_dir, f"{topp_tool_name}.ini") - if not ini_file_path.exists(): - try: - subprocess.call([topp_tool_name, "-write_ini", str(ini_file_path)]) - except FileNotFoundError: - st.error(f"TOPP tool **'{topp_tool_name}'** not found.") - return + ini_existed = ini_file_path.exists() + if not self.parameter_manager.create_ini(topp_tool_name): + st.error(f"TOPP tool **'{topp_tool_name}'** not found.") + return + if not ini_existed: # update custom defaults if necessary if custom_defaults: param = poms.Param() @@ -588,31 +661,55 @@ def input_TOPP( # read into Param object param = poms.Param() poms.ParamXMLFile().load(str(ini_file_path), param) + + def _matches_parameter(pattern: str, key: bytes) -> bool: + """ + Match pattern against TOPP parameter key using suffix matching. + + Key format: b"ToolName:1:section:subsection:param_name" + + Returns True if pattern matches the end of the param path, + bounded by ':' or start of path. 
+ """ + pattern = pattern.lstrip(":") # Strip legacy leading colon + key_str = key.decode() + + # Extract param path after "ToolName:1:" + parts = key_str.split(":") + param_path = ":".join(parts[2:]) if len(parts) > 2 else key_str + + # Check if pattern matches as a suffix, bounded by ':' or start + return param_path == pattern or param_path.endswith(":" + pattern) + + # Always apply base exclusions (input/output files, standard excludes) + excluded_keys = [ + "log", + "debug", + "threads", + "no_progress", + "force", + "version", + "test", + ] + exclude_parameters + + valid_keys = [ + key + for key in param.keys() + if not ( + b"input file" in param.getTags(key) + or b"output file" in param.getTags(key) + or any([_matches_parameter(k, key) for k in excluded_keys]) + ) + ] + + # Track which keys are "included" (shown by default) vs "non-included" (advanced only) if include_parameters: - valid_keys = [ - key - for key in param.keys() - if any([k.encode() in key for k in include_parameters]) - ] + included_keys = { + key for key in valid_keys + if any([_matches_parameter(k, key) for k in include_parameters]) + } else: - excluded_keys = [ - "log", - "debug", - "threads", - "no_progress", - "force", - "version", - "test", - ] + exclude_parameters - valid_keys = [ - key - for key in param.keys() - if not ( - b"input file" in param.getTags(key) - or b"output file" in param.getTags(key) - or any([k.encode() in key for k in excluded_keys]) - ) - ] + included_keys = set(valid_keys) # All are included when no filter specified params = [] for key in valid_keys: entry = param.getEntry(key) @@ -620,9 +717,11 @@ def input_TOPP( "name": entry.name.decode(), "key": key, "value": entry.value, + "original_is_list": isinstance(entry.value, list), "valid_strings": [v.decode() for v in entry.valid_strings], "description": entry.description.decode(), "advanced": (b"advanced" in param.getTags(key)), + "non_included": key not in included_keys, "section_description": 
param.getSectionDescription( ":".join(key.decode().split(":")[:-1]) ), @@ -646,14 +745,18 @@ def input_TOPP( p["value"] = custom_defaults[name] elif name in custom_defaults: p["value"] = custom_defaults[name] + # Ensure list parameters stay as lists after loading from JSON + # (JSON may store single-item lists as strings) + if p["original_is_list"] and isinstance(p["value"], str): + p["value"] = p["value"].split("\n") if p["value"] else [] # Split into subsections if required param_sections = {} section_descriptions = {} if display_subsections: for p in params: - # Skip adavnaced parameters if not selected - if not st.session_state["advanced"] and p["advanced"]: + # Skip advanced/non-included parameters if toggle not enabled + if not st.session_state["advanced"] and (p["advanced"] or p["non_included"]): continue # Add section description to section_descriptions dictionary if it exists if p["section_description"]: @@ -667,7 +770,11 @@ def input_TOPP( param_sections[p["sections"]] = [p] else: # Simply put all parameters in "all" section if no subsections required - param_sections["all"] = params + # Filter advanced/non-included parameters if toggle not enabled + param_sections["all"] = [ + p for p in params + if st.session_state["advanced"] or (not p["advanced"] and not p["non_included"]) + ] # Display tool name if required if display_tool_name: @@ -759,19 +866,38 @@ def display_TOPP_params(params: dict, num_cols): v.decode() if isinstance(v, bytes) else v for v in p["value"] ] - valid_entries_info = '' + + # Use multiselect when valid_strings are available for better UX if len(p['valid_strings']) > 0: - valid_entries_info = ( - " Valid entries are: " - + ', '.join(sorted(p['valid_strings'])) + # Filter current values to only include valid options + current_values = [v for v in p["value"] if v in p['valid_strings']] + + # Use a display key for multiselect (stores list), sync to main key (stores string) + display_key = f"{key}_display" + + def 
on_multiselect_change(dk=display_key, tk=key): + st.session_state[tk] = "\n".join(st.session_state[dk]) + + cols[i].multiselect( + name, + options=sorted(p['valid_strings']), + default=current_values, + help=p["description"], + key=display_key, + on_change=on_multiselect_change, ) - cols[i].text_area( - name, - value="\n".join([str(val) for val in p["value"]]), - help=p["description"] + " Separate entries using the \"Enter\" key." + valid_entries_info, - key=key, - ) + # Ensure main key has string value for ParameterManager + if key not in st.session_state: + st.session_state[key] = "\n".join(current_values) + else: + # Fall back to text_area for freeform list input + cols[i].text_area( + name, + value="\n".join([str(val) for val in p["value"]]), + help=p["description"] + " Separate entries using the \"Enter\" key.", + key=key, + ) # increment number of columns, create new cols object if end of line is reached i += 1 @@ -938,6 +1064,45 @@ def zip_and_download_files(self, directory: str): use_container_width=True, ) + def preset_buttons(self, num_cols: int = 4) -> None: + """ + Renders a grid of preset buttons for the current workflow. + + When a preset button is clicked, the preset parameters are applied to the + session state and saved to params.json, then the page is reloaded. + + Args: + num_cols: Number of columns for the button grid. Defaults to 4. 
+ """ + preset_names = self.parameter_manager.get_preset_names() + if not preset_names: + return + + st.markdown("---") + st.markdown("**Parameter Presets**") + st.caption("Click a preset to apply optimized parameters") + + # Create button grid + cols = st.columns(num_cols) + for i, preset_name in enumerate(preset_names): + col_idx = i % num_cols + description = self.parameter_manager.get_preset_description(preset_name) + with cols[col_idx]: + if st.button( + preset_name, + key=f"preset_{preset_name}", + help=description if description else None, + use_container_width=True, + ): + if self.parameter_manager.apply_preset(preset_name): + st.toast(f"Applied preset: {preset_name}") + st.rerun() + else: + st.error(f"Failed to apply preset: {preset_name}") + # Start new row if needed + if col_idx == num_cols - 1 and i < len(preset_names) - 1: + cols = st.columns(num_cols) + def file_upload_section(self, custom_upload_function) -> None: custom_upload_function() c1, _ = st.columns(2) @@ -947,6 +1112,22 @@ def file_upload_section(self, custom_upload_function) -> None: def parameter_section(self, custom_parameter_function) -> None: st.toggle("Show advanced parameters", value=False, key="advanced") + # Display threads configuration for local mode only + if not st.session_state.settings.get("online_deployment", False): + max_threads_config = st.session_state.settings.get("max_threads", {}) + default_threads = max_threads_config.get("local", 4) + self.input_widget( + key="max_threads", + default=default_threads, + name="Threads", + widget_type="number", + min_value=1, + help="Maximum threads for parallel processing. Threads are distributed between parallel commands and per-tool thread allocation." 
+ ) + + # Display preset buttons if presets are available for this workflow + self.preset_buttons() + custom_parameter_function() # File Import / Export section @@ -955,11 +1136,13 @@ def parameter_section(self, custom_parameter_function) -> None: with cols[0]: if st.button( "⚠️ Load default parameters", - help="Reset paramter section to default.", + help="Reset parameter section to default.", use_container_width=True, ): self.parameter_manager.reset_to_default_parameters() - streamlit_js_eval(js_expressions="parent.window.location.reload()") + self.parameter_manager.clear_parameter_session_state() + st.toast("Parameters reset to defaults") + st.rerun() with cols[1]: if self.parameter_manager.params_file.exists(): with open(self.parameter_manager.params_file, "rb") as f: @@ -983,14 +1166,23 @@ def parameter_section(self, custom_parameter_function) -> None: with cols[2]: up = st.file_uploader( - "⬆️ Import parameters", help="Reset parameter section to default." + "⬆️ Import parameters", + help="Import previously exported parameters.", + key="param_import_uploader" ) if up is not None: with open(self.parameter_manager.params_file, "w") as f: f.write(up.read().decode("utf-8")) - streamlit_js_eval(js_expressions="parent.window.location.reload()") + self.parameter_manager.clear_parameter_session_state() + st.toast("Parameters imported") + st.rerun() - def execution_section(self, start_workflow_function) -> None: + def execution_section( + self, + start_workflow_function, + get_status_function=None, + stop_workflow_function=None + ) -> None: with st.expander("**Summary**"): st.markdown(self.export_parameters_markdown()) @@ -999,26 +1191,48 @@ def execution_section(self, start_workflow_function) -> None: log_level = c1.selectbox( "log details", ["minimal", "commands and run times", "all"], key="log_level" ) - + # Real-time display options if "log_lines_count" not in st.session_state: st.session_state.log_lines_count = 100 - + log_lines_count = c2.selectbox( "lines to 
show", [50, 100, 200, 500, "all"], index=1, key="log_lines_select" ) if log_lines_count != "all": st.session_state.log_lines_count = log_lines_count - - pid_exists = self.executor.pid_dir.exists() + + # Get workflow status (supports both queue and local modes) + status = {} + if get_status_function: + status = get_status_function() + + # Determine if workflow is running + is_running = status.get("running", False) + job_status = status.get("status", "idle") + + # Fallback to PID check for backward compatibility + pid_exists = self.executor.pid_dir.exists() and list(self.executor.pid_dir.iterdir()) + if not is_running and pid_exists: + is_running = True + job_status = "running" + log_path = Path(self.workflow_dir, "logs", log_level.replace(" ", "-") + ".log") log_path_complete = Path(self.workflow_dir, "logs", 'all' + ".log") log_exists = log_path.exists() and log_path_complete.exists() - if pid_exists: + # Show queue status if available (online mode) + if status.get("job_id"): + self._show_queue_status(status) + + # Control buttons + if is_running: if c1.button("Stop Workflow", type="primary", use_container_width=True): - self.executor.stop() + if stop_workflow_function: + stop_workflow_function() + else: + self.executor.stop() st.rerun() elif c1.button("Start Workflow", type="primary", use_container_width=True): start_workflow_function() @@ -1027,16 +1241,23 @@ def execution_section(self, start_workflow_function) -> None: st.rerun() error_box = st.empty() - with st.status("", expanded=True) as status: - - if pid_exists: - status.update( - label="**Workflow running...**", state='running', + with st.status("", expanded=True) as status_widget: + + if is_running: + # Real-time display during execution + run_label = "**Workflow running...**" + if job_status == "queued": + pos = status.get("queue_position", "?") + run_label = f"**Waiting in queue (position {pos})...**" + + status_widget.update( + label=run_label, state='running', expanded=True ) if log_exists: + # Parse 
progress from complete log percentage = -1 - label = None + progress_label = None with open(log_path_complete, "r", encoding="utf-8") as f: for line in reversed(f.readlines()): line = line.strip() @@ -1045,13 +1266,12 @@ def execution_section(self, start_workflow_function) -> None: percentage = float(match.group(1))/100 match = re.search(r"Progress of\s+'([^']+)'", line) if match: - label = match.group(1) - break + progress_label = match.group(1) + break elif "Process finished:" in line: break if 0 <= percentage <= 1: - st.progress(percentage, text=label) - + st.progress(percentage, text=progress_label) with open(log_path, "r", encoding="utf-8") as f: lines = f.readlines() @@ -1068,8 +1288,8 @@ def execution_section(self, start_workflow_function) -> None: time.sleep(1) st.rerun() - elif log_exists and not pid_exists: - status.update(state='complete', expanded=True) + elif log_exists: + status_widget.update(state='complete', expanded=True) # Static display after completion st.markdown( f"**Workflow log file: {datetime.fromtimestamp(log_path.stat().st_ctime).strftime('%Y-%m-%d %H:%M')} CET**" @@ -1077,20 +1297,70 @@ def execution_section(self, start_workflow_function) -> None: with open(log_path, "r", encoding="utf-8") as f: content = f.read() # Check if workflow finished successfully - if not "WORKFLOW FINISHED" in content: - status.update( - label='Workflow completed.', state='error', + if "WORKFLOW FINISHED" not in content: + status_widget.update( + label='Workflow completed.', state='error', expanded=True ) error_box.error("**Errors occurred, check log file.**") else: - status.update( + status_widget.update( label='Workflow completed.', state='complete', expanded=True ) code_box = st.container(key='log') code_box.code(content, language="neon", line_numbers=False) + def _show_queue_status(self, status: dict) -> None: + """Display queue job status for online mode""" + job_status = status.get("status", "unknown") + + # Status icons + status_display = { + "queued": 
("Queued", "info"), + "started": ("Running", "info"), + "finished": ("Completed", "success"), + "failed": ("Failed", "error"), + "canceled": ("Cancelled", "warning"), + } + + label, msg_type = status_display.get(job_status, ("Unknown", "info")) + + # Queue-specific information + if job_status == "queued": + queue_position = status.get("queue_position", "?") + queue_length = status.get("queue_length", "?") + st.info(f"**Status: {label}** - Your workflow is #{queue_position} in the queue ({queue_length} total jobs)") + + elif job_status == "started": + current_step = status.get("current_step", "Processing...") + st.info(f"**Status: {label}** - {current_step}") + + elif job_status == "finished": + # Check if the job result indicates success or failure + job_result = status.get("result") + if job_result and isinstance(job_result, dict) and job_result.get("success") is False: + st.error("**Status: Completed with errors**") + error_msg = job_result.get("error", "Unknown error") + if error_msg: + with st.expander("Error Details", expanded=True): + st.code(error_msg) + else: + st.success(f"**Status: {label}**") + + elif job_status == "failed": + st.error(f"**Status: {label}**") + job_error = status.get("error") + if job_error: + with st.expander("Error Details", expanded=True): + st.code(job_error) + + # Expandable job details + with st.expander("Job Details", expanded=False): + st.code(f"""Job ID: {status.get('job_id', 'N/A')} +Submitted: {status.get('enqueued_at', 'N/A')} +Started: {status.get('started_at', 'N/A')}""") + def results_section(self, custom_results_function) -> None: custom_results_function() diff --git a/src/workflow/WorkflowManager.py b/src/workflow/WorkflowManager.py index abca391f..1a3109c8 100644 --- a/src/workflow/WorkflowManager.py +++ b/src/workflow/WorkflowManager.py @@ -1,4 +1,5 @@ from pathlib import Path +from typing import Optional from .Logger import Logger from .ParameterManager import ParameterManager from .CommandExecutor import 
CommandExecutor @@ -6,7 +7,9 @@ from .FileManager import FileManager import multiprocessing import shutil +import time import traceback +import streamlit as st class WorkflowManager: # Core workflow logic using the above classes @@ -21,18 +24,72 @@ def __init__(self, name: str, workspace: str, share_cache: bool = False): self.file_manager = FileManager(self.workflow_dir, cache_path) self.logger = Logger(self.workflow_dir) - self.parameter_manager = ParameterManager(self.workflow_dir) + self.parameter_manager = ParameterManager(self.workflow_dir, workflow_name=name) self.executor = CommandExecutor(self.workflow_dir, self.logger, self.parameter_manager) self.ui = StreamlitUI(self.workflow_dir, self.logger, self.executor, self.parameter_manager) self.params = self.parameter_manager.get_parameters_from_json() + # Initialize queue manager for online mode + self._queue_manager: Optional['QueueManager'] = None + if self._is_online_mode(): + self._init_queue_manager() + + def _is_online_mode(self) -> bool: + """Check if running in online deployment mode""" + return st.session_state.get("settings", {}).get("online_deployment", False) + + def _init_queue_manager(self) -> None: + """Initialize queue manager for online mode""" + try: + from .QueueManager import QueueManager + self._queue_manager = QueueManager() + except ImportError: + pass # Queue not available, will use fallback + def start_workflow(self) -> None: """ - Starts the workflow process and adds its process id to the pid directory. - The workflow itself needs to be a process, otherwise streamlit will wait for everything to finish before updating the UI again. + Starts the workflow process. 
+ + Online mode: Submits to Redis queue + Local mode: Spawns multiprocessing.Process (existing behavior) """ + if self._queue_manager and self._queue_manager.is_available: + self._start_workflow_queued() + else: + self._start_workflow_local() + + def _start_workflow_queued(self) -> None: + """Submit workflow to Redis queue (online mode)""" + from .tasks import execute_workflow + + # Generate unique job ID based on workflow directory + job_id = f"workflow-{self.workflow_dir.name}-{int(time.time())}" + + # Submit job to queue + submitted_id = self._queue_manager.submit_job( + func=execute_workflow, + kwargs={ + "workflow_dir": str(self.workflow_dir), + "workflow_class": self.__class__.__name__, + "workflow_module": self.__class__.__module__, + }, + job_id=job_id, + timeout=7200, # 2 hour timeout + description=f"Workflow: {self.name}" + ) + + if submitted_id: + # Store job ID for status checking + self._queue_manager.store_job_id(self.workflow_dir, submitted_id) + else: + # Fallback to local execution if queue submission fails + st.warning("Queue submission failed, running locally...") + self._start_workflow_local() + + def _start_workflow_local(self) -> None: + """Start workflow as local process (existing behavior for local mode)""" # Catch double presses of the button while app is in frozen state - if self.executor.pid_dir.exists(): + if self.executor.pid_dir.exists(): return # Delete the log file if it already exists @@ -54,14 +111,118 @@ def workflow_process(self) -> None: if results_dir.exists(): shutil.rmtree(results_dir) results_dir.mkdir(parents=True) - self.execution() - self.logger.log("WORKFLOW FINISHED") + success = self.execution() + if success: + self.logger.log("WORKFLOW FINISHED") except Exception as e: self.logger.log(f"ERROR: {e}") self.logger.log("".join(traceback.format_exception(e))) # Delete pid dir path to indicate workflow is done shutil.rmtree(self.executor.pid_dir, ignore_errors=True) + def get_workflow_status(self) -> dict: + """ + Get 
current workflow execution status. + + Returns: + Dictionary with status information including: + - running: bool indicating if workflow is running + - status: string status (queued, started, finished, failed, idle) + - progress: float 0-1 for queue jobs, None for local + - current_step: string description of current step + - job_id: job ID for queue jobs, None for local + - queue_position: position in queue (1-indexed), None if not queued + - queue_length: total jobs in queue, None if not queued + """ + # Check queue status first (online mode) + if self._queue_manager and self._queue_manager.is_available: + job_id = self._queue_manager.load_job_id(self.workflow_dir) + if job_id: + job_info = self._queue_manager.get_job_info(job_id) + if job_info: + is_running = job_info.status.value in ["queued", "started"] + return { + "running": is_running, + "status": job_info.status.value, + "progress": job_info.progress, + "current_step": job_info.current_step, + "job_id": job_id, + "queue_position": job_info.queue_position, + "queue_length": job_info.queue_length, + "enqueued_at": job_info.enqueued_at, + "started_at": job_info.started_at, + "result": job_info.result, + "error": job_info.error, + } + else: + # Job not found, clear the stored job ID + self._queue_manager.clear_job_id(self.workflow_dir) + + # Fallback: check PID files (local mode) + pid_dir = self.executor.pid_dir + if pid_dir.exists() and list(pid_dir.iterdir()): + return { + "running": True, + "status": "running", + "progress": None, + "current_step": None, + "job_id": None, + "queue_position": None, + "queue_length": None, + } + + return { + "running": False, + "status": "idle", + "progress": None, + "current_step": None, + "job_id": None, + "queue_position": None, + "queue_length": None, + } + + def stop_workflow(self) -> bool: + """ + Stop a running workflow. 
+ + Returns: + True if successfully stopped + """ + # Try to cancel queue job first (online mode) + if self._queue_manager and self._queue_manager.is_available: + job_id = self._queue_manager.load_job_id(self.workflow_dir) + if job_id: + success = self._queue_manager.cancel_job(job_id) + if success: + self._queue_manager.clear_job_id(self.workflow_dir) + return True + + # Fallback: stop local process + return self._stop_local_workflow() + + def _stop_local_workflow(self) -> bool: + """Stop locally running workflow process""" + import os + import signal + + pid_dir = self.executor.pid_dir + if not pid_dir.exists(): + return False + + stopped = False + for pid_file in pid_dir.iterdir(): + try: + pid = int(pid_file.name) + os.kill(pid, signal.SIGTERM) + pid_file.unlink() + stopped = True + except (ValueError, ProcessLookupError, PermissionError): + pid_file.unlink() # Clean up stale PID file + + # Clean up the pid directory + shutil.rmtree(pid_dir, ignore_errors=True) + return stopped + def show_file_upload_section(self) -> None: """ Shows the file upload section of the UI with content defined in self.upload(). @@ -78,7 +239,11 @@ def show_execution_section(self) -> None: """ Shows the execution section of the UI with content defined in self.execution(). """ - self.ui.execution_section(self.start_workflow) + self.ui.execution_section( + start_workflow_function=self.start_workflow, + get_status_function=self.get_workflow_status, + stop_workflow_function=self.stop_workflow + ) def show_results_section(self) -> None: """ @@ -104,14 +269,15 @@ def configure(self) -> None: ################################### pass - def execution(self) -> None: + def execution(self) -> bool: """ - Add your workflow steps here + Add your workflow steps here. + Returns True on success, False on error. 
""" ################################### # Add your workflow steps here ################################### - pass + return True def results(self) -> None: """ diff --git a/src/workflow/health.py b/src/workflow/health.py new file mode 100644 index 00000000..6473f9e7 --- /dev/null +++ b/src/workflow/health.py @@ -0,0 +1,129 @@ +""" +Health check utilities for Redis queue monitoring. + +Provides functions to check Redis and worker health status +for display in the sidebar metrics. +""" + +import os + + +def check_redis_health() -> dict: + """ + Check Redis connection health. + + Returns: + Dictionary with health status and metrics + """ + redis_url = os.environ.get("REDIS_URL", "redis://localhost:6379/0") + + try: + from redis import Redis + + redis = Redis.from_url(redis_url) + redis.ping() + info = redis.info() + + return { + "status": "healthy", + "connected_clients": info.get("connected_clients", 0), + "used_memory": info.get("used_memory_human", "unknown"), + "uptime_days": info.get("uptime_in_days", 0), + } + except ImportError: + return { + "status": "unavailable", + "error": "redis package not installed", + } + except Exception as e: + return { + "status": "unhealthy", + "error": str(e), + } + + +def check_worker_health() -> dict: + """ + Check RQ worker health. 
+ + Returns: + Dictionary with worker status and metrics + """ + redis_url = os.environ.get("REDIS_URL", "redis://localhost:6379/0") + + try: + from redis import Redis + from rq import Worker, Queue + + redis = Redis.from_url(redis_url) + queue = Queue("openms-workflows", connection=redis) + workers = Worker.all(connection=redis) + + busy_workers = [w for w in workers if w.get_state() == "busy"] + idle_workers = [w for w in workers if w.get_state() == "idle"] + + return { + "status": "healthy", + "worker_count": len(workers), + "busy_workers": len(busy_workers), + "idle_workers": len(idle_workers), + "queue_length": len(queue), + "workers": [ + { + "name": w.name, + "state": w.get_state(), + "current_job": w.get_current_job_id(), + } + for w in workers + ] + } + except ImportError: + return { + "status": "unavailable", + "error": "rq package not installed", + } + except Exception as e: + return { + "status": "unhealthy", + "error": str(e), + } + + +def get_queue_metrics() -> dict: + """ + Get comprehensive queue metrics for sidebar display. 
+ + Returns: + Dictionary with all queue metrics or empty dict if unavailable + """ + # Only attempt if REDIS_URL is set (online mode) + if not os.environ.get("REDIS_URL"): + return {} + + try: + from redis import Redis + from rq import Worker, Queue + + redis_url = os.environ.get("REDIS_URL", "redis://localhost:6379/0") + redis = Redis.from_url(redis_url) + + # Test connection + redis.ping() + + queue = Queue("openms-workflows", connection=redis) + workers = Worker.all(connection=redis) + + busy_count = len([w for w in workers if w.get_state() == "busy"]) + + return { + "available": True, + "total_workers": len(workers), + "busy_workers": busy_count, + "idle_workers": len(workers) - busy_count, + "queued_jobs": len(queue), + "started_jobs": len(queue.started_job_registry), + "finished_jobs": len(queue.finished_job_registry), + "failed_jobs": len(queue.failed_job_registry), + } + except Exception: + return {"available": False} diff --git a/src/workflow/tasks.py b/src/workflow/tasks.py new file mode 100644 index 00000000..7c283bc6 --- /dev/null +++ b/src/workflow/tasks.py @@ -0,0 +1,159 @@ +""" +Worker tasks for Redis Queue execution. + +These functions are executed by RQ workers and should not import Streamlit. +This module must be importable without Streamlit being available. +""" + +import sys +import json +import shutil +import traceback +from pathlib import Path + + +def execute_workflow( + workflow_dir: str, + workflow_class: str, + workflow_module: str, +) -> dict: + """ + Execute a workflow in the worker process. + + This function is called by the RQ worker to execute a workflow. + It reconstructs the workflow object and calls its execution() method. 
+ + Args: + workflow_dir: Path to the workflow directory + workflow_class: Name of the Workflow class + workflow_module: Module path containing the Workflow class + + Returns: + Dictionary with execution results + """ + try: + from rq import get_current_job + job = get_current_job() + except Exception: + job = None + + workflow_path = Path(workflow_dir) + + try: + # Update progress + _update_progress(job, 0.0, "Initializing workflow...") + + # Import required modules + from src.workflow.CommandExecutor import CommandExecutor + from src.workflow.FileManager import FileManager + from src.workflow.ParameterManager import ParameterManager + from src.workflow.Logger import Logger + + # Load the workflow class dynamically + import importlib + module = importlib.import_module(workflow_module) + WorkflowClass = getattr(module, workflow_class) + + _update_progress(job, 0.05, "Loading parameters...") + + # Delete the log file if it already exists + shutil.rmtree(Path(workflow_path, "logs"), ignore_errors=True) + + # Load parameters from saved params.json + params_file = workflow_path / "params.json" + if params_file.exists(): + with open(params_file, "r") as f: + params = json.load(f) + else: + params = {} + + # Initialize workflow components + logger = Logger(workflow_path) + file_manager = FileManager(workflow_path) + parameter_manager = ParameterManager(workflow_path) + executor = CommandExecutor(workflow_path, logger, parameter_manager) + executor.pid_dir.mkdir(parents=True, exist_ok=True) + + _update_progress(job, 0.1, "Starting workflow execution...") + + # Create workflow instance + # We need to bypass the normal __init__ which requires Streamlit + workflow = object.__new__(WorkflowClass) + workflow.name = workflow_path.name + workflow.workflow_dir = workflow_path + workflow.file_manager = file_manager + workflow.logger = logger + workflow.parameter_manager = parameter_manager + workflow.executor = executor + workflow.params = params + + # Store job reference for 
progress updates
+        workflow._rq_job = job
+
+        # Clear results directory
+        results_dir = workflow_path / "results"
+        if results_dir.exists():
+            shutil.rmtree(results_dir)
+        results_dir.mkdir(parents=True)
+
+        # Log workflow start
+        logger.log("STARTING WORKFLOW")
+
+        _update_progress(job, 0.15, "Executing workflow steps...")
+
+        # Execute the workflow; execution() returns True on success, False on error
+        # (normalize legacy implementations that return None to "success")
+        success = workflow.execution() is not False
+
+        # Mirror workflow_process(): only log FINISHED when execution succeeded
+        logger.log("WORKFLOW FINISHED" if success else "WORKFLOW FAILED: execution() returned False")
+
+        _update_progress(job, 1.0, "Workflow completed")
+
+        # Clean up pid directory (in case it was created by accident)
+        shutil.rmtree(executor.pid_dir, ignore_errors=True)
+
+        return {
+            "success": success,
+            "workflow_dir": str(workflow_path),
+            "message": "Workflow completed successfully" if success else "Workflow reported failure", "error": None if success else "execution() returned False; check workflow logs"
+        }
+
+    except Exception as e:
+        error_msg = f"Workflow failed: {str(e)}\n{traceback.format_exc()}"
+
+        # Log error to workflow logs
+        try:
+            log_dir = workflow_path / "logs"
+            log_dir.mkdir(parents=True, exist_ok=True)
+
+            for log_name in ["minimal.log", "commands-and-run-times.log", "all.log"]:
+                log_file = log_dir / log_name
+                with open(log_file, "a") as f:
+                    f.write(f"\n\nERROR: {str(e)}\n")
+                    f.write(traceback.format_exc())
+        except Exception:
+            pass
+
+        # Clean up pid directory
+        try:
+            pid_dir = workflow_path / "pids"
+            shutil.rmtree(pid_dir, ignore_errors=True)
+        except Exception:
+            pass
+
+        return {
+            "success": False,
+            "workflow_dir": str(workflow_path),
+            "error": error_msg
+        }
+
+
+def _update_progress(job, progress: float, step: str) -> None:
+    """Update job progress metadata"""
+    if job is not None:
+        try:
+            job.meta["progress"] = progress
+            job.meta["current_step"] = step
+            job.save_meta()
+        except Exception:
+            pass  # Ignore errors updating progress
diff --git a/tests/test_parameter_presets.py b/tests/test_parameter_presets.py
new file mode 100644
index 00000000..5104abc3
--- /dev/null
+++ b/tests/test_parameter_presets.py
@@ -0,0 +1,390 @@
+"""
+Tests for the parameter presets functionality.
+ +This module verifies that the preset system correctly loads preset definitions, +retrieves preset names and descriptions, and applies presets to session state. +""" +import os +import sys +import json +import pytest +import tempfile +from pathlib import Path +from unittest.mock import patch, MagicMock + +# Add project root to path for imports +PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(PROJECT_ROOT) + +# Create mock for streamlit before importing ParameterManager +mock_streamlit = MagicMock() +mock_streamlit.session_state = {} +sys.modules['streamlit'] = mock_streamlit + +# Create mock for pyopenms +mock_pyopenms = MagicMock() +mock_pyopenms.__version__ = "2.9.1" +sys.modules['pyopenms'] = mock_pyopenms + +# Now import after mocks are set up +from src.workflow.ParameterManager import ParameterManager + + +@pytest.fixture +def temp_workflow_dir(): + """Create a temporary workflow directory for testing.""" + with tempfile.TemporaryDirectory() as tmpdir: + workflow_dir = Path(tmpdir) / "test-workflow" + workflow_dir.mkdir() + ini_dir = workflow_dir / "ini" + ini_dir.mkdir() + yield workflow_dir + + +@pytest.fixture +def sample_presets(): + """Sample presets data for testing.""" + return { + "test-workflow": { + "Preset A": { + "_description": "Description for Preset A", + "ToolA": { + "param1": 10.0, + "param2": "value2" + }, + "ToolB": { + "param3": 5 + } + }, + "Preset B": { + "_description": "Description for Preset B", + "ToolA": { + "param1": 20.0 + }, + "_general": { + "general_param": "general_value" + } + }, + "Preset No Description": { + "ToolA": { + "param1": 30.0 + } + } + }, + "other-workflow": { + "Other Preset": { + "_description": "This belongs to another workflow" + } + } + } + + +@pytest.fixture(autouse=True) +def reset_streamlit_state(): + """Reset mock streamlit session state before each test.""" + mock_streamlit.session_state.clear() + yield + + +@pytest.fixture +def temp_cwd(): + """Change to a 
temporary directory for tests that create files.
+
+    This prevents tests from affecting the actual project root's presets.json.
+    """
+    original_cwd = os.getcwd()
+    with tempfile.TemporaryDirectory() as tmpdir:
+        os.chdir(tmpdir)
+        try: yield tmpdir
+        finally: os.chdir(original_cwd)  # restore cwd even if the test fails, so tmpdir cleanup succeeds
+
+
+class TestParameterManagerPresets:
+    """Tests for ParameterManager preset methods."""
+
+    def test_load_presets_returns_empty_when_file_missing(self, temp_workflow_dir, temp_cwd):
+        """Test that load_presets returns empty dict when presets.json doesn't exist."""
+        pm = ParameterManager(temp_workflow_dir)
+        presets = pm.load_presets()
+        assert presets == {}
+
+    def test_load_presets_returns_workflow_presets(self, temp_workflow_dir, sample_presets, temp_cwd):
+        """Test that load_presets returns presets for the correct workflow."""
+        with open("presets.json", "w") as f:
+            json.dump(sample_presets, f)
+
+        pm = ParameterManager(temp_workflow_dir)
+        presets = pm.load_presets()
+
+        assert "Preset A" in presets
+        assert "Preset B" in presets
+        assert "Other Preset" not in presets  # From different workflow
+
+    def test_load_presets_handles_invalid_json(self, temp_workflow_dir, temp_cwd):
+        """Test that load_presets handles malformed JSON gracefully."""
+        with open("presets.json", "w") as f:
+            f.write("{ invalid json }")
+
+        pm = ParameterManager(temp_workflow_dir)
+        presets = pm.load_presets()
+
+        assert presets == {}
+
+    def test_get_preset_names(self, temp_workflow_dir, sample_presets, temp_cwd):
+        """Test that get_preset_names returns correct list."""
+        with open("presets.json", "w") as f:
+            json.dump(sample_presets, f)
+
+        pm = ParameterManager(temp_workflow_dir)
+        names = pm.get_preset_names()
+
+        assert "Preset A" in names
+        assert "Preset B" in names
+        assert "Preset No Description" in names
+        assert "_description" not in names  # Special keys excluded
+
+    def test_get_preset_names_empty_when_no_presets(self, temp_workflow_dir, temp_cwd):
+        """Test that get_preset_names returns empty list when no
presets exist.""" + pm = ParameterManager(temp_workflow_dir) + names = pm.get_preset_names() + assert names == [] + + def test_get_preset_description(self, temp_workflow_dir, sample_presets, temp_cwd): + """Test that get_preset_description returns correct description.""" + with open("presets.json", "w") as f: + json.dump(sample_presets, f) + + pm = ParameterManager(temp_workflow_dir) + + desc_a = pm.get_preset_description("Preset A") + assert desc_a == "Description for Preset A" + + desc_b = pm.get_preset_description("Preset B") + assert desc_b == "Description for Preset B" + + def test_get_preset_description_empty_when_missing(self, temp_workflow_dir, sample_presets, temp_cwd): + """Test that get_preset_description returns empty string when no description.""" + with open("presets.json", "w") as f: + json.dump(sample_presets, f) + + pm = ParameterManager(temp_workflow_dir) + desc = pm.get_preset_description("Preset No Description") + + assert desc == "" + + def test_get_preset_description_empty_for_nonexistent_preset(self, temp_workflow_dir, sample_presets, temp_cwd): + """Test that get_preset_description returns empty string for nonexistent preset.""" + with open("presets.json", "w") as f: + json.dump(sample_presets, f) + + pm = ParameterManager(temp_workflow_dir) + desc = pm.get_preset_description("Nonexistent Preset") + + assert desc == "" + + def test_apply_preset_deletes_session_state_keys(self, temp_workflow_dir, sample_presets, temp_cwd): + """Test that apply_preset deletes session_state keys instead of setting them.""" + with open("presets.json", "w") as f: + json.dump(sample_presets, f) + + pm = ParameterManager(temp_workflow_dir) + + # Pre-populate session_state with old values + mock_streamlit.session_state[f"{pm.topp_param_prefix}ToolA:1:param1"] = 999.0 + mock_streamlit.session_state[f"{pm.topp_param_prefix}ToolA:1:param2"] = "old_value" + mock_streamlit.session_state[f"{pm.topp_param_prefix}ToolB:1:param3"] = 999 + + result = pm.apply_preset("Preset 
A") + + assert result is True + + # Keys should be DELETED (not set to new values) so widgets re-initialize fresh + assert f"{pm.topp_param_prefix}ToolA:1:param1" not in mock_streamlit.session_state + assert f"{pm.topp_param_prefix}ToolA:1:param2" not in mock_streamlit.session_state + assert f"{pm.topp_param_prefix}ToolB:1:param3" not in mock_streamlit.session_state + + def test_apply_preset_deletes_general_param_keys(self, temp_workflow_dir, sample_presets, temp_cwd): + """Test that apply_preset deletes _general parameter keys from session_state.""" + with open("presets.json", "w") as f: + json.dump(sample_presets, f) + + pm = ParameterManager(temp_workflow_dir) + + # Pre-populate session_state with old value + mock_streamlit.session_state[f"{pm.param_prefix}general_param"] = "old_value" + + result = pm.apply_preset("Preset B") + + assert result is True + + # Key should be DELETED so widget re-initializes fresh + assert f"{pm.param_prefix}general_param" not in mock_streamlit.session_state + + def test_apply_preset_saves_to_params_file(self, temp_workflow_dir, sample_presets, temp_cwd): + """Test that apply_preset saves parameters to params.json.""" + with open("presets.json", "w") as f: + json.dump(sample_presets, f) + + pm = ParameterManager(temp_workflow_dir) + pm.apply_preset("Preset A") + + # Check params.json was created with correct content + assert pm.params_file.exists() + + with open(pm.params_file, "r") as f: + saved_params = json.load(f) + + assert "ToolA" in saved_params + assert saved_params["ToolA"]["param1"] == 10.0 + assert saved_params["ToolA"]["param2"] == "value2" + + def test_apply_preset_returns_false_for_nonexistent(self, temp_workflow_dir, sample_presets, temp_cwd): + """Test that apply_preset returns False for nonexistent preset.""" + with open("presets.json", "w") as f: + json.dump(sample_presets, f) + + pm = ParameterManager(temp_workflow_dir) + result = pm.apply_preset("Nonexistent Preset") + + assert result is False + + def 
test_apply_preset_preserves_existing_params(self, temp_workflow_dir, sample_presets, temp_cwd): + """Test that apply_preset preserves existing parameters not in the preset.""" + with open("presets.json", "w") as f: + json.dump(sample_presets, f) + + pm = ParameterManager(temp_workflow_dir) + + # Create existing params + existing_params = { + "existing_param": "existing_value", + "ToolA": { + "existing_tool_param": "value" + } + } + with open(pm.params_file, "w") as f: + json.dump(existing_params, f) + + pm.apply_preset("Preset A") + + with open(pm.params_file, "r") as f: + saved_params = json.load(f) + + # Existing params should be preserved + assert saved_params["existing_param"] == "existing_value" + # New params from preset should be added + assert saved_params["ToolA"]["param1"] == 10.0 + + def test_clear_parameter_session_state(self, temp_workflow_dir): + """Test that clear_parameter_session_state removes all parameter keys.""" + pm = ParameterManager(temp_workflow_dir) + + # Add various keys to session_state + mock_streamlit.session_state[f"{pm.param_prefix}param1"] = "value1" + mock_streamlit.session_state[f"{pm.param_prefix}param2"] = "value2" + mock_streamlit.session_state[f"{pm.topp_param_prefix}Tool:1:param"] = 10.0 + mock_streamlit.session_state["unrelated_key"] = "should_remain" + + pm.clear_parameter_session_state() + + # Parameter keys should be deleted + assert f"{pm.param_prefix}param1" not in mock_streamlit.session_state + assert f"{pm.param_prefix}param2" not in mock_streamlit.session_state + assert f"{pm.topp_param_prefix}Tool:1:param" not in mock_streamlit.session_state + + # Unrelated keys should remain + assert mock_streamlit.session_state["unrelated_key"] == "should_remain" + + def test_clear_parameter_session_state_empty(self, temp_workflow_dir): + """Test that clear_parameter_session_state handles empty session_state.""" + pm = ParameterManager(temp_workflow_dir) + + # Should not raise even with no matching keys + 
pm.clear_parameter_session_state() + + +class TestWorkflowNameParameter: + """Tests for the workflow_name parameter in ParameterManager.""" + + def test_workflow_name_defaults_to_directory_stem(self, temp_workflow_dir): + """Test that workflow_name defaults to the directory stem when not provided.""" + pm = ParameterManager(temp_workflow_dir) + assert pm.workflow_name == "test-workflow" + + def test_workflow_name_can_be_explicitly_set(self, temp_workflow_dir): + """Test that workflow_name can be explicitly set.""" + pm = ParameterManager(temp_workflow_dir, workflow_name="My Custom Workflow") + assert pm.workflow_name == "My Custom Workflow" + + def test_load_presets_normalizes_workflow_name(self, temp_workflow_dir, temp_cwd): + """Test that load_presets normalizes the workflow name for lookup.""" + # Create presets with normalized key + presets = { + "my-custom-workflow": { + "Test Preset": { + "_description": "A test preset" + } + } + } + with open("presets.json", "w") as f: + json.dump(presets, f) + + # Pass workflow name with spaces and mixed case + pm = ParameterManager(temp_workflow_dir, workflow_name="My Custom Workflow") + preset_names = pm.get_preset_names() + + assert "Test Preset" in preset_names + + def test_load_presets_with_original_name_format(self, temp_workflow_dir, temp_cwd): + """Test that presets can use the same format as WorkflowManager normalization.""" + # This simulates how WorkflowManager passes the name + presets = { + "topp-workflow": { + "Preset X": {"_description": "Test"} + } + } + with open("presets.json", "w") as f: + json.dump(presets, f) + + # Simulating WorkflowManager passing "TOPP Workflow" + pm = ParameterManager(temp_workflow_dir, workflow_name="TOPP Workflow") + preset_names = pm.get_preset_names() + + assert "Preset X" in preset_names + + +class TestPresetsJsonFormat: + """Tests for the presets.json format validation.""" + + def test_presets_json_structure(self): + """Test that the actual presets.json file has valid 
structure.""" + # Look for presets.json in the project root + presets_path = Path(PROJECT_ROOT) / "presets.json" + + if not presets_path.exists(): + pytest.skip("presets.json not found in project root") + + with open(presets_path, "r") as f: + presets = json.load(f) + + assert isinstance(presets, dict) + + for workflow_name, workflow_presets in presets.items(): + assert isinstance(workflow_name, str) + assert isinstance(workflow_presets, dict) + + for preset_name, preset_config in workflow_presets.items(): + assert isinstance(preset_name, str) + assert isinstance(preset_config, dict) + + for key, value in preset_config.items(): + # Keys should be strings + assert isinstance(key, str) + # Values should be either strings (description), dicts (tool params), or primitives + if key == "_description": + assert isinstance(value, str) + elif key == "_general" or not key.startswith("_"): + if isinstance(value, dict): + # Tool parameters dict + for param_name, param_value in value.items(): + assert isinstance(param_name, str)