From 71874c8158a1247c04d128b28ae3a80ea5bf94a2 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Wed, 4 Mar 2026 13:39:48 -0600 Subject: [PATCH 1/3] Tracking tasks per each new collection --- specifyweb/backend/accounts/views.py | 17 +--- specifyweb/backend/context/views.py | 10 +-- specifyweb/backend/setup_tool/api.py | 16 +++- specifyweb/backend/setup_tool/redis.py | 2 + specifyweb/backend/setup_tool/setup_tasks.py | 84 ++++++++++++++++++-- 5 files changed, 101 insertions(+), 28 deletions(-) diff --git a/specifyweb/backend/accounts/views.py b/specifyweb/backend/accounts/views.py index 6dc56aab016..0ef586ad614 100644 --- a/specifyweb/backend/accounts/views.py +++ b/specifyweb/backend/accounts/views.py @@ -1,5 +1,4 @@ import base64 -from datetime import timedelta import hashlib import hmac import json @@ -17,7 +16,7 @@ from django.db.models import Max from django.shortcuts import render from django.template.response import TemplateResponse -from django.utils import crypto, timezone +from django.utils import crypto from django.utils.http import url_has_allowed_host_and_scheme, urlencode from django.views.decorators.cache import never_cache from typing import cast @@ -295,8 +294,7 @@ def choose_collection(request) -> http.HttpResponse: id to the user if one is provided. """ from specifyweb.backend.context.views import set_collection_cookie, users_collections_for_sp7 - from specifyweb.backend.setup_tool.api import is_config_task_running - + from specifyweb.backend.setup_tool.api import get_collections_with_busy_config from specifyweb.specify.api.serializers import obj_to_data, toJson @@ -320,14 +318,8 @@ def choose_collection(request) -> http.HttpResponse: ) available_collections = users_collections_for_sp7(request.specify_user.id) - if is_config_task_running(): - # If config tasks are running, filter out newly created collections - fifteen_minutes_ago = timezone.now() - timedelta(minutes=15) - available_collections = [ - c - for c in available_collections - if c.timestampcreated is None or c.timestampcreated <= fifteen_minutes_ago - ] + busy_collection_ids = get_collections_with_busy_config(c.id for c in available_collections) + available_collections = [c for c in available_collections if c.id not in busy_collection_ids] if len(available_collections) == 1: set_collection_cookie(redirect_resp, available_collections[0].id) @@ -588,4 +580,3 @@ def set_admin_status(request, userid): else: user.clear_admin() return http.HttpResponse('false', content_type='text/plain') - diff --git a/specifyweb/backend/context/views.py b/specifyweb/backend/context/views.py index 373d599f3f6..77f29b19c8e 100644 --- a/specifyweb/backend/context/views.py +++ b/specifyweb/backend/context/views.py @@ -5,7 +5,6 @@ import json import os import re -from datetime import timedelta from typing import List from django.conf import settings @@ -38,7 +37,7 @@ from .remote_prefs import get_remote_prefs from .schema_localization import get_schema_languages, get_schema_localization from .viewsets import get_views -from specifyweb.backend.setup_tool.api import get_config_progress, is_config_task_running +from specifyweb.backend.setup_tool.api import get_collections_with_busy_config, get_config_progress def set_collection_cookie(response, collection_id): # pragma: no cover @@ -647,15 +646,14 @@ def get_server_time(request): def _filter_collections_not_ready_for_config_task(collections): - if not is_config_task_running(): + busy_collection_ids = get_collections_with_busy_config(c.id for c in collections) + if not busy_collection_ids: return collections - # If config tasks are running, filter out newly created collections. - fifteen_minutes_ago = timezone.now() - timedelta(minutes=15) return [ c for c in collections - if c.timestampcreated is None or c.timestampcreated <= fifteen_minutes_ago + if c.id not in busy_collection_ids ] diff --git a/specifyweb/backend/setup_tool/api.py b/specifyweb/backend/setup_tool/api.py index 05ad45c3b17..da61ea93500 100644 --- a/specifyweb/backend/setup_tool/api.py +++ b/specifyweb/backend/setup_tool/api.py @@ -22,6 +22,8 @@ from specifyweb.backend.setup_tool.prep_type_defaults import create_default_prep_types from specifyweb.backend.setup_tool.app_resource_defaults import ensure_discipline_resource_dir from specifyweb.backend.setup_tool.setup_tasks import ( + get_collections_with_busy_config as _get_collections_with_busy_config, + is_collection_config_busy, setup_database_background, get_active_setup_task, get_last_setup_error, @@ -343,7 +345,7 @@ def create_collection(data, run_fix_schema_config_async: bool = True): # Create picklists create_default_picklists(new_collection, discipline.type) if run_fix_schema_config_async: - queue_fix_schema_config_background() + queue_fix_schema_config_background(new_collection.id) else: fix_schema_config() @@ -518,6 +520,12 @@ def get_config_resource_progress(running_task_names: Optional[list[str]] = None) active_task_names = set(running_task_names or []) return _get_config_resource_progress_from_active_names(active_task_names) + +def get_collections_with_busy_config(collection_ids) -> set[int]: + """Returns collection ids that still have active tracked config tasks.""" + return _get_collections_with_busy_config(collection_ids) + + def get_config_progress(collection_id: Optional[int] = None) -> dict: """Returns a dict of the status of config/setup related background tasks""" try: @@ -525,7 +533,11 @@ def get_config_progress(collection_id: Optional[int] = None) -> dict: except MissingWorkerError: running_task_names = [] - busy = is_config_task_running(running_task_names) + busy = ( + is_collection_config_busy(collection_id) + if collection_id is not None + else is_config_task_running(running_task_names) + ) last_error = None completed_resources = get_config_resource_progress(running_task_names) diff --git a/specifyweb/backend/setup_tool/redis.py b/specifyweb/backend/setup_tool/redis.py index 1a70c241a9c..c53cdcaf4b5 100644 --- a/specifyweb/backend/setup_tool/redis.py +++ b/specifyweb/backend/setup_tool/redis.py @@ -2,5 +2,7 @@ # Also defined separately in setup_tool/apps.py ACTIVE_TASK_REDIS_KEY = "specify:{database}:setup:active_task_id" ACTIVE_TASK_TTL = 60*60*2 # setup should be less than 2 hours +# Keep track of config/setup celery task ids by collection. +COLLECTION_TASK_IDS_REDIS_KEY = "specify:{database}:setup:collection:{collection_id}:task_ids" # Keep track of last error. LAST_ERROR_REDIS_KEY = "specify:{database}:setup:last_error" \ No newline at end of file diff --git a/specifyweb/backend/setup_tool/setup_tasks.py b/specifyweb/backend/setup_tool/setup_tasks.py index bc6bf0dd1d9..3fdba62c87e 100644 --- a/specifyweb/backend/setup_tool/setup_tasks.py +++ b/specifyweb/backend/setup_tool/setup_tasks.py @@ -4,7 +4,7 @@ from django.db import transaction from specifyweb.celery_tasks import app -from typing import Tuple, Optional +from typing import Iterable, Optional, Tuple from celery.result import AsyncResult from specifyweb.backend.setup_tool import api from specifyweb.backend.setup_tool.app_resource_defaults import create_app_resource_defaults @@ -12,13 +12,73 @@ from specifyweb.specify.management.commands.run_key_migration_functions import fix_schema_config from specifyweb.specify.models_utils.model_extras import PALEO_DISCIPLINES, GEOLOGY_DISCIPLINES from specifyweb.celery_tasks import is_worker_alive, MissingWorkerError -from specifyweb.backend.redis_cache.store import set_string, get_string -from specifyweb.backend.setup_tool.redis import ACTIVE_TASK_REDIS_KEY, ACTIVE_TASK_TTL, LAST_ERROR_REDIS_KEY +from specifyweb.backend.redis_cache.store import ( + add_to_set, + delete_key, + get_string, + remove_from_set, + set_members, + set_string, +) +from specifyweb.backend.setup_tool.redis import ( + ACTIVE_TASK_REDIS_KEY, + ACTIVE_TASK_TTL, + COLLECTION_TASK_IDS_REDIS_KEY, + LAST_ERROR_REDIS_KEY, +) from uuid import uuid4 import logging logger = logging.getLogger(__name__) +ACTIVE_CELERY_STATES = frozenset(("PENDING", "RECEIVED", "STARTED", "RETRY", "PROGRESS")) + +def _collection_task_ids_key(collection_id: int) -> str: + return COLLECTION_TASK_IDS_REDIS_KEY.replace("{collection_id}", str(collection_id)) + +def _track_collection_task(collection_id: Optional[int], task_id: str) -> None: + if collection_id is None: + return + add_to_set(_collection_task_ids_key(collection_id), task_id) + +def _untrack_collection_task(collection_id: Optional[int], task_id: Optional[str]) -> None: + if collection_id is None or task_id is None: + return + key = _collection_task_ids_key(collection_id) + remove_from_set(key, task_id) + if not set_members(key): + delete_key(key) + +def _is_task_active(task_id: str) -> bool: + return app.AsyncResult(task_id).state in ACTIVE_CELERY_STATES + +def is_collection_config_busy(collection_id: Optional[int]) -> bool: + if collection_id is None: + return False + + key = _collection_task_ids_key(collection_id) + task_ids = set_members(key) + if not task_ids: + return False + + inactive_task_ids = [] + for task_id in task_ids: + if _is_task_active(task_id): + return True + inactive_task_ids.append(task_id) + + remove_from_set(key, *inactive_task_ids) + if not set_members(key): + delete_key(key) + return False + +def get_collections_with_busy_config(collection_ids: Iterable[int]) -> set[int]: + return { + collection_id + for collection_id in collection_ids + if is_collection_config_busy(collection_id) + } + def setup_database_background(data: dict) -> str: # Clear any previous error logs. set_last_setup_error(None) @@ -38,9 +98,15 @@ def setup_database_background(data: dict) -> str: return task.id -def queue_fix_schema_config_background() -> str: +def queue_fix_schema_config_background(collection_id: Optional[int] = None) -> str: """Queue fix_schema_config to run asynchronously and return the task id""" - task = fix_schema_config_task.apply_async() + task_id = str(uuid4()) + _track_collection_task(collection_id, task_id) + try: + task = fix_schema_config_task.apply_async(args=[collection_id], task_id=task_id) + except Exception: + _untrack_collection_task(collection_id, task_id) + raise return task.id def get_active_setup_task() -> Tuple[Optional[AsyncResult], bool]: @@ -171,9 +237,13 @@ def update_progress(): raise @app.task(bind=True) -def fix_schema_config_task(self): +def fix_schema_config_task(self, collection_id: Optional[int] = None): """Run schema config migration fixups in a background worker""" - fix_schema_config() + task_id = getattr(self.request, "id", None) + try: + fix_schema_config() + finally: + _untrack_collection_task(collection_id, task_id) def get_last_setup_error() -> Optional[str]: err = get_string(LAST_ERROR_REDIS_KEY) From 5d0e488fbf1cc6e0695d7d19c836bca7c746148c Mon Sep 17 00:00:00 2001 From: alec_dev Date: Thu, 5 Mar 2026 00:14:22 -0600 Subject: [PATCH 2/3] check discipline status for new collections --- specifyweb/backend/context/views.py | 3 + specifyweb/backend/setup_tool/api.py | 32 +++++++++- specifyweb/backend/setup_tool/redis.py | 4 +- .../backend/setup_tool/schema_defaults.py | 60 +++++++++++++++++-- specifyweb/backend/setup_tool/setup_tasks.py | 3 +- 5 files changed, 93 insertions(+), 9 deletions(-) diff --git a/specifyweb/backend/context/views.py b/specifyweb/backend/context/views.py index 77f29b19c8e..f16e8bbb937 100644 --- a/specifyweb/backend/context/views.py +++ b/specifyweb/backend/context/views.py @@ -305,6 +305,9 @@ def collection(request): set_collection_cookie(response, collection.id) return response else: + available_collections = _filter_collections_not_ready_for_config_task( + available_collections + ) response = dict( available=[obj_to_data(c) for c in available_collections], current=(current and int(current)) diff --git a/specifyweb/backend/setup_tool/api.py b/specifyweb/backend/setup_tool/api.py index da61ea93500..808a9343336 100644 --- a/specifyweb/backend/setup_tool/api.py +++ b/specifyweb/backend/setup_tool/api.py @@ -16,6 +16,7 @@ from specifyweb.backend.setup_tool.utils import normalize_keys, resolve_uri_or_fallback from specifyweb.backend.setup_tool.schema_defaults import ( apply_schema_defaults, + is_discipline_setup_busy, queue_apply_schema_defaults_background, ) from specifyweb.backend.setup_tool.picklist_defaults import create_default_picklists @@ -50,7 +51,6 @@ def get_setup_progress() -> dict: """Returns a dictionary of the status of the database setup.""" # Check if setup is currently in progress active_setup_task, busy = get_active_setup_task() - busy = busy or is_config_task_running() completed_resources = None last_error = None @@ -67,6 +67,11 @@ def get_setup_progress() -> dict: completed_resources = get_setup_resource_progress() last_error = get_last_setup_error() + has_incomplete_resources = not all(completed_resources.values()) + if has_incomplete_resources: + # During initial setup, include tracked config/setup tasks in busy state. + busy = busy or _tracked_setup_tasks_busy() or is_config_task_running() + return { "resources": completed_resources, "last_error": last_error, @@ -302,7 +307,11 @@ def create_discipline(data, run_apply_schema_defaults_async: bool = True): except Exception as e: raise SetupError(e) -def create_collection(data, run_fix_schema_config_async: bool = True): +def create_collection( + data, + run_fix_schema_config_async: bool = True, + require_discipline_ready: bool = True, +): from specifyweb.specify.models import Collection, Discipline # If collection_id is provided and exists, return success @@ -326,6 +335,11 @@ def create_collection(data, run_fix_schema_config_async: bool = True): data['discipline'] = discipline else: raise SetupError("No discipline available") + + if require_discipline_ready and is_discipline_setup_busy(discipline.id): + raise SetupError( + "This discipline is still being created. Please wait for background setup tasks to finish before creating a collection." + ) # The discipline needs a Taxon Tree in order for the Collection Object Type to be created. if not discipline.taxontreedef_id: @@ -491,6 +505,18 @@ def create_tree(name: str, data: dict) -> dict: "specifyweb.backend.setup_tool.schema_defaults.apply_schema_defaults_task", }) + +def _tracked_setup_tasks_busy() -> bool: + """True if any tracked collection/discipline setup task is active.""" + if any( + is_discipline_setup_busy(discipline_id) + for discipline_id in Discipline.objects.values_list('id', flat=True) + ): + return True + collection_ids = models.Collection.objects.values_list('id', flat=True) + return bool(_get_collections_with_busy_config(collection_ids)) + + def _task_name_to_progress_key(task_name: str) -> str: """Convert a task function name into a camelCase progress key.""" task_function_name = task_name.rsplit(".", 1)[-1] @@ -536,7 +562,7 @@ def get_config_progress(collection_id: Optional[int] = None) -> dict: busy = ( is_collection_config_busy(collection_id) if collection_id is not None - else is_config_task_running(running_task_names) + else is_config_task_running(running_task_names) or _tracked_setup_tasks_busy() ) last_error = None completed_resources = get_config_resource_progress(running_task_names) diff --git a/specifyweb/backend/setup_tool/redis.py b/specifyweb/backend/setup_tool/redis.py index c53cdcaf4b5..eb852080ead 100644 --- a/specifyweb/backend/setup_tool/redis.py +++ b/specifyweb/backend/setup_tool/redis.py @@ -4,5 +4,7 @@ ACTIVE_TASK_TTL = 60*60*2 # setup should be less than 2 hours # Keep track of config/setup celery task ids by collection. COLLECTION_TASK_IDS_REDIS_KEY = "specify:{database}:setup:collection:{collection_id}:task_ids" +# Keep track of discipline setup celery task ids by discipline. +DISCIPLINE_TASK_IDS_REDIS_KEY = "specify:{database}:setup:discipline:{discipline_id}:task_ids" # Keep track of last error. -LAST_ERROR_REDIS_KEY = "specify:{database}:setup:last_error" \ No newline at end of file +LAST_ERROR_REDIS_KEY = "specify:{database}:setup:last_error" diff --git a/specifyweb/backend/setup_tool/schema_defaults.py b/specifyweb/backend/setup_tool/schema_defaults.py index 6c9e7f9a8d3..e9d11b85f61 100644 --- a/specifyweb/backend/setup_tool/schema_defaults.py +++ b/specifyweb/backend/setup_tool/schema_defaults.py @@ -3,12 +3,55 @@ from specifyweb.celery_tasks import app from .utils import load_json_from_file from specifyweb.specify.models import Discipline +from specifyweb.backend.redis_cache.store import add_to_set, delete_key, remove_from_set, set_members +from specifyweb.backend.setup_tool.redis import DISCIPLINE_TASK_IDS_REDIS_KEY from pathlib import Path +from typing import Optional +from uuid import uuid4 import logging logger = logging.getLogger(__name__) +ACTIVE_CELERY_STATES = frozenset(("PENDING", "RECEIVED", "STARTED", "RETRY", "PROGRESS")) + +def _discipline_task_ids_key(discipline_id: int) -> str: + return DISCIPLINE_TASK_IDS_REDIS_KEY.replace("{discipline_id}", str(discipline_id)) + +def _track_discipline_task(discipline_id: int, task_id: str) -> None: + add_to_set(_discipline_task_ids_key(discipline_id), task_id) + +def _untrack_discipline_task(discipline_id: Optional[int], task_id: Optional[str]) -> None: + if discipline_id is None or task_id is None: + return + key = _discipline_task_ids_key(discipline_id) + remove_from_set(key, task_id) + if not set_members(key): + delete_key(key) + +def _is_task_active(task_id: str) -> bool: + return app.AsyncResult(task_id).state in ACTIVE_CELERY_STATES + +def is_discipline_setup_busy(discipline_id: Optional[int]) -> bool: + if discipline_id is None: + return False + + key = _discipline_task_ids_key(discipline_id) + task_ids = set_members(key) + if not task_ids: + return False + + inactive_task_ids = [] + for task_id in task_ids: + if _is_task_active(task_id): + return True + inactive_task_ids.append(task_id) + + remove_from_set(key, *inactive_task_ids) + if not set_members(key): + delete_key(key) + return False + def apply_schema_defaults(discipline: Discipline): """ Apply schema config localization defaults for this discipline. @@ -56,14 +99,23 @@ def apply_schema_defaults(discipline: Discipline): description=table_description, defaults=table_defaults, ) - def queue_apply_schema_defaults_background(discipline_id: int) -> str: """Queue apply_schema_defaults to run asynchronously and return the task id.""" - task = apply_schema_defaults_task.apply_async(args=[discipline_id]) + task_id = str(uuid4()) + _track_discipline_task(discipline_id, task_id) + try: + task = apply_schema_defaults_task.apply_async(args=[discipline_id], task_id=task_id) + except Exception: + _untrack_discipline_task(discipline_id, task_id) + raise return task.id @app.task(bind=True) def apply_schema_defaults_task(self, discipline_id: int): """Run schema localization defaults for one discipline in a background worker.""" - discipline = Discipline.objects.get(id=discipline_id) - apply_schema_defaults(discipline) + task_id = getattr(self.request, "id", None) + try: + discipline = Discipline.objects.get(id=discipline_id) + apply_schema_defaults(discipline) + finally: + _untrack_discipline_task(discipline_id, task_id) diff --git a/specifyweb/backend/setup_tool/setup_tasks.py b/specifyweb/backend/setup_tool/setup_tasks.py index 3fdba62c87e..83efcf4c25e 100644 --- a/specifyweb/backend/setup_tool/setup_tasks.py +++ b/specifyweb/backend/setup_tool/setup_tasks.py @@ -203,7 +203,8 @@ def update_progress(): logger.info('Creating collection') collection_result = api.create_collection( data['collection'], - run_fix_schema_config_async=False + run_fix_schema_config_async=False, + require_discipline_ready=False, ) collection_id = collection_result.get('collection_id') update_progress() From e3ae1db687396dc082af76e1aa1d95c186858019 Mon Sep 17 00:00:00 2001 From: alec_dev Date: Thu, 5 Mar 2026 12:38:16 -0600 Subject: [PATCH 3/3] Turn redis task key identifiers into uniquer tuples with database_name --- specifyweb/backend/setup_tool/schema_defaults.py | 4 +++- specifyweb/backend/setup_tool/setup_tasks.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/specifyweb/backend/setup_tool/schema_defaults.py b/specifyweb/backend/setup_tool/schema_defaults.py index e9d11b85f61..b4f6d450eb4 100644 --- a/specifyweb/backend/setup_tool/schema_defaults.py +++ b/specifyweb/backend/setup_tool/schema_defaults.py @@ -16,7 +16,9 @@ ACTIVE_CELERY_STATES = frozenset(("PENDING", "RECEIVED", "STARTED", "RETRY", "PROGRESS")) def _discipline_task_ids_key(discipline_id: int) -> str: - return DISCIPLINE_TASK_IDS_REDIS_KEY.replace("{discipline_id}", str(discipline_id)) + return DISCIPLINE_TASK_IDS_REDIS_KEY.replace( + "{discipline_id}", f"({{database}},{discipline_id})" + ) def _track_discipline_task(discipline_id: int, task_id: str) -> None: add_to_set(_discipline_task_ids_key(discipline_id), task_id) diff --git a/specifyweb/backend/setup_tool/setup_tasks.py b/specifyweb/backend/setup_tool/setup_tasks.py index 83efcf4c25e..426d22df76f 100644 --- a/specifyweb/backend/setup_tool/setup_tasks.py +++ b/specifyweb/backend/setup_tool/setup_tasks.py @@ -34,7 +34,9 @@ ACTIVE_CELERY_STATES = frozenset(("PENDING", "RECEIVED", "STARTED", "RETRY", "PROGRESS")) def _collection_task_ids_key(collection_id: int) -> str: - return COLLECTION_TASK_IDS_REDIS_KEY.replace("{collection_id}", str(collection_id)) + return COLLECTION_TASK_IDS_REDIS_KEY.replace( + "{collection_id}", f"({{database}},{collection_id})" + ) def _track_collection_task(collection_id: Optional[int], task_id: str) -> None: if collection_id is None: