Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 4 additions & 13 deletions specifyweb/backend/accounts/views.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import base64
from datetime import timedelta
import hashlib
import hmac
import json
Expand All @@ -17,7 +16,7 @@
from django.db.models import Max
from django.shortcuts import render
from django.template.response import TemplateResponse
from django.utils import crypto, timezone
from django.utils import crypto
from django.utils.http import url_has_allowed_host_and_scheme, urlencode
from django.views.decorators.cache import never_cache
from typing import cast
Expand Down Expand Up @@ -295,8 +294,7 @@ def choose_collection(request) -> http.HttpResponse:
id to the user if one is provided.
"""
from specifyweb.backend.context.views import set_collection_cookie, users_collections_for_sp7
from specifyweb.backend.setup_tool.api import is_config_task_running

from specifyweb.backend.setup_tool.api import get_collections_with_busy_config

from specifyweb.specify.api.serializers import obj_to_data, toJson

Expand All @@ -320,14 +318,8 @@ def choose_collection(request) -> http.HttpResponse:
)

available_collections = users_collections_for_sp7(request.specify_user.id)
if is_config_task_running():
# If config tasks are running, filter out newly created collections
fifteen_minutes_ago = timezone.now() - timedelta(minutes=15)
available_collections = [
c
for c in available_collections
if c.timestampcreated is None or c.timestampcreated <= fifteen_minutes_ago
]
busy_collection_ids = get_collections_with_busy_config(c.id for c in available_collections)
available_collections = [c for c in available_collections if c.id not in busy_collection_ids]

if len(available_collections) == 1:
set_collection_cookie(redirect_resp, available_collections[0].id)
Expand Down Expand Up @@ -588,4 +580,3 @@ def set_admin_status(request, userid):
else:
user.clear_admin()
return http.HttpResponse('false', content_type='text/plain')

10 changes: 4 additions & 6 deletions specifyweb/backend/context/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import json
import os
import re
from datetime import timedelta
from typing import List

from django.conf import settings
Expand Down Expand Up @@ -38,7 +37,7 @@
from .remote_prefs import get_remote_prefs
from .schema_localization import get_schema_languages, get_schema_localization
from .viewsets import get_views
from specifyweb.backend.setup_tool.api import get_config_progress, is_config_task_running
from specifyweb.backend.setup_tool.api import get_collections_with_busy_config, get_config_progress


def set_collection_cookie(response, collection_id): # pragma: no cover
Expand Down Expand Up @@ -647,15 +646,14 @@ def get_server_time(request):


def _filter_collections_not_ready_for_config_task(collections):
if not is_config_task_running():
busy_collection_ids = get_collections_with_busy_config(c.id for c in collections)
if not busy_collection_ids:
return collections

# If config tasks are running, filter out newly created collections.
fifteen_minutes_ago = timezone.now() - timedelta(minutes=15)
return [
c
for c in collections
if c.timestampcreated is None or c.timestampcreated <= fifteen_minutes_ago
if c.id not in busy_collection_ids
]


Expand Down
16 changes: 14 additions & 2 deletions specifyweb/backend/setup_tool/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
from specifyweb.backend.setup_tool.prep_type_defaults import create_default_prep_types
from specifyweb.backend.setup_tool.app_resource_defaults import ensure_discipline_resource_dir
from specifyweb.backend.setup_tool.setup_tasks import (
get_collections_with_busy_config as _get_collections_with_busy_config,
is_collection_config_busy,
setup_database_background,
get_active_setup_task,
get_last_setup_error,
Expand Down Expand Up @@ -343,7 +345,7 @@ def create_collection(data, run_fix_schema_config_async: bool = True):
# Create picklists
create_default_picklists(new_collection, discipline.type)
if run_fix_schema_config_async:
queue_fix_schema_config_background()
queue_fix_schema_config_background(new_collection.id)
else:
fix_schema_config()

Expand Down Expand Up @@ -518,14 +520,24 @@ def get_config_resource_progress(running_task_names: Optional[list[str]] = None)
active_task_names = set(running_task_names or [])
return _get_config_resource_progress_from_active_names(active_task_names)


def get_collections_with_busy_config(collection_ids) -> set[int]:
"""Returns collection ids that still have active tracked config tasks."""
return _get_collections_with_busy_config(collection_ids)


def get_config_progress(collection_id: Optional[int] = None) -> dict:
"""Returns a dict of the status of config/setup related background tasks"""
try:
running_task_names = get_running_worker_task_names()
except MissingWorkerError:
running_task_names = []

busy = is_config_task_running(running_task_names)
busy = (
is_collection_config_busy(collection_id)
if collection_id is not None
else is_config_task_running(running_task_names)
)
last_error = None
completed_resources = get_config_resource_progress(running_task_names)

Expand Down
2 changes: 2 additions & 0 deletions specifyweb/backend/setup_tool/redis.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,7 @@
# Also defined separately in setup_tool/apps.py
ACTIVE_TASK_REDIS_KEY = "specify:{database}:setup:active_task_id"
ACTIVE_TASK_TTL = 60*60*2 # setup should be less than 2 hours
# Keep track of config/setup celery task ids by collection.
COLLECTION_TASK_IDS_REDIS_KEY = "specify:{database}:setup:collection:{collection_id}:task_ids"
# Keep track of last error.
LAST_ERROR_REDIS_KEY = "specify:{database}:setup:last_error"
84 changes: 77 additions & 7 deletions specifyweb/backend/setup_tool/setup_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,81 @@

from django.db import transaction
from specifyweb.celery_tasks import app
from typing import Tuple, Optional
from typing import Iterable, Optional, Tuple
from celery.result import AsyncResult
from specifyweb.backend.setup_tool import api
from specifyweb.backend.setup_tool.app_resource_defaults import create_app_resource_defaults
from specifyweb.backend.setup_tool.tree_defaults import start_preload_default_tree
from specifyweb.specify.management.commands.run_key_migration_functions import fix_schema_config
from specifyweb.specify.models_utils.model_extras import PALEO_DISCIPLINES, GEOLOGY_DISCIPLINES
from specifyweb.celery_tasks import is_worker_alive, MissingWorkerError
from specifyweb.backend.redis_cache.store import set_string, get_string
from specifyweb.backend.setup_tool.redis import ACTIVE_TASK_REDIS_KEY, ACTIVE_TASK_TTL, LAST_ERROR_REDIS_KEY
from specifyweb.backend.redis_cache.store import (
add_to_set,
delete_key,
get_string,
remove_from_set,
set_members,
set_string,
)
from specifyweb.backend.setup_tool.redis import (
ACTIVE_TASK_REDIS_KEY,
ACTIVE_TASK_TTL,
COLLECTION_TASK_IDS_REDIS_KEY,
LAST_ERROR_REDIS_KEY,
)

from uuid import uuid4
import logging
logger = logging.getLogger(__name__)

ACTIVE_CELERY_STATES = frozenset(("PENDING", "RECEIVED", "STARTED", "RETRY", "PROGRESS"))

def _collection_task_ids_key(collection_id: int) -> str:
return COLLECTION_TASK_IDS_REDIS_KEY.replace("{collection_id}", str(collection_id))

def _track_collection_task(collection_id: Optional[int], task_id: str) -> None:
if collection_id is None:
return
add_to_set(_collection_task_ids_key(collection_id), task_id)

def _untrack_collection_task(collection_id: Optional[int], task_id: Optional[str]) -> None:
if collection_id is None or task_id is None:
return
key = _collection_task_ids_key(collection_id)
remove_from_set(key, task_id)
if not set_members(key):
delete_key(key)

def _is_task_active(task_id: str) -> bool:
return app.AsyncResult(task_id).state in ACTIVE_CELERY_STATES

def is_collection_config_busy(collection_id: Optional[int]) -> bool:
if collection_id is None:
return False

key = _collection_task_ids_key(collection_id)
task_ids = set_members(key)
if not task_ids:
return False

inactive_task_ids = []
for task_id in task_ids:
if _is_task_active(task_id):
return True
inactive_task_ids.append(task_id)

remove_from_set(key, *inactive_task_ids)
if not set_members(key):
delete_key(key)
return False

def get_collections_with_busy_config(collection_ids: Iterable[int]) -> set[int]:
return {
collection_id
for collection_id in collection_ids
if is_collection_config_busy(collection_id)
}

def setup_database_background(data: dict) -> str:
# Clear any previous error logs.
set_last_setup_error(None)
Expand All @@ -38,9 +98,15 @@ def setup_database_background(data: dict) -> str:

return task.id

def queue_fix_schema_config_background() -> str:
def queue_fix_schema_config_background(collection_id: Optional[int] = None) -> str:
"""Queue fix_schema_config to run asynchronously and return the task id"""
task = fix_schema_config_task.apply_async()
task_id = str(uuid4())
_track_collection_task(collection_id, task_id)
try:
task = fix_schema_config_task.apply_async(args=[collection_id], task_id=task_id)
except Exception:
_untrack_collection_task(collection_id, task_id)
raise
return task.id

def get_active_setup_task() -> Tuple[Optional[AsyncResult], bool]:
Expand Down Expand Up @@ -171,9 +237,13 @@ def update_progress():
raise

@app.task(bind=True)
def fix_schema_config_task(self):
def fix_schema_config_task(self, collection_id: Optional[int] = None):
"""Run schema config migration fixups in a background worker"""
fix_schema_config()
task_id = getattr(self.request, "id", None)
try:
fix_schema_config()
finally:
_untrack_collection_task(collection_id, task_id)

def get_last_setup_error() -> Optional[str]:
err = get_string(LAST_ERROR_REDIS_KEY)
Expand Down