Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
b3e3dc1
add logs module to track expired chunk deletion
paigewilliams Mar 4, 2026
b943f3b
management command to delete expired chunks
paigewilliams Mar 4, 2026
6ccc0ca
ruff format
paigewilliams Mar 4, 2026
61aa1d9
wip: add celery for deleting expired chunks
paigewilliams Mar 5, 2026
e5234f8
Merge branch 'develop' into delete-partial-files
paigewilliams Mar 5, 2026
8cecf62
sequence containers, update env vars
paigewilliams Mar 5, 2026
7e4b7b9
schedule task to delete remaining chunks
paigewilliams Mar 5, 2026
6a9f661
move delete_expired_chunks tasks to TEKDB
paigewilliams Mar 5, 2026
0241a46
Merge branch 'develop' into delete-partial-files
paigewilliams Mar 5, 2026
f14f7a0
move env vars out of common.yaml
paigewilliams Mar 6, 2026
2c95eec
add redis, celery, and celery beat to docker-compose.prod.local
paigewilliams Mar 6, 2026
d2fefb8
remove debug task
paigewilliams Mar 6, 2026
354fa94
wip: add tests for delete_expired_chunks
paigewilliams Mar 6, 2026
bcdd5fb
rename tests
paigewilliams Mar 6, 2026
ed2c2b4
add scripts and services for celery in vagrant
paigewilliams Mar 10, 2026
17a3ffb
add CELERY_RESULT_EXTENDED setting
paigewilliams Mar 10, 2026
4ffb47f
remove celery and celery beat restart commands from update.sh
paigewilliams Mar 10, 2026
c24318b
set redis service name as redis-server in celery service files
paigewilliams Mar 10, 2026
bd06fda
add redis, celery and celery-beat to docker-compose.prod.yaml; remove…
paigewilliams Mar 10, 2026
ad6f113
delete delete_expired_chunks.py
paigewilliams Mar 11, 2026
9633ef5
remove unnecessary env vars from celery and celery beat containers
paigewilliams Mar 11, 2026
ce84241
run as non-root user in docker to resolve warning in celery worker
paigewilliams Mar 11, 2026
4f1de26
add test for failed to delete chunk case
paigewilliams Mar 11, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions TEKDB/.env.dev
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ SQL_USER=postgres
SQL_PASSWORD=tekdb_password
SQL_HOST=db
SQL_PORT=5432
CELERY_BROKER_URL=redis://redis:6379/0
6 changes: 6 additions & 0 deletions TEKDB/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ COPY . /usr/src/app
COPY entrypoint.sh /usr/local/bin/entrypoint.sh
RUN chmod +x /usr/local/bin/entrypoint.sh

# Create a non-root user and give it ownership of the app directory
RUN addgroup --system appgroup && adduser --system --ingroup appgroup appuser \
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added to resolve warning of SecurityWarning: You're running the worker with superuser privileges: this is absolutely not recommended! in the celery container.

&& chown -R appuser:appgroup /usr/src/app

USER appuser

# Expose the port the app runs on (entrypoint starts django development server or uWSGI on 8000)
EXPOSE 8000

Expand Down
4 changes: 4 additions & 0 deletions TEKDB/TEKDB/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
# TEKDB/__init__.py
# This will make sure the app is always imported when
# Django starts so that shared_task will use this app.
from .celery import app as celery_app

__all__ = ("celery_app",)
5 changes: 5 additions & 0 deletions TEKDB/TEKDB/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from dal import autocomplete
from mimetypes import guess_type
from django.templatetags.static import static
from django_celery_results.admin import GroupResult

# from moderation.admin import ModerationAdmin
import nested_admin
Expand Down Expand Up @@ -1770,3 +1771,7 @@ class UsersAdmin(UserAdmin):
admin.site.register(LookupAuthorType)
admin.site.register(LookupUserInfo)
# admin.site.register(CurrentVersion)

admin.site.unregister(GroupResult)
# admin.site.unregister(CrontabScheduleAdmin)
# admin.site.unregister(ClockedScheduleAdmin)
17 changes: 17 additions & 0 deletions TEKDB/TEKDB/celery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""Celery application bootstrap for the TEKDB Django project.

Creates the module-level ``app`` instance, configures it from the Django
settings object (only keys carrying the ``CELERY_`` prefix), and asks it to
autodiscover task modules.
"""

import os

from celery import Celery

# Set the default Django settings module for the 'celery' program.
# setdefault() leaves an already-exported DJANGO_SETTINGS_MODULE untouched.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "TEKDB.settings")

app = Celery("TEKDB")

# Using a string here means the worker doesn't have to serialize
# the configuration object to child processes.
# - namespace='CELERY' means all celery-related configuration keys
# should have a `CELERY_` prefix.
app.config_from_object("django.conf:settings", namespace="CELERY")

# Load task modules from all registered Django apps.
app.autodiscover_tasks()
19 changes: 19 additions & 0 deletions TEKDB/TEKDB/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"""

import os
from datetime import timedelta
from glob import glob

try:
Expand Down Expand Up @@ -55,6 +56,8 @@
"django.contrib.messages",
"django.contrib.staticfiles",
"django.contrib.gis",
"django_celery_results",
"django_celery_beat",
"colorfield",
# 'registration',
"leaflet",
Expand Down Expand Up @@ -352,6 +355,22 @@
ADMIN_SIMULTANEOUS_UPLOADS = 1
ADMIN_RESUMABLE_CHUNK_FOLDER = "resumable_chunks"

# Celery Configuration Options
# These keys are picked up by the Celery app through the "CELERY" settings namespace.
# Reuse the TIME_ZONE value defined elsewhere in this settings module.
CELERY_TIMEZONE = TIME_ZONE
CELERY_TASK_TRACK_STARTED = True
# 30 * 60 = 1800; Celery interprets the task time limit in seconds (30 minutes).
CELERY_TASK_TIME_LIMIT = 30 * 60
# Broker URL comes from the environment, falling back to a Redis on localhost.
CELERY_BROKER_URL = os.environ.get("CELERY_BROKER_URL", "redis://localhost:6379/0")
CELERY_RESULT_BACKEND = "django-db"
CELERY_RESULT_EXTENDED = True
CELERY_BEAT_SCHEDULER = "django_celery_beat.schedulers:DatabaseScheduler"
# Periodic cleanup of expired upload chunks: run the task every 48 hours and
# pass max_age_hours=48 to it.
# NOTE(review): with a 48h interval and a 48h age threshold, a chunk that is
# just under 48h old at one run is only removed at the next run, i.e. it may
# persist for up to ~96h — confirm this is acceptable.
CELERY_BEAT_SCHEDULE = {
"delete-expired-chunks-every-48-hours": {
"task": "TEKDB.tasks.delete_expired_chunks",
"schedule": timedelta(hours=48),
"kwargs": {"max_age_hours": 48},
},
}

try:
from TEKDB.local_settings import * # noqa: F403
except Exception:
Expand Down
70 changes: 70 additions & 0 deletions TEKDB/TEKDB/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import os
import logging
from datetime import timedelta
from celery import shared_task

from django.utils import timezone

logger = logging.getLogger("delete_partial_upload")


def bytes_to_readable(num_bytes, suffix="B"):
    """Convert a byte count to a human-readable string (e.g. ``"1.50 KB"``).

    The value is repeatedly divided by 1024 until it drops below 1024 or the
    largest supported unit (``P``) is reached.

    Args:
        num_bytes: Size in bytes (int or float).
        suffix: Text appended after the unit prefix; defaults to ``"B"``.

    Returns:
        A string with two decimals, a space, the unit prefix and *suffix*.
        Values of 1024 petabytes or more are still expressed in ``P`` units
        rather than falling through and returning ``None``.
    """
    units = ("", "K", "M", "G", "T", "P")
    # Only the units below the largest one can be "outgrown"; stopping one
    # short keeps the remaining value expressed in the final unit.
    for unit in units[:-1]:
        if num_bytes < 1024:
            return f"{num_bytes:.2f} {unit}{suffix}"
        num_bytes /= 1024
    return f"{num_bytes:.2f} {units[-1]}{suffix}"


Comment on lines +17 to +18
Copy link

Copilot AI Mar 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bytes_to_readable can return None for very large values because the loop has no fallback return after the last unit. Ensure the function always returns a string (e.g., add a final return for the largest unit).

Suggested change
# Fallback: the loop has already divided past the "P" step, so the remaining value is in exa-units.
return f"{num_bytes:.2f} E{suffix}"

Copilot uses AI. Check for mistakes.
@shared_task(bind=True, max_retries=3, autoretry_for=(Exception,))
def delete_expired_chunks(self, max_age_hours=24):
    """Delete files in the resumable-chunk folder that are older than a cutoff.

    The folder is ``MEDIA_ROOT/ADMIN_RESUMABLE_CHUNK_FOLDER`` from the Django
    settings and is walked recursively. A file is removed when its
    modification time is strictly older than ``max_age_hours`` ago; files at
    or after the cutoff are left alone. Errors on an individual file are
    logged and counted, not re-raised, so they do not trigger a task retry.

    Args:
        max_age_hours: Age threshold in hours; defaults to 24.

    Returns:
        ``None`` when the target directory does not exist; otherwise a dict
        with the integer counts ``deleted``, ``failed`` and ``skipped`` plus
        an ISO-formatted ``completed_at`` timestamp.
    """
    from django.conf import settings

    target_dir = os.path.join(
        settings.MEDIA_ROOT, settings.ADMIN_RESUMABLE_CHUNK_FOLDER
    )
    threshold = (timezone.now() - timedelta(hours=max_age_hours)).timestamp()

    # Nothing to clean when the chunk folder is absent.
    if not os.path.isdir(target_dir):
        logger.error(f"Target directory does not exist: {target_dir}")
        return None

    logger.info(
        f"Starting cleanup of '{target_dir}' — files older than {max_age_hours}h"
    )

    deleted, failed, skipped = [], [], []
    for folder, _subfolders, names in os.walk(target_dir):
        for name in names:
            file_path = os.path.join(folder, name)
            try:
                modified = os.path.getmtime(file_path)
                if modified >= threshold:
                    # Modified at or after the cutoff: too new to delete.
                    bucket = skipped
                else:
                    file_size = bytes_to_readable(os.path.getsize(file_path))
                    file_age_hours = (timezone.now().timestamp() - modified) / 3600
                    logger.info(
                        f"Deleting: {file_path} "
                        f"(size={file_size}, age={file_age_hours:.1f}h)"
                    )
                    os.remove(file_path)
                    bucket = deleted
            except Exception as e:
                logger.error(f"Failed to delete {file_path}: {e}")
                bucket = failed
            # Exactly one bucket records each visited file.
            bucket.append(file_path)

    logger.info(
        f"Cleanup complete — deleted: {len(deleted)}, failed: {len(failed)}, skipped (too new): {len(skipped)}"
    )
    summary = {
        "deleted": len(deleted),
        "failed": len(failed),
        "skipped": len(skipped),
    }
    summary["completed_at"] = timezone.now().isoformat()
    return summary
12 changes: 8 additions & 4 deletions TEKDB/TEKDB/tekdb_filebrowser.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,10 @@ def delete_media_without_record_confirm(self, request):

filelisting = self.filelisting_class(
path,
filter_func=lambda fo: not fo.has_media_record()
and fo.filename not in self.files_folders_to_ignore(),
filter_func=lambda fo: (
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

changes to this file are just ruff formatting fixes

not fo.has_media_record()
and fo.filename not in self.files_folders_to_ignore()
),
sorting_by=query.get("o", DEFAULT_SORTING_BY),
sorting_order=query.get("ot", DEFAULT_SORTING_ORDER),
site=self,
Expand Down Expand Up @@ -216,8 +218,10 @@ def delete_media_without_record(self, request):

filelisting = self.filelisting_class(
path,
filter_func=lambda fo: not fo.has_media_record()
and fo.filename not in self.files_folders_to_ignore(),
filter_func=lambda fo: (
not fo.has_media_record()
and fo.filename not in self.files_folders_to_ignore()
),
sorting_by=query.get("o", DEFAULT_SORTING_BY),
sorting_order=query.get("ot", DEFAULT_SORTING_ORDER),
site=self,
Expand Down
73 changes: 73 additions & 0 deletions TEKDB/TEKDB/tests/test_tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import os
import time
from unittest.mock import patch
from django.test import TestCase, override_settings

from TEKDB.tasks import delete_expired_chunks


class DeleteExpiredChunksTest(TestCase):
    """Run delete_expired_chunks against missing and temporary chunk folders."""

    @staticmethod
    def _write_aged_file(path, payload, age_hours):
        """Create *path* holding *payload* and backdate its atime/mtime."""
        with open(path, "wb") as handle:
            handle.write(payload)
        stamp = time.time() - age_hours * 3600
        os.utime(path, (stamp, stamp))

    @override_settings(
        MEDIA_ROOT="/nonexistent", ADMIN_RESUMABLE_CHUNK_FOLDER="does_not_exist"
    )
    def test_missing_dir_returns_none(self):
        # A chunk folder that does not exist makes the task return None.
        self.assertIsNone(delete_expired_chunks.run(max_age_hours=24))

    def test_deletes_expired_chunks_and_skips_new(self):
        import tempfile

        with tempfile.TemporaryDirectory() as tmp_path:
            target = os.path.join(tmp_path, "chunks")
            os.makedirs(target)

            old_file = os.path.join(target, "old.bin")
            new_file = os.path.join(target, "new.bin")
            self._write_aged_file(old_file, b"old", 48)  # 48 hours old
            self._write_aged_file(new_file, b"new", 1)  # 1 hour old

            with override_settings(
                MEDIA_ROOT=tmp_path, ADMIN_RESUMABLE_CHUNK_FOLDER="chunks"
            ):
                result = delete_expired_chunks.run(max_age_hours=24)

            self.assertIsInstance(result, dict)
            counts = (result["deleted"], result["skipped"], result["failed"])
            self.assertEqual(counts, (1, 1, 0))
            # Only the file older than the 24h threshold is gone.
            self.assertFalse(os.path.exists(old_file))
            self.assertTrue(os.path.exists(new_file))

    def test_failed_to_delete_expired_chunks(self):
        import tempfile

        with tempfile.TemporaryDirectory() as tmp_path:
            target = os.path.join(tmp_path, "chunks")
            os.makedirs(target)

            old_file = os.path.join(target, "old.bin")
            self._write_aged_file(old_file, b"old", 48)  # 48 hours old

            # Make the removal itself fail so the file lands in "failed".
            with override_settings(
                MEDIA_ROOT=tmp_path, ADMIN_RESUMABLE_CHUNK_FOLDER="chunks"
            ), patch("os.remove", side_effect=PermissionError("permission denied")):
                result = delete_expired_chunks.run(max_age_hours=24)

            self.assertIsInstance(result, dict)
            counts = (result["deleted"], result["skipped"], result["failed"])
            self.assertEqual(counts, (0, 0, 1))
            self.assertTrue(os.path.exists(old_file))
Empty file modified TEKDB/media/__init__.py
100755 → 100644
Empty file.
4 changes: 4 additions & 0 deletions TEKDB/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#REQUIRED
confusable-homoglyphs
coverage
celery
django >=4.2.16,<4.3
django-autocomplete-light
django-ckeditor
Expand All @@ -10,9 +11,12 @@ django-nested-admin
django-registration
django-reversion
django-tinymce
django-celery-results
django-celery-beat
pillow
psycopg2-binary
psutil
redis
django-filebrowser-no-grappelli>=4.0.0,<5.0.0
XlsxWriter
django-resumable-async-upload
Expand Down
32 changes: 32 additions & 0 deletions TEKDB/scripts/provision_celery.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash
#
# Provision Redis plus the Celery worker and Celery beat systemd services
# for TEKDB. Must be run as root.

# Stop at the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline, so a partially provisioned machine never
# reaches the final "Done" message.
set -euo pipefail

# Ensure script is run as root
if [ "$EUID" -ne 0 ]; then
    echo "Please run as root: sudo bash $0"
    exit 1
fi

PROJECT_DIR=/usr/local/apps/TEKDB
APP_USER=vagrant

# Install Redis
apt-get install -y redis-server
systemctl enable redis-server
systemctl start redis-server

# Create log and pid directories
mkdir -p /var/log/celery /var/run/celery
chown "$APP_USER:$APP_USER" /var/log/celery /var/run/celery

# Copy service files into systemd
cp "$PROJECT_DIR/deployment/celery-worker.service" /etc/systemd/system/
cp "$PROJECT_DIR/deployment/celery-beat.service" /etc/systemd/system/

# Enable and start services
systemctl daemon-reload
systemctl enable celery-worker
systemctl enable celery-beat
systemctl start celery-worker
systemctl start celery-beat

echo "Done. Check status with: systemctl status celery-worker celery-beat"
18 changes: 18 additions & 0 deletions deployment/celery-beat.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[Unit]
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Description=TEKDB Celery Beat Service
# After= only orders start-up relative to these units; it does not pull them in.
After=network.target postgresql.service redis-server.service

[Service]
Type=simple
# Run as the unprivileged vagrant account rather than root.
User=vagrant
Group=vagrant
WorkingDirectory=/usr/local/apps/TEKDB/TEKDB
Environment="DJANGO_SETTINGS_MODULE=TEKDB.settings"
# Start Celery beat for the TEKDB app with the database-backed scheduler,
# logging to /var/log/celery/tekdb-beat.log.
ExecStart=/usr/local/apps/env/bin/celery -A TEKDB beat \
--loglevel=info \
--logfile=/var/log/celery/tekdb-beat.log \
--scheduler django_celery_beat.schedulers:DatabaseScheduler
# NOTE(review): unlike celery-worker.service, this unit restarts on every exit
# (Restart=always) and sets no RestartSec, so systemd's default delay applies
# — confirm the difference is intentional.
Restart=always

[Install]
WantedBy=multi-user.target
18 changes: 18 additions & 0 deletions deployment/celery-worker.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# systemd unit that runs the TEKDB Celery worker.
[Unit]
Description=TEKDB Celery Worker
# After= only orders start-up relative to these units; it does not pull them in.
After=network.target postgresql.service redis-server.service

[Service]
Type=simple
# Run as the unprivileged vagrant account rather than root.
User=vagrant
Group=vagrant
WorkingDirectory=/usr/local/apps/TEKDB/TEKDB
Environment="DJANGO_SETTINGS_MODULE=TEKDB.settings"
# Start a Celery worker for the TEKDB app, logging to
# /var/log/celery/tekdb-worker.log.
ExecStart=/usr/local/apps/env/bin/celery -A TEKDB worker \
--loglevel=info \
--logfile=/var/log/celery/tekdb-worker.log
# Restart only after an unclean exit, waiting 5 seconds between attempts.
Restart=on-failure
RestartSec=5s

[Install]
WantedBy=multi-user.target
Loading