-
Notifications
You must be signed in to change notification settings - Fork 2
Add celery and celery-beat to delete partial files every 48 hours #234
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b3e3dc1
b943f3b
6ccc0ca
61aa1d9
e5234f8
8cecf62
7e4b7b9
6a9f661
0241a46
f14f7a0
2c95eec
d2fefb8
354fa94
bcdd5fb
ed2c2b4
17a3ffb
4ffb47f
c24318b
bd06fda
ad6f113
9633ef5
ce84241
4f1de26
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,3 +7,4 @@ SQL_USER=postgres | |
| SQL_PASSWORD=tekdb_password | ||
| SQL_HOST=db | ||
| SQL_PORT=5432 | ||
| CELERY_BROKER_URL=redis://redis:6379/0 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,2 +1,6 @@ | ||
| # TEKDB/__init__.py | ||
| # This will make sure the app is always imported when | ||
| # Django starts so that shared_task will use this app. | ||
| from .celery import app as celery_app | ||
|
|
||
| __all__ = ("celery_app",) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| import os | ||
|
|
||
| from celery import Celery | ||
|
|
||
| # Set the default Django settings module for the 'celery' program. | ||
| os.environ.setdefault("DJANGO_SETTINGS_MODULE", "TEKDB.settings") | ||
|
|
||
| app = Celery("TEKDB") | ||
|
|
||
| # Using a string here means the worker doesn't have to serialize | ||
| # the configuration object to child processes. | ||
| # - namespace='CELERY' means all celery-related configuration keys | ||
| # should have a `CELERY_` prefix. | ||
| app.config_from_object("django.conf:settings", namespace="CELERY") | ||
|
|
||
| # Load task modules from all registered Django apps. | ||
| app.autodiscover_tasks() |
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,70 @@ | ||||||||
| import os | ||||||||
| import logging | ||||||||
| from datetime import timedelta | ||||||||
| from celery import shared_task | ||||||||
|
|
||||||||
| from django.utils import timezone | ||||||||
|
|
||||||||
| logger = logging.getLogger("delete_partial_upload") | ||||||||
|
|
||||||||
|
|
||||||||
def bytes_to_readable(num_bytes, suffix="B"):
    """Convert a byte count to a human-readable string (e.g. ``"1.50 KB"``).

    The value is divided by 1024 until its magnitude drops below 1024 and is
    then formatted with two decimals followed by the matching unit prefix.

    Args:
        num_bytes: Size to format, as an int or float.
        suffix: Text appended after the unit prefix; defaults to ``"B"``.

    Returns:
        A string such as ``"0.00 B"``, ``"1.50 KB"`` or ``"2.00 GB"``. Sizes
        of 1024 PB and above stay expressed in petabytes
        (e.g. ``"1024.00 PB"``).
    """
    # "P" is deliberately left out of the loop: dividing once per listed unit
    # leaves the value scaled to petabytes when the loop is exhausted, so the
    # fallback below labels it correctly instead of under-reporting by 1024x.
    for unit in ["", "K", "M", "G", "T"]:
        # Compare magnitudes so negative sizes are scaled like positive ones.
        if abs(num_bytes) < 1024:
            return f"{num_bytes:.2f} {unit}{suffix}"
        num_bytes /= 1024

    # Largest supported unit; also the fallback for extremely large values.
    return f"{num_bytes:.2f} P{suffix}"
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the one piece of logic I most wanted to see in this review, and not only is it here, it's clear, readable, and well-written!
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -184,8 +184,10 @@ def delete_media_without_record_confirm(self, request): | |
|
|
||
| filelisting = self.filelisting_class( | ||
| path, | ||
| filter_func=lambda fo: not fo.has_media_record() | ||
| and fo.filename not in self.files_folders_to_ignore(), | ||
| filter_func=lambda fo: ( | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Changes to this file are just ruff formatting fixes. |
||
| not fo.has_media_record() | ||
| and fo.filename not in self.files_folders_to_ignore() | ||
| ), | ||
| sorting_by=query.get("o", DEFAULT_SORTING_BY), | ||
| sorting_order=query.get("ot", DEFAULT_SORTING_ORDER), | ||
| site=self, | ||
|
|
@@ -216,8 +218,10 @@ def delete_media_without_record(self, request): | |
|
|
||
| filelisting = self.filelisting_class( | ||
| path, | ||
| filter_func=lambda fo: not fo.has_media_record() | ||
| and fo.filename not in self.files_folders_to_ignore(), | ||
| filter_func=lambda fo: ( | ||
| not fo.has_media_record() | ||
| and fo.filename not in self.files_folders_to_ignore() | ||
| ), | ||
| sorting_by=query.get("o", DEFAULT_SORTING_BY), | ||
| sorting_order=query.get("ot", DEFAULT_SORTING_ORDER), | ||
| site=self, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,73 @@ | ||
| import os | ||
| import time | ||
| from unittest.mock import patch | ||
| from django.test import TestCase, override_settings | ||
|
|
||
| from TEKDB.tasks import delete_expired_chunks | ||
|
|
||
|
|
||
class DeleteExpiredChunksTest(TestCase):
    """Tests for running ``delete_expired_chunks`` against a chunk folder."""

    @staticmethod
    def _backdate(path, hours, now):
        """Set the access and modification times of ``path`` to ``hours`` before ``now``."""
        stamp = now - hours * 3600
        os.utime(path, (stamp, stamp))

    def test_missing_dir_returns_none(self):
        """Expect ``None`` when the configured chunk folder does not exist."""
        missing_folder = override_settings(
            MEDIA_ROOT="/nonexistent", ADMIN_RESUMABLE_CHUNK_FOLDER="does_not_exist"
        )
        with missing_folder:
            outcome = delete_expired_chunks.run(max_age_hours=24)
            self.assertIsNone(outcome)

    def test_deletes_expired_chunks_and_skips_new(self):
        """Expect the 48-hour-old file to be deleted and the 1-hour-old file kept."""
        import tempfile

        with tempfile.TemporaryDirectory() as media_root:
            chunk_dir = os.path.join(media_root, "chunks")
            os.makedirs(chunk_dir)

            stale_path = os.path.join(chunk_dir, "old.bin")
            fresh_path = os.path.join(chunk_dir, "new.bin")
            for path, payload in ((stale_path, b"old"), (fresh_path, b"new")):
                with open(path, "wb") as handle:
                    handle.write(payload)

            reference = time.time()
            self._backdate(stale_path, 48, reference)  # 48 hours old
            self._backdate(fresh_path, 1, reference)  # 1 hour old

            with override_settings(
                MEDIA_ROOT=media_root, ADMIN_RESUMABLE_CHUNK_FOLDER="chunks"
            ):
                outcome = delete_expired_chunks.run(max_age_hours=24)

            self.assertIsInstance(outcome, dict)
            for key, expected in (("deleted", 1), ("skipped", 1), ("failed", 0)):
                self.assertEqual(outcome[key], expected)
            self.assertFalse(os.path.exists(stale_path))
            self.assertTrue(os.path.exists(fresh_path))

    def test_failed_to_delete_expired_chunks(self):
        """Expect a failure to be counted, and the file kept, when ``os.remove`` raises."""
        import tempfile

        with tempfile.TemporaryDirectory() as media_root:
            chunk_dir = os.path.join(media_root, "chunks")
            os.makedirs(chunk_dir)

            stale_path = os.path.join(chunk_dir, "old.bin")
            with open(stale_path, "wb") as handle:
                handle.write(b"old")

            self._backdate(stale_path, 48, time.time())  # 48 hours old

            chunk_settings = override_settings(
                MEDIA_ROOT=media_root, ADMIN_RESUMABLE_CHUNK_FOLDER="chunks"
            )
            # Make every os.remove call fail for the duration of the task run.
            broken_remove = patch(
                "os.remove", side_effect=PermissionError("permission denied")
            )
            with chunk_settings:
                with broken_remove:
                    outcome = delete_expired_chunks.run(max_age_hours=24)

            self.assertIsInstance(outcome, dict)
            for key, expected in (("deleted", 0), ("skipped", 0), ("failed", 1)):
                self.assertEqual(outcome[key], expected)
            self.assertTrue(os.path.exists(stale_path))
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
#!/bin/bash
#
# Install Redis and register the TEKDB Celery worker and beat services with
# systemd. Must be run as root.

# Abort on the first failing command, unset variable, or failed pipeline
# stage, so the closing "Done" message is only printed when every step
# actually succeeded.
set -euo pipefail

# Ensure script is run as root
if [ "$EUID" -ne 0 ]; then
    echo "Please run as root: sudo bash $0"
    exit 1
fi

PROJECT_DIR=/usr/local/apps/TEKDB
APP_USER=vagrant

# Install Redis (refresh the package index first so the install does not
# fail against a stale or empty index)
apt-get update
apt-get install -y redis-server
systemctl enable redis-server
systemctl start redis-server

# Create log and pid directories
mkdir -p /var/log/celery /var/run/celery
chown "$APP_USER:$APP_USER" /var/log/celery /var/run/celery

# Copy service files into systemd
cp "$PROJECT_DIR/deployment/celery-worker.service" /etc/systemd/system/
cp "$PROJECT_DIR/deployment/celery-beat.service" /etc/systemd/system/

# Enable and start services
systemctl daemon-reload
systemctl enable celery-worker
systemctl enable celery-beat
systemctl start celery-worker
systemctl start celery-beat

echo "Done. Check status with: systemctl status celery-worker celery-beat"
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| [Unit] | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Pulled much of this from these docs: https://docs.celeryq.dev/en/stable/userguide/daemonizing.html#daemonization |
||
| Description=TEKDB Celery Beat Service | ||
| After=network.target postgresql.service redis-server.service | ||
|
|
||
[Service]
Type=simple
User=vagrant
Group=vagrant
WorkingDirectory=/usr/local/apps/TEKDB/TEKDB
Environment="DJANGO_SETTINGS_MODULE=TEKDB.settings"
# Run Celery beat for the TEKDB app with the django_celery_beat database
# scheduler, logging to /var/log/celery.
ExecStart=/usr/local/apps/env/bin/celery -A TEKDB beat \
    --loglevel=info \
    --logfile=/var/log/celery/tekdb-beat.log \
    --scheduler django_celery_beat.schedulers:DatabaseScheduler
Restart=always
# Wait between restarts, matching the worker unit, so a failure at startup
# does not restart in a tight loop and trip systemd's start rate limit.
RestartSec=5s
|
|
||
| [Install] | ||
| WantedBy=multi-user.target | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,18 @@ | ||
| [Unit] | ||
| Description=TEKDB Celery Worker | ||
| After=network.target postgresql.service redis-server.service | ||
|
|
||
| [Service] | ||
| Type=simple | ||
| User=vagrant | ||
| Group=vagrant | ||
| WorkingDirectory=/usr/local/apps/TEKDB/TEKDB | ||
| Environment="DJANGO_SETTINGS_MODULE=TEKDB.settings" | ||
| ExecStart=/usr/local/apps/env/bin/celery -A TEKDB worker \ | ||
| --loglevel=info \ | ||
| --logfile=/var/log/celery/tekdb-worker.log | ||
| Restart=on-failure | ||
| RestartSec=5s | ||
|
|
||
| [Install] | ||
| WantedBy=multi-user.target |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added to resolve the warning
`SecurityWarning: You're running the worker with superuser privileges: this is absolutely not recommended!` in the celery container.