From b0057625f07f57ccaa5351ce6cb43fe172a680c2 Mon Sep 17 00:00:00 2001 From: Alexey Masolov Date: Wed, 1 Apr 2026 01:00:57 +1100 Subject: [PATCH] Set default --max-requests for API workers to prevent unbounded RSS growth API workers currently run with max_requests=0 (unlimited lifetime), which means glibc heap fragmentation accumulates indefinitely and RSS grows without bound (~1 kB/request from normal Django ORM alloc/dealloc churn). Set max_requests=10000 and max_requests_jitter=500 as defaults for PulpApiWorker when gunicorn's effective max_requests is still 0 and the user did not pass --max-requests on the pulpcore-api CLI. An explicit --max-requests 0 disables recycling (gunicorn semantics) and is not overridden. Workers are gracefully recycled after ~10000 requests, resetting fragmented heap memory. Jitter prevents all workers from restarting simultaneously. Documented in docs/admin/learn/architecture.md. closes #7482 Assisted-by: Claude (Anthropic) Made-with: Cursor --- CHANGES/7482.bugfix | 2 ++ docs/admin/learn/architecture.md | 8 ++++++++ pulpcore/app/entrypoint.py | 7 +++++++ 3 files changed, 17 insertions(+) create mode 100644 CHANGES/7482.bugfix diff --git a/CHANGES/7482.bugfix b/CHANGES/7482.bugfix new file mode 100644 index 00000000000..4c0b49ddde4 --- /dev/null +++ b/CHANGES/7482.bugfix @@ -0,0 +1,2 @@ +Set default ``--max-requests 10000`` and ``--max-requests-jitter 500`` for API workers +to prevent unbounded RSS growth from glibc heap fragmentation over long-lived worker processes. diff --git a/docs/admin/learn/architecture.md b/docs/admin/learn/architecture.md index 74b6083650b..52d1aa71df0 100644 --- a/docs/admin/learn/architecture.md +++ b/docs/admin/learn/architecture.md @@ -23,6 +23,14 @@ Pulp's REST API is a Django application that runs standalone using the `gunicorn A simple way to run the REST API as a standalone service is using the provided `pulpcore-api` entrypoint. It is `gunicorn` based and provides many of its options. +!!! 
note "API worker recycling" + By default, `pulpcore-api` sets gunicorn ``--max-requests`` to 10000 and ``--max-requests-jitter`` to 500 so + worker processes are periodically replaced. That limits memory growth from allocator + fragmentation in long-lived workers. Override via gunicorn's usual mechanisms (CLI flags, + ``GUNICORN_CMD_ARGS``, or a config file). To **disable** recycling and keep unlimited worker + lifetime, pass ``--max-requests 0`` on the ``pulpcore-api`` command line (gunicorn treats + ``0`` as unlimited; Pulp only applies its own defaults when ``--max-requests`` was not passed + there). Disabling recycling is not recommended for production. The REST API should only be deployed via the `pulpcore-api` entrypoint. diff --git a/pulpcore/app/entrypoint.py b/pulpcore/app/entrypoint.py index fed1d70e2a7..b6c28a0265a 100644 --- a/pulpcore/app/entrypoint.py +++ b/pulpcore/app/entrypoint.py @@ -124,6 +124,13 @@ def load_app_specific_config(self): PulpApiWorker.__module__ + "." + PulpApiWorker.__qualname__, enforced=True, ) + # Gunicorn's default for max_requests is 0 (unlimited worker lifetime). Apply Pulp defaults + # only when the user did not pass --max-requests on the pulpcore-api CLI. An explicit + # --max-requests 0 means "disable recycling" and must not be replaced. + if self.cfg.max_requests == 0 and self.options.get("max_requests") is None: + self.cfg.set("max_requests", 10000) + if self.options.get("max_requests_jitter") is None: + self.cfg.set("max_requests_jitter", 500) def load(self): using_pulp_api_worker.set(True)