3 changes: 2 additions & 1 deletion fastdeploy/benchmarks/serve.py
@@ -538,7 +538,8 @@ def calculate_metrics(
slo_values.append(goodput_config_dict["e2el"] / MILLISECONDS_TO_SECONDS_CONVERSION)

for req_metric in zip(*valid_metrics):
-is_good_req = all([s >= r for s, r in zip(slo_values, req_metric)])
+# Optimization: Use generator expression instead of list comprehension in all() to save memory and enable short-circuiting
+is_good_req = all(s >= r for s, r in zip(slo_values, req_metric))
if is_good_req:
good_completed += 1

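Reviewer note (not part of the diff): a minimal standalone sketch of the short-circuiting behaviour the added comment refers to. The `check` helper and the literal `slo_values`/`req_metric` values are hypothetical stand-ins for the real benchmark data; `all()` over a generator stops at the first failing check, while the list comprehension evaluates every element before `all()` runs.

```python
def check(value, threshold):
    """Hypothetical per-metric check; prints so the evaluation order is visible."""
    print(f"checking value {value} against SLO {threshold}")
    return threshold >= value

slo_values = [10, 20, 30]   # per-metric SLO thresholds
req_metric = (50, 5, 5)     # first metric already violates its SLO

# List comprehension: every check runs before all() sees the results (prints 3 lines).
all([check(r, s) for s, r in zip(slo_values, req_metric)])

# Generator expression: all() stops after the first failing check (prints 1 line).
all(check(r, s) for s, r in zip(slo_values, req_metric))
```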
3 changes: 2 additions & 1 deletion fastdeploy/cache_manager/cache_messager.py
@@ -1004,7 +1004,8 @@ def main():
gpu_cache_kvs[f"value_cache_scales_{i}_rank{rank}_device{device}"],
f"value_cache_scales_{i}_rank{rank}.device{device}",
)
-cache_kv_size_byte = sum([tmp.numel() * 1 for key, tmp in gpu_cache_kvs.items()])
+# Optimization: Use generator expression instead of list comprehension in sum() to save memory
+cache_kv_size_byte = sum(tmp.numel() * 1 for key, tmp in gpu_cache_kvs.items())
logger.info(f"device :{device}")
logger.info(f"cache_kv_size_byte : {cache_kv_size_byte}")
logger.info(f"done init cache (full) gmem alloc : {memory_allocated}")
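Reviewer note (not part of the diff): a standalone sketch of the memory effect the `sum()` changes rely on, assuming Python 3.9+ for `tracemalloc.reset_peak()`. The `values` list is a hypothetical stand-in for the per-tensor sizes; the list comprehension materialises an intermediate list before summing, whereas the generator feeds `sum()` one value at a time.

```python
import tracemalloc

values = list(range(1_000_000))  # hypothetical stand-in for per-tensor element counts

tracemalloc.start()
total_from_list = sum([v * 1 for v in values])  # builds a 1M-element temporary list first
peak_list = tracemalloc.get_traced_memory()[1]

tracemalloc.reset_peak()
total_from_gen = sum(v * 1 for v in values)     # no intermediate list is allocated
peak_gen = tracemalloc.get_traced_memory()[1]
tracemalloc.stop()

assert total_from_list == total_from_gen
print(f"peak while summing a list comprehension: {peak_list:,} bytes")
print(f"peak while summing a generator expression: {peak_gen:,} bytes")
```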
3 changes: 2 additions & 1 deletion fastdeploy/engine/resource_manager.py
@@ -311,7 +311,8 @@ def allocate_resources_for_new_tasks(self, tasks):
break

# record batch size here
-num_blocks_used_by_tasks = sum([len(task.block_tables) if task else 0 for task in self.tasks_list])
+# Optimization: Use generator expression instead of list comprehension in sum() to save memory
+num_blocks_used_by_tasks = sum(len(task.block_tables) if task else 0 for task in self.tasks_list)
main_process_metrics.available_gpu_block_num.set(self.total_block_number() - num_blocks_used_by_tasks)
main_process_metrics.batch_size.set(self.max_num_seqs - self.available_batch())
main_process_metrics.gpu_cache_usage_perc.set(self.get_gpu_cache_usage_perc())
6 changes: 4 additions & 2 deletions fastdeploy/engine/sched/resource_manager_v1.py
@@ -945,7 +945,8 @@ def _allocate_decode_and_extend():
len(self.running)
+ len(self.to_be_rescheduled_request_id_set)
+ len(self.to_be_aborted_req_id_set)
-+ sum([req.status == RequestStatus.PREEMPTED for req in self.waiting])
+# Optimization: Use generator expression instead of list comprehension in sum() to save memory
++ sum(req.status == RequestStatus.PREEMPTED for req in self.waiting)
>= self.max_num_seqs
):
break
@@ -1558,7 +1559,8 @@ def clear_data(self):

def update_metrics(self, verbose=False):
# Update metrics
-num_tasks = sum([1 if task else 0 for task in self.tasks_list])
+# Optimization: Use generator expression instead of list comprehension in sum() to save memory
+num_tasks = sum(1 if task else 0 for task in self.tasks_list)
blocks_used_by_tasks = set()
for task in self.tasks_list:
if task is not None:
3 changes: 2 additions & 1 deletion fastdeploy/entrypoints/cli/tokenizer.py
@@ -195,7 +195,8 @@ def print_separator(title=""):
print(f"\n{'='*50}")

# 检查参数
-if not any([args.encode, args.decode, args.vocab_size, args.info, args.vocab_export]):
+# Optimization: Use a tuple literal instead of a list literal in any() to avoid allocating a throwaway list
+if not any((args.encode, args.decode, args.vocab_size, args.info, args.vocab_export)):
print("请至少指定一个参数:--encode, --decode, --vocab-size, --info, --vocab-export")
return

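Reviewer note (not part of the diff): unlike the other hunks, this call site passes five already-computed argparse attributes rather than a lazily evaluated comprehension, so there is no per-element work for `any()` to short-circuit; the only gain is skipping the list allocation. A quick standalone check, using a hypothetical `Namespace` as a stand-in for the parsed CLI arguments:

```python
from argparse import Namespace

# Hypothetical stand-in for the tokenizer subcommand's parsed arguments.
args = Namespace(encode=None, decode="1,2,3", vocab_size=None, info=None, vocab_export=None)

# Both forms evaluate every attribute before the call; the tuple literal
# merely avoids building a throwaway list for a single any() call.
assert any([args.encode, args.decode, args.vocab_size, args.info, args.vocab_export]) \
    == any((args.encode, args.decode, args.vocab_size, args.info, args.vocab_export))
```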
9 changes: 6 additions & 3 deletions fastdeploy/model_executor/models/paddleocr_vl/siglip.py
@@ -257,7 +257,8 @@ def forward(
assert batch_size == 1
start = 0

-assert sum([np.prod(x) for x in flatten_image_grid_thw]) == embeddings.shape[1], (
+# Optimization: Use generator expression instead of list comprehension in sum() to save memory
+assert sum(np.prod(x) for x in flatten_image_grid_thw) == embeddings.shape[1], (
flatten_image_grid_thw,
embeddings.shape,
)
@@ -466,7 +467,8 @@ def forward(
if use_rope is True:
flatten_image_grid_thw = self.flatten_list(image_grid_thw)
flatten_image_grid_thw = np.array(flatten_image_grid_thw)
-assert sum([np.prod(x) for x in flatten_image_grid_thw]) == hidden_states.shape[1], (
+# Optimization: Use generator expression instead of list comprehension in sum() to save memory
+assert sum(np.prod(x) for x in flatten_image_grid_thw) == hidden_states.shape[1], (
flatten_image_grid_thw,
hidden_states.shape,
)
@@ -512,8 +514,9 @@ def forward(

if use_window_attn:
flatten_image_grid_thw = self.flatten_list(image_grid_thw)
+# Optimization: Use generator expression instead of list comprehension in sum() to save memory
assert (
-sum([np.prod(x.astype("float32").cpu().numpy()) for x in flatten_image_grid_thw])
+sum(np.prod(x.astype("float32").cpu().numpy()) for x in flatten_image_grid_thw)
== hidden_states.shape[1]
), (flatten_image_grid_thw, hidden_states.shape)

3 changes: 2 additions & 1 deletion fastdeploy/worker/worker_process.py
@@ -638,7 +638,8 @@ def event_loop_normal(self) -> None:
# Let the ep group run control method synchronically
if envs.FD_ENABLE_V1_UPDATE_WEIGHTS and self.parallel_config.use_ep:
pendings = all_gather_values(len(self.cached_control_reqs), self.parallel_config.ep_group)
-if all([p > 0 for p in pendings]):
+# Optimization: Use generator expression instead of list comprehension in all() to save memory and enable short-circuiting
+if all(p > 0 for p in pendings):
logger.info(f"Rank: {self.local_rank} Detected all ep ranks have pending control tasks.")
self.run_control_method(self.cached_control_reqs.pop(0))
