3 changes: 2 additions & 1 deletion fastdeploy/benchmarks/serve.py
@@ -538,7 +538,8 @@ def calculate_metrics(
slo_values.append(goodput_config_dict["e2el"] / MILLISECONDS_TO_SECONDS_CONVERSION)

for req_metric in zip(*valid_metrics):
-is_good_req = all([s >= r for s, r in zip(slo_values, req_metric)])
+# Optimization: Use generator expression instead of list comprehension in all() to save memory and enable short-circuiting
+is_good_req = all(s >= r for s, r in zip(slo_values, req_metric))
if is_good_req:
good_completed += 1

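Reviewer note (not part of the diff): a minimal standalone sketch of the short-circuiting behaviour the added comment refers to. The `check` helper and the literal `slo_values`/`req_metric` values are hypothetical stand-ins for the real benchmark data; `all()` over a generator stops at the first failing check, while the list comprehension evaluates every element before `all()` runs.

```python
def check(value, threshold):
    """Hypothetical per-metric check; prints so the evaluation order is visible."""
    print(f"checking value {value} against SLO {threshold}")
    return threshold >= value

slo_values = [10, 20, 30]   # per-metric SLO thresholds
req_metric = (50, 5, 5)     # first metric already violates its SLO

# List comprehension: every check runs before all() sees the results (prints 3 lines).
all([check(r, s) for s, r in zip(slo_values, req_metric)])

# Generator expression: all() stops after the first failing check (prints 1 line).
all(check(r, s) for s, r in zip(slo_values, req_metric))
```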
3 changes: 2 additions & 1 deletion fastdeploy/cache_manager/cache_messager.py
@@ -1004,7 +1004,8 @@ def main():
gpu_cache_kvs[f"value_cache_scales_{i}_rank{rank}_device{device}"],
f"value_cache_scales_{i}_rank{rank}.device{device}",
)
-cache_kv_size_byte = sum([tmp.numel() * 1 for key, tmp in gpu_cache_kvs.items()])
+# Optimization: Use generator expression instead of list comprehension in sum() to save memory
+cache_kv_size_byte = sum(tmp.numel() * 1 for key, tmp in gpu_cache_kvs.items())
logger.info(f"device :{device}")
logger.info(f"cache_kv_size_byte : {cache_kv_size_byte}")
logger.info(f"done init cache (full) gmem alloc : {memory_allocated}")
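Reviewer note (not part of the diff): a standalone sketch of the memory effect the `sum()` changes rely on, assuming Python 3.9+ for `tracemalloc.reset_peak()`. The `values` list is a hypothetical stand-in for the per-tensor sizes; the list comprehension materialises an intermediate list before summing, whereas the generator feeds `sum()` one value at a time.

```python
import tracemalloc

values = list(range(1_000_000))  # hypothetical stand-in for per-tensor element counts

tracemalloc.start()
total_from_list = sum([v * 1 for v in values])  # builds a 1M-element temporary list first
peak_list = tracemalloc.get_traced_memory()[1]

tracemalloc.reset_peak()
total_from_gen = sum(v * 1 for v in values)     # no intermediate list is allocated
peak_gen = tracemalloc.get_traced_memory()[1]
tracemalloc.stop()

assert total_from_list == total_from_gen
print(f"peak while summing a list comprehension: {peak_list:,} bytes")
print(f"peak while summing a generator expression: {peak_gen:,} bytes")
```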
3 changes: 2 additions & 1 deletion fastdeploy/engine/resource_manager.py
@@ -311,7 +311,8 @@ def allocate_resources_for_new_tasks(self, tasks):
break

# record batch size here
-num_blocks_used_by_tasks = sum([len(task.block_tables) if task else 0 for task in self.tasks_list])
+# Optimization: Use generator expression instead of list comprehension in sum() to save memory
+num_blocks_used_by_tasks = sum(len(task.block_tables) if task else 0 for task in self.tasks_list)
main_process_metrics.available_gpu_block_num.set(self.total_block_number() - num_blocks_used_by_tasks)
main_process_metrics.batch_size.set(self.max_num_seqs - self.available_batch())
main_process_metrics.gpu_cache_usage_perc.set(self.get_gpu_cache_usage_perc())
6 changes: 4 additions & 2 deletions fastdeploy/engine/sched/resource_manager_v1.py
@@ -945,7 +945,8 @@ def _allocate_decode_and_extend():
len(self.running)
+ len(self.to_be_rescheduled_request_id_set)
+ len(self.to_be_aborted_req_id_set)
-+ sum([req.status == RequestStatus.PREEMPTED for req in self.waiting])
+# Optimization: Use generator expression instead of list comprehension in sum() to save memory
++ sum(req.status == RequestStatus.PREEMPTED for req in self.waiting)
>= self.max_num_seqs
):
break
@@ -1558,7 +1559,8 @@ def clear_data(self):

def update_metrics(self, verbose=False):
# Update metrics
-num_tasks = sum([1 if task else 0 for task in self.tasks_list])
+# Optimization: Use generator expression instead of list comprehension in sum() to save memory
+num_tasks = sum(1 if task else 0 for task in self.tasks_list)
blocks_used_by_tasks = set()
for task in self.tasks_list:
if task is not None:
3 changes: 2 additions & 1 deletion fastdeploy/entrypoints/cli/tokenizer.py
@@ -195,7 +195,8 @@ def print_separator(title=""):
print(f"\n{'='*50}")

# 检查参数
-if not any([args.encode, args.decode, args.vocab_size, args.info, args.vocab_export]):
+# Optimization: Use a tuple literal instead of a list literal in any() to avoid allocating a throwaway list
+if not any((args.encode, args.decode, args.vocab_size, args.info, args.vocab_export)):
print("请至少指定一个参数:--encode, --decode, --vocab-size, --info, --vocab-export")
return

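Reviewer note (not part of the diff): unlike the other hunks, this call site passes five already-computed argparse attributes rather than a lazily evaluated comprehension, so there is no per-element work for `any()` to short-circuit; the only gain is skipping the list allocation. A quick standalone check, using a hypothetical `Namespace` as a stand-in for the parsed CLI arguments:

```python
from argparse import Namespace

# Hypothetical stand-in for the tokenizer subcommand's parsed arguments.
args = Namespace(encode=None, decode="1,2,3", vocab_size=None, info=None, vocab_export=None)

# Both forms evaluate every attribute before the call; the tuple literal
# merely avoids building a throwaway list for a single any() call.
assert any([args.encode, args.decode, args.vocab_size, args.info, args.vocab_export]) \
    == any((args.encode, args.decode, args.vocab_size, args.info, args.vocab_export))
```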
9 changes: 6 additions & 3 deletions fastdeploy/model_executor/models/paddleocr_vl/siglip.py
@@ -257,7 +257,8 @@ def forward(
assert batch_size == 1
start = 0

-assert sum([np.prod(x) for x in flatten_image_grid_thw]) == embeddings.shape[1], (
+# Optimization: Use generator expression instead of list comprehension in sum() to save memory
+assert sum(np.prod(x) for x in flatten_image_grid_thw) == embeddings.shape[1], (
flatten_image_grid_thw,
embeddings.shape,
)
@@ -466,7 +467,8 @@ def forward(
if use_rope is True:
flatten_image_grid_thw = self.flatten_list(image_grid_thw)
flatten_image_grid_thw = np.array(flatten_image_grid_thw)
-assert sum([np.prod(x) for x in flatten_image_grid_thw]) == hidden_states.shape[1], (
+# Optimization: Use generator expression instead of list comprehension in sum() to save memory
+assert sum(np.prod(x) for x in flatten_image_grid_thw) == hidden_states.shape[1], (
flatten_image_grid_thw,
hidden_states.shape,
)
@@ -512,8 +514,9 @@ def forward(

if use_window_attn:
flatten_image_grid_thw = self.flatten_list(image_grid_thw)
+# Optimization: Use generator expression instead of list comprehension in sum() to save memory
assert (
-sum([np.prod(x.astype("float32").cpu().numpy()) for x in flatten_image_grid_thw])
+sum(np.prod(x.astype("float32").cpu().numpy()) for x in flatten_image_grid_thw)
== hidden_states.shape[1]
), (flatten_image_grid_thw, hidden_states.shape)

3 changes: 2 additions & 1 deletion fastdeploy/worker/worker_process.py
@@ -638,7 +638,8 @@ def event_loop_normal(self) -> None:
# Let the ep group run control method synchronically
if envs.FD_ENABLE_V1_UPDATE_WEIGHTS and self.parallel_config.use_ep:
pendings = all_gather_values(len(self.cached_control_reqs), self.parallel_config.ep_group)
-if all([p > 0 for p in pendings]):
+# Optimization: Use generator expression instead of list comprehension in all() to save memory and enable short-circuiting
+if all(p > 0 for p in pendings):
logger.info(f"Rank: {self.local_rank} Detected all ep ranks have pending control tasks.")
self.run_control_method(self.cached_control_reqs.pop(0))
