diff --git a/fastdeploy/model_executor/layers/linear.py b/fastdeploy/model_executor/layers/linear.py index 2bee885ff43..b35d97d7660 100644 --- a/fastdeploy/model_executor/layers/linear.py +++ b/fastdeploy/model_executor/layers/linear.py @@ -82,10 +82,17 @@ def process_loaded_weights(self, layer, weights) -> None: layer.weight.set_value(weights) def apply(self, layer: nn.Layer, x: paddle.Tensor) -> paddle.Tensor: - linear_out = paddle.matmul(x, layer.weight) if layer.with_bias: - linear_out = paddle.add(linear_out, layer.bias) - return linear_out + bias = layer.bias + assert bias.dim() == 1 and bias.shape[-1] == layer.weight.shape[-1], ( + f"bias must be 1D with size equal to the last dim of weight, " + f"but got bias.shape={bias.shape}, weight.shape[-1]={layer.weight.shape[-1]}" + ) + out = paddle.nn.functional.linear(x, layer.weight, bias) + else: + out = paddle.matmul(x, layer.weight) + + return out class LinearBase(nn.Layer): diff --git a/tests/e2e/utils/rollout_routing_replay_test_utils.py b/tests/e2e/utils/rollout_routing_replay_test_utils.py index 4186a71649a..74af852a292 100644 --- a/tests/e2e/utils/rollout_routing_replay_test_utils.py +++ b/tests/e2e/utils/rollout_routing_replay_test_utils.py @@ -157,10 +157,10 @@ def check_routing_replay_chat_completion(openai_client, moe_layer_num: int, mode model_path = os.getenv("MODEL_PATH") if model_path: baseline_path = os.path.join( - model_path, f"R3_BaseLine_dev_uint8_0402/routing_replay_output_baseline_{model_name}" + model_path, f"R3_BaseLine_dev_uint8_0403/routing_replay_output_baseline_{model_name}" ) else: - baseline_path = f"./R3_BaseLine_dev_uint8_0402/routing_replay_output_baseline_{model_name}" + baseline_path = f"./R3_BaseLine_dev_uint8_0403/routing_replay_output_baseline_{model_name}" stream_baseline_path = os.path.join(baseline_path, "r3_chat_completion_stream") nonstream_baseline_path = os.path.join(baseline_path, "r3_chat_completion_nonstream")