From bbdd7fd47539907f00f896ece36e728d557c8f95 Mon Sep 17 00:00:00 2001 From: Sergei Bronnikov Date: Mon, 16 Mar 2026 13:10:25 +0000 Subject: [PATCH 1/2] videos --- internal/provider/openai/cost.go | 42 ++++++++ internal/provider/openai/types.go | 15 +++ internal/server/web/proxy/middleware.go | 1 + internal/server/web/proxy/proxy.go | 9 ++ internal/server/web/proxy/video.go | 132 ++++++++++++++++++++++++ 5 files changed, 199 insertions(+) create mode 100644 internal/server/web/proxy/video.go diff --git a/internal/provider/openai/cost.go b/internal/provider/openai/cost.go index 777ab31..bd200af 100644 --- a/internal/provider/openai/cost.go +++ b/internal/provider/openai/cost.go @@ -152,6 +152,14 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{ "tts-1": 0.015, "tts-1-hd": 0.03, }, + "video": { // $ per sec + "sora-2": 0.1, + "sora-2-pro": 0.30, + "sora-2-720": 0.1, + "sora-2-pro-720": 0.30, + "sora-2-pro-1024": 0.5, + "sora-2-pro-1080": 0.7, + }, "completion": { "gpt-image-1.5": 0.010, "chatgpt-image-latest": 0.010, @@ -769,6 +777,40 @@ func (ce *CostEstimator) EstimateResponseApiToolCreateContainerCost(req *Respons return totalCost, nil } +func (ce *CostEstimator) EstimateVideoCost(metadata *VideoResponseMetadata) (float64, error) { + if metadata == nil { + return 0, errors.New("metadata is nil") + } + costMap, ok := ce.tokenCostMap["video"] + if !ok { + return 0, errors.New("video cost map is not provided") + } + model := metadata.Model + size, err := normalizedVideoSize(metadata.Size) + if err != nil { + return 0, err + } + costKey := fmt.Sprintf("%s-%s", model, size) + cost, ok := costMap[costKey] + if !ok { + return 0, errors.New("model with provided size is not present in the video cost map") + } + return cost * metadata.GetSecondsAsFloat(), nil +} + +func normalizedVideoSize(size string) (string, error) { + switch size { + case "720x1280", "1280x720": + return "720", nil + case "1024x1792", "1792x1024": + return "1024", nil + case 
"1080x1920", "1920x1080": + return "1080", nil + default: + return "", errors.New("size is not valid") + } +} + var reasoningModelPrefix = []string{"gpt-5", "o1", "o2", "o3"} func extendedToolType(toolType, model string) string { diff --git a/internal/provider/openai/types.go b/internal/provider/openai/types.go index 3f2dae3..299f947 100644 --- a/internal/provider/openai/types.go +++ b/internal/provider/openai/types.go @@ -1,5 +1,7 @@ package openai +import "strconv" + type ResponseRequest struct { Background *bool `json:"background,omitzero"` Conversation *any `json:"conversation,omitzero"` @@ -89,3 +91,16 @@ type ImageResponseMetadata struct { Size string `json:"size,omitempty"` Usage ImageResponseUsage `json:"usage,omitempty"` } + +type VideoResponseMetadata struct { + Model string `json:"model,omitempty"` + Size string `json:"size,omitempty"` + Seconds string `json:"seconds,omitempty"` +} + +func (v *VideoResponseMetadata) GetSecondsAsFloat() float64 { + if secondsFloat, err := strconv.ParseFloat(v.Seconds, 64); err == nil { + return secondsFloat + } + return 0 +} diff --git a/internal/server/web/proxy/middleware.go b/internal/server/web/proxy/middleware.go index a994f72..ffed4c5 100644 --- a/internal/server/web/proxy/middleware.go +++ b/internal/server/web/proxy/middleware.go @@ -61,6 +61,7 @@ type estimator interface { EstimateResponseApiTotalCost(model string, usage responsesOpenai.ResponseUsage) (float64, error) EstimateResponseApiToolCallsCost(tools []responsesOpenai.ToolUnion, model string) (float64, error) EstimateResponseApiToolCreateContainerCost(req *openai.ResponseRequest) (float64, error) + EstimateVideoCost(metadata *openai.VideoResponseMetadata) (float64, error) } type azureEstimator interface { diff --git a/internal/server/web/proxy/proxy.go b/internal/server/web/proxy/proxy.go index 3a6cb82..4740867 100644 --- a/internal/server/web/proxy/proxy.go +++ b/internal/server/web/proxy/proxy.go @@ -104,6 +104,15 @@ func NewProxyServer(log *zap.Logger, 
mode, privacyMode string, c cache, m KeyMan router.POST("/api/providers/openai/v1/audio/transcriptions", getTranscriptionsHandler(prod, client, e)) router.POST("/api/providers/openai/v1/audio/translations", getTranslationsHandler(prod, client, e)) + // videos + router.POST("/api/providers/openai/v1/videos", getVideoHandler(prod, client, e)) + router.POST("/api/providers/openai/v1/videos/edits", getVideoHandler(prod, client, e)) + router.POST("/api/providers/openai/v1/videos/extensions", getVideoHandler(prod, client, e)) + router.GET("/api/providers/openai/v1/videos/:video_id", getVideoHandler(prod, client, e)) + router.DELETE("/api/providers/openai/v1/videos/:video_id", getVideoHandler(prod, client, e)) + router.POST("/api/providers/openai/v1/videos/:video_id/remix", getVideoHandler(prod, client, e)) + router.GET("/api/providers/openai/v1/videos/:video_id/content", getVideoHandler(prod, client, e)) + // completions router.POST("/api/providers/openai/v1/chat/completions", getChatCompletionHandler(prod, private, client, e)) diff --git a/internal/server/web/proxy/video.go b/internal/server/web/proxy/video.go new file mode 100644 index 0000000..5854c59 --- /dev/null +++ b/internal/server/web/proxy/video.go @@ -0,0 +1,132 @@ +package proxy + +import ( + "context" + "encoding/json" + "errors" + "io" + "net/http" + "strings" + "time" + + "github.com/bricks-cloud/bricksllm/internal/provider/openai" + "github.com/bricks-cloud/bricksllm/internal/telemetry" + "github.com/bricks-cloud/bricksllm/internal/util" + "github.com/gin-gonic/gin" + goopenai "github.com/sashabaranov/go-openai" +) + +func getVideoHandler(prod bool, client http.Client, e estimator) gin.HandlerFunc { + return func(ginCtx *gin.Context) { + log := util.GetLogFromCtx(ginCtx) + telemetry.Incr("bricksllm.proxy.get_video_handler.requests", nil, 1) + + if ginCtx == nil || ginCtx.Request == nil { + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] context is empty") + return + } + + ctx, cancel := 
context.WithTimeout(ginCtx.Request.Context(), ginCtx.GetDuration("requestTimeout")) + defer cancel() + + videoURL, err := constructVideoURL(ginCtx.Request.URL.Path) + if err != nil { + logError(log, "failed to construct video URL", prod, err) + JSON(ginCtx, http.StatusBadRequest, "[BricksLLM] invalid video request") + return + } + + req, err := http.NewRequestWithContext(ctx, ginCtx.Request.Method, videoURL, ginCtx.Request.Body) + if err != nil { + logError(log, "error when creating openai http request", prod, err) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to create openai http request") + return + } + + copyHttpHeaders(ginCtx.Request, req, ginCtx.GetBool("removeUserAgent")) + + start := time.Now() + res, err := client.Do(req) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_video_handler.http_client_error", nil, 1) + + logError(log, "error when sending http request to openai", prod, err) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to send http request to openai") + return + } + defer res.Body.Close() + + for name, values := range res.Header { + for _, value := range values { + ginCtx.Header(name, value) + } + } + + if res.StatusCode != http.StatusOK { + dur := time.Since(start) + telemetry.Timing("bricksllm.proxy.get_video_handler.error_latency", dur, nil, 1) + telemetry.Incr("bricksllm.proxy.get_video_handler.error_response", nil, 1) + + bytes, err2 := io.ReadAll(res.Body) + if err2 != nil { + logError(log, "error when reading openai http video response body", prod, err2) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body") + return + } + + errorRes := &goopenai.ErrorResponse{} + err2 = json.Unmarshal(bytes, errorRes) + if err2 != nil { + logError(log, "error when unmarshalling openai video error response body", prod, err2) + } + + logOpenAiError(log, prod, errorRes) + + ginCtx.Data(res.StatusCode, "application/json", bytes) + return + } + + dur := 
time.Since(start) + telemetry.Timing("bricksllm.proxy.get_video_handler.latency", dur, nil, 1) + + bytes, err := io.ReadAll(res.Body) + if err != nil { + logError(log, "error when reading openai http video response body", prod, err) + JSON(ginCtx, http.StatusInternalServerError, "[BricksLLM] failed to read openai response body") + return + } + + var cost float64 = 0 + respMetadata := &openai.VideoResponseMetadata{} + telemetry.Incr("bricksllm.proxy.get_video_handler.success", nil, 1) + telemetry.Timing("bricksllm.proxy.get_video_handler.success_latency", dur, nil, 1) + + err = json.Unmarshal(bytes, respMetadata) + if err != nil { + logError(log, "error when unmarshalling openai http video response body", prod, err) + } + + isPaidRequest := ginCtx.Request.Method == http.MethodPost + if err == nil && isPaidRequest { + cost, err = e.EstimateVideoCost(respMetadata) + if err != nil { + telemetry.Incr("bricksllm.proxy.get_video_handler.estimate_cost_error", nil, 1) + logError(log, "error when estimating video cost", prod, err) + } + } + ginCtx.Set("costInUsd", cost) + ginCtx.Data(res.StatusCode, "application/json", bytes) + return + } +} + +func constructVideoURL(fullPath string) (string, error) { + if fullPath == "" { + return "", errors.New("empty full path") + } + if !strings.HasPrefix(fullPath, "/api/providers/openai") { + return "", errors.New("invalid path prefix") + } + path := strings.TrimPrefix(fullPath, "/api/providers/openai") + return "https://api.openai.com" + path, nil +} From c18dbd2e531ad37967a6bfbfeb64c1705750111f Mon Sep 17 00:00:00 2001 From: Sergei Bronnikov Date: Mon, 16 Mar 2026 15:04:03 +0000 Subject: [PATCH 2/2] fix video resp. 
audio models --- internal/provider/openai/cost.go | 11 +++++++++-- internal/server/web/proxy/video.go | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/internal/provider/openai/cost.go b/internal/provider/openai/cost.go index bd200af..fb9699f 100644 --- a/internal/provider/openai/cost.go +++ b/internal/provider/openai/cost.go @@ -149,8 +149,15 @@ var OpenAiPerThousandTokenCost = map[string]map[string]float64{ }, "audio": { "whisper-1": 0.006, - "tts-1": 0.015, - "tts-1-hd": 0.03, + + "tts-1": 0.015, + "tts-1-hd": 0.03, + + "gpt-4o-transcribe": 0.006, + "gpt-4o-transcribe-diarize": 0.006, + "gpt-4o-mini-transcribe": 0.003, + + "gpt-4o-mini-tts": 0.012, }, "video": { // $ per sec "sora-2": 0.1, diff --git a/internal/server/web/proxy/video.go b/internal/server/web/proxy/video.go index 5854c59..7e35130 100644 --- a/internal/server/web/proxy/video.go +++ b/internal/server/web/proxy/video.go @@ -115,7 +115,7 @@ func getVideoHandler(prod bool, client http.Client, e estimator) gin.HandlerFunc } } ginCtx.Set("costInUsd", cost) - ginCtx.Data(res.StatusCode, "application/json", bytes) + ginCtx.Data(res.StatusCode, res.Header.Get("Content-Type"), bytes) return } }