Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
3baecaa
Added monkey patch to pass in timeout for cohere
pk-zipstack Mar 10, 2026
f909a03
[FIX] Monkey-patch litellm cohere embed handler to forward timeout
pk-zipstack Mar 11, 2026
cada9d4
[FIX] Address review: version guard skips patch, stricter test assert…
pk-zipstack Mar 11, 2026
dcd0663
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 11, 2026
079b91f
[FIX] Address SonarCloud warnings in cohere timeout patch
pk-zipstack Mar 11, 2026
b93ac45
Merge branch 'main' into fix/litellm-cohere-embed-timeout
pk-zipstack Mar 11, 2026
3f5e852
[FIX] Defer private litellm imports behind version guard
pk-zipstack Mar 12, 2026
f6181a6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 12, 2026
3a6cc0f
Apply suggestion from @hari-kuriakose
pk-zipstack Mar 12, 2026
644da84
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 12, 2026
419d5e9
Update litellm patch version guard to 1.81.7
pk-zipstack Mar 12, 2026
89b8984
[FIX] Restore missing module imports and add production path test
pk-zipstack Mar 12, 2026
7b4c442
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 12, 2026
97cc3e0
Added todo to remove this patch when the issue is fixed
pk-zipstack Mar 13, 2026
1f1fc3c
[FIX] Use DeprecationWarning for version guard skip message
pk-zipstack Mar 13, 2026
2b766f9
[FIX] Replace vacuous importlib.import_module test with sys.modules c…
pk-zipstack Mar 13, 2026
a9f662f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 13, 2026
25ed96c
Merge branch 'main' into fix/litellm-cohere-embed-timeout
pk-zipstack Mar 13, 2026
a97fd80
Merge branch 'main' into fix/litellm-cohere-embed-timeout
pk-zipstack Mar 13, 2026
94f1d65
Update unstract/sdk1/src/unstract/sdk1/patches/litellm_cohere_timeout.py
pk-zipstack Mar 13, 2026
2859264
[FIX] Mock validate_environment in sync timeout tests
pk-zipstack Mar 13, 2026
992dacd
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 13, 2026
d86fac5
[FIX] Use pytest-asyncio for async test instead of asyncio.run()
pk-zipstack Mar 13, 2026
4e163dd
Commit uv.lock changes
pk-zipstack Mar 13, 2026
1c4b7ed
Merge branch 'main' into fix/litellm-cohere-embed-timeout
pk-zipstack Mar 18, 2026
1a33282
[FIX] Use logger.warning instead of DeprecationWarning for patch skip
pk-zipstack Mar 18, 2026
a71c9fe
[FIX] Remove async test cases and pytest-asyncio dependency
pk-zipstack Mar 18, 2026
136a0e0
[FIX] Restore lock files and keep pytest-asyncio in pyproject.toml
pk-zipstack Mar 19, 2026
3472494
[FIX] Regenerate sdk1 uv.lock to match pyproject.toml
pk-zipstack Mar 19, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions backend/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions platform-service/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions prompt-service/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions unstract/filesystem/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions unstract/sdk1/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ test = [
"parameterized==0.9.0",
"pytest==8.3.3",
"pytest-mock==3.14.0",
"pytest-asyncio>=0.24.0",
"pytest-cov>=6.0.0",
"pytest-md-report>=0.6.2",
]
Expand Down
1 change: 1 addition & 0 deletions unstract/sdk1/src/unstract/sdk1/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import TYPE_CHECKING

import litellm
import unstract.sdk1.patches.litellm_cohere_timeout # noqa: F401
from llama_index.core.embeddings import BaseEmbedding
from pydantic import ValidationError
from unstract.sdk1.adapters.constants import Common
Expand Down
7 changes: 7 additions & 0 deletions unstract/sdk1/src/unstract/sdk1/patches/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"""Monkey-patches for third-party library bugs.

Patches in this package are applied via side-effect imports.
Currently activated from unstract.sdk1.embedding — any code path
that reaches Bedrock Cohere embeddings without going through that
module will NOT have patches active.
"""
218 changes: 218 additions & 0 deletions unstract/sdk1/src/unstract/sdk1/patches/litellm_cohere_timeout.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
"""Monkey-patch for litellm's cohere embed handler timeout bug.

Bug: litellm.llms.cohere.embed.handler.embedding() and async_embedding()
receive a `timeout` parameter but don't forward it to client.post(),
causing "Connection timed out after None seconds" errors.

Affected litellm version: 1.81.7 (also present on latest main as of
2026-03-10).

Activation: This patch is imported as a side-effect from
unstract.sdk1.embedding. Any code path that invokes Bedrock Cohere
embeddings without going through unstract.sdk1.embedding will NOT
have this patch active.

TODO: Remove this patch once litellm ships a fix upstream.
Issue link: https://github.com/BerriAI/litellm/issues/14635
"""

import importlib.metadata
import logging

from packaging.version import Version

logger = logging.getLogger(__name__)

# --- Version guard ---
# Only apply the patch on the exact litellm version it was written for.
# Any other version (newer or older) skips the patch with a visible
# warning so engineers know to verify compatibility.
#
# The guard itself must never raise: this module is imported purely for
# its side effect, so a failure here would break the importing module.
# If the installed version cannot be determined or parsed, compatibility
# cannot be proven and the patch is skipped like any other mismatch.
_PATCHED_LITELLM_VERSION = "1.81.7"
# Placeholder shown in the warning when the installed version is unreadable.
_litellm_version = "unknown"
try:
    _litellm_version = importlib.metadata.version("litellm")
    _SKIP_PATCH = Version(_litellm_version) != Version(_PATCHED_LITELLM_VERSION)
except (importlib.metadata.PackageNotFoundError, ValueError):
    # PackageNotFoundError: no distribution metadata found for litellm.
    # ValueError: packaging's InvalidVersion (a ValueError subclass) for a
    # version string that cannot be parsed.
    _SKIP_PATCH = True
if _SKIP_PATCH:
    logger.warning(
        "litellm_cohere_timeout patch was SKIPPED — not applied. "
        "Current litellm version: %s. "
        "Patch was written for: %s. "
        "Please verify the upstream fix and remove this module.",
        _litellm_version,
        _PATCHED_LITELLM_VERSION,
    )
else:
# Private litellm imports are deferred to here so they are only
# loaded when the patch will actually be applied.
import json
from collections.abc import Callable

import httpx
import litellm
import litellm.llms.bedrock.embed.embedding as _bedrock_embed
import litellm.llms.cohere.embed.handler as _cohere_handler
from litellm.litellm_core_utils.litellm_logging import (
Logging as LiteLLMLoggingObj,
)
from litellm.llms.cohere.embed.handler import (
validate_environment,
)
from litellm.llms.cohere.embed.v1_transformation import (
CohereEmbeddingConfig,
)
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
get_async_httpx_client,
)
from litellm.types.llms.bedrock import CohereEmbeddingRequest
from litellm.types.utils import EmbeddingResponse

_DEFAULT_TIMEOUT = httpx.Timeout(None)

# Copied from litellm 1.81.7 cohere/embed/handler.py async_embedding().
# ONLY CHANGE: Added timeout=timeout to the client.post() call.
# Source: litellm/llms/cohere/embed/handler.py::async_embedding
async def _patched_async_embedding(  # type: ignore[return]  # noqa: ANN202
    model: str,
    data: dict | CohereEmbeddingRequest,
    input: list,
    model_response: litellm.utils.EmbeddingResponse,
    timeout: float | httpx.Timeout | None,
    logging_obj: LiteLLMLoggingObj,
    optional_params: dict,
    api_base: str,
    api_key: str | None,
    headers: dict,
    encoding: Callable,
    client: AsyncHTTPHandler | None = None,
):
    """Async replacement for litellm's cohere embed ``async_embedding()``.

    The body is kept line-for-line with the litellm 1.81.7 handler so it
    can be diffed against upstream; the single functional difference is
    that ``timeout`` is passed to ``client.post()``.

    NOTE: the signature (including the plain ``timeout`` argument and the
    builtin-shadowing ``input`` name) is the one this function replaces,
    so it must not be reshaped to satisfy linters.

    Args:
        model: Forwarded to the response transformation.
        data: Request payload; JSON-serialised as the POST body.
        input: Forwarded to logging and to the response transformation.
        model_response: Forwarded to the response transformation.
        timeout: Used when creating a client (if none was supplied) and
            forwarded to ``client.post()``.
        logging_obj: Receives ``pre_call`` before the request and
            ``post_call`` if the request raises.
        optional_params: Accepted for signature compatibility; not read
            in this function.
        api_base: URL the request is POSTed to.
        api_key: Forwarded to logging and to the response transformation.
        headers: HTTP headers sent with the request.
        encoding: Forwarded to the response transformation.
        client: Async HTTP client to reuse; a new one is obtained from
            ``get_async_httpx_client`` when ``None``.

    Returns:
        Whatever ``CohereEmbeddingConfig()._transform_response`` returns
        for the HTTP response.

    Raises:
        Exception: Any exception raised by the POST is logged through
            ``logging_obj.post_call`` and then re-raised.
    """
    logging_obj.pre_call(
        input=input,
        api_key=api_key,
        additional_args={
            "complete_input_dict": data,
            "headers": headers,
            "api_base": api_base,
        },
    )

    if client is None:
        client = get_async_httpx_client(
            llm_provider=litellm.LlmProviders.COHERE,
            params={"timeout": timeout},
        )

    try:
        response = await client.post(
            api_base,
            headers=headers,
            data=json.dumps(data),
            timeout=timeout,  # ONLY CHANGE: forward timeout to client
        )
    except httpx.HTTPStatusError as e:
        # HTTP-level failure: record the response body before re-raising.
        logging_obj.post_call(
            input=input,
            api_key=api_key,
            additional_args={"complete_input_dict": data},
            original_response=e.response.text,
        )
        raise e
    except Exception as e:
        # Any other failure: there is no response, so record the
        # exception text instead.
        logging_obj.post_call(
            input=input,
            api_key=api_key,
            additional_args={"complete_input_dict": data},
            original_response=str(e),
        )
        raise e

    return CohereEmbeddingConfig()._transform_response(
        response=response,
        api_key=api_key,
        logging_obj=logging_obj,
        data=data,
        model_response=model_response,
        model=model,
        encoding=encoding,
        input=input,
    )

# Copied from litellm 1.81.7 cohere/embed/handler.py embedding().
# ONLY CHANGE: Added timeout=timeout to the client.post() call.
# Source: litellm/llms/cohere/embed/handler.py::embedding
def _patched_embedding(  # type: ignore[return]  # noqa: ANN202
    model: str,
    input: list,
    model_response: EmbeddingResponse,
    logging_obj: LiteLLMLoggingObj,
    optional_params: dict,
    headers: dict,
    encoding: object,
    data: dict | CohereEmbeddingRequest | None = None,
    complete_api_base: str | None = None,
    api_key: str | None = None,
    aembedding: bool | None = None,
    timeout: float | httpx.Timeout | None = _DEFAULT_TIMEOUT,
    client: HTTPHandler | AsyncHTTPHandler | None = None,
):
    """Sync replacement for litellm's cohere embed ``embedding()``.

    The body is kept line-for-line with the litellm 1.81.7 handler so it
    can be diffed against upstream; the single functional difference is
    that ``timeout`` is passed to ``client.post()``.

    Args:
        model: Used to build the request (when ``data`` is not given) and
            forwarded to the response transformation.
        input: Used to build the request, for logging, and forwarded to
            the response transformation. The builtin-shadowing name is
            part of the signature being replaced.
        model_response: Forwarded to the response transformation.
        logging_obj: Receives ``pre_call`` before the sync request.
        optional_params: Passed as ``inference_params`` when the request
            payload has to be built here.
        headers: Passed through ``validate_environment`` together with
            ``api_key``; the result is what is sent.
        encoding: Forwarded to the response transformation.
        data: Pre-built request payload. When falsy, the payload is built
            with ``CohereEmbeddingConfig()._transform_request``.
        complete_api_base: URL to POST to. When falsy,
            ``https://api.cohere.ai/v1/embed`` is used.
        api_key: Passed to ``validate_environment``, logging and the
            response transformation.
        aembedding: When ``True``, the call is delegated to
            ``_patched_async_embedding``.
        timeout: Forwarded to ``client.post()`` (or to the async variant).
        client: HTTP client to reuse. It is only reused when its type
            matches the path taken (``HTTPHandler`` for sync,
            ``AsyncHTTPHandler`` for async); otherwise a new one is used.

    Returns:
        The result of ``CohereEmbeddingConfig()._transform_response`` on
        the sync path, or the un-awaited coroutine from
        ``_patched_async_embedding`` when ``aembedding`` is ``True``.
    """
    headers = validate_environment(api_key, headers=headers)
    embed_url = complete_api_base or "https://api.cohere.ai/v1/embed"

    # Build the payload only if the caller did not supply one.
    data = data or CohereEmbeddingConfig()._transform_request(
        model=model, input=input, inference_params=optional_params
    )

    # Async routing: hand back the coroutine; the caller awaits it.
    if aembedding is True:
        return _patched_async_embedding(
            model=model,
            data=data,
            input=input,
            model_response=model_response,
            timeout=timeout,
            logging_obj=logging_obj,
            optional_params=optional_params,
            api_base=embed_url,
            api_key=api_key,
            headers=headers,
            encoding=encoding,
            # Only pass the client along if it is an async handler; the
            # async variant creates its own when given None.
            client=(
                client
                if client is not None and isinstance(client, AsyncHTTPHandler)
                else None
            ),
        )

    logging_obj.pre_call(
        input=input,
        api_key=api_key,
        additional_args={"complete_input_dict": data},
    )

    # A missing or async client cannot serve the sync path; create a
    # fresh sync handler instead.
    if client is None or not isinstance(client, HTTPHandler):
        client = HTTPHandler(concurrent_limit=1)

    response = client.post(
        embed_url,
        headers=headers,
        data=json.dumps(data),
        timeout=timeout,  # ONLY CHANGE: forward timeout to client
    )

    return CohereEmbeddingConfig()._transform_response(
        response=response,
        api_key=api_key,
        logging_obj=logging_obj,
        data=data,
        model_response=model_response,
        model=model,
        encoding=encoding,
        input=input,
    )

# Apply the monkey-patch to both the source module and any existing
# direct bindings (e.g. bedrock's `from ... import embedding as
# cohere_embedding`), since direct imports capture a reference at
# import time and won't see module-level replacements.
#
# 1) Replace the attributes on the cohere handler module, so lookups
#    that go through the module resolve to the patched functions.
_cohere_handler.async_embedding = _patched_async_embedding
_cohere_handler.embedding = _patched_embedding
# 2) Rebind the name already held by the bedrock embedding module.
_bedrock_embed.cohere_embedding = _patched_embedding
# Logged at INFO so the logs show whether the patch was applied.
logger.info("Applied litellm cohere embed timeout patch")
Empty file.
Loading
Loading