Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ sc.pl.umap(adata, color='cytetype_annotation_clusters')
🚀 [Try it in Google Colab](https://colab.research.google.com/drive/1aRLsI3mx8JR8u5BKHs48YUbLsqRsh2N7?usp=sharing)

> **Note:** No API keys required for default configuration. See [custom LLM configuration](docs/configuration.md#llm-configuration) for advanced options.
>
> `run()` now handles artifact packaging and upload automatically (`vars.h5` + `obs.duckdb`) before annotation.
> Generated artifact files are kept on disk by default; use `cleanup_artifacts=True` to remove them after run completion/failure.
**Using R/Seurat?** [CyteTypeR](https://github.com/NygenAnalytics/CyteTypeR)

Expand Down
45 changes: 44 additions & 1 deletion cytetype/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,47 @@
__version__ = "0.14.0"

import requests

from .config import logger
from .main import CyteType

__all__ = ["CyteType"]
__version__ = "0.13.0"

_PYPI_JSON_URL = "https://pypi.org/pypi/cytetype/json"


def _parse_simple_version(value: str) -> tuple[int, ...] | None:
cleaned = value.lstrip("v")
parts = cleaned.split(".")
if not parts or any(not part.isdigit() for part in parts):
return None
return tuple(int(part) for part in parts)


def _warn_if_update_available() -> None:
    """Log a warning when PyPI reports a newer release than ``__version__``.

    This is a best-effort check: it returns silently when the local version
    cannot be parsed, when the request fails or times out, or when the
    response does not contain a parseable version string.
    """
    current_version = _parse_simple_version(__version__)
    if current_version is None:
        return

    try:
        # Short timeout so a slow or unreachable index does not hold up the caller.
        response = requests.get(_PYPI_JSON_URL, timeout=2)
        response.raise_for_status()
        latest_raw = response.json().get("info", {}).get("version")
        if not isinstance(latest_raw, str):
            return
        latest_version = _parse_simple_version(latest_raw)
        if latest_version is None:
            return
        # Zero-pad to a common length before comparing: plain tuple comparison
        # would treat (0, 14, 0) as newer than (0, 14) even though both denote
        # the same release.
        width = max(len(current_version), len(latest_version))
        current_padded = current_version + (0,) * (width - len(current_version))
        latest_padded = latest_version + (0,) * (width - len(latest_version))
        if latest_padded > current_padded:
            logger.warning(
                "A newer CyteType version is available on PyPI ({} -> {}). "
                "Run `pip install -U cytetype` to update.",
                __version__,
                latest_raw,
            )
    except Exception:
        # Best-effort check: stay silent on all failures (offline, timeout, parsing, etc).
        return


_warn_if_update_available()
13 changes: 12 additions & 1 deletion cytetype/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,14 @@
fetch_job_results,
wait_for_completion,
)
from .schemas import LLMModelConfig, InputData
from .schemas import (
LLMModelConfig,
InputData,
AnnotateRequest,
UploadResponse,
UploadedFiles,
UploadFileKind,
)
from .exceptions import (
CyteTypeError,
APIError,
Expand All @@ -29,6 +36,10 @@
# Schemas
"LLMModelConfig",
"InputData",
"AnnotateRequest",
"UploadResponse",
"UploadedFiles",
"UploadFileKind",
# Exceptions
"CyteTypeError",
"APIError",
Expand Down
56 changes: 56 additions & 0 deletions cytetype/api/client.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,68 @@
import time
from pathlib import Path
from typing import Any

from .transport import HTTPTransport
from .progress import ProgressDisplay
from .exceptions import JobFailedError, TimeoutError, APIError
from .schemas import UploadResponse, UploadFileKind
from ..config import logger


# Per-kind size ceilings, in bytes, checked locally before an upload is attempted.
MAX_UPLOAD_BYTES: dict[UploadFileKind, int] = {
    "obs_duckdb": 100 * 2**20,  # 100MB
    "vars_h5": 10 * 2**30,  # 10GB
}


def _upload_file(
    base_url: str,
    auth_token: str | None,
    file_kind: UploadFileKind,
    file_path: str,
    timeout: float | tuple[float, float] = (30.0, 3600.0),
) -> UploadResponse:
    """Upload one local file as a raw binary POST to ``upload/{file_kind}``.

    Args:
        base_url: Base URL handed to ``HTTPTransport``.
        auth_token: Optional token handed to ``HTTPTransport``.
        file_kind: Which artifact is being uploaded; selects both the
            endpoint suffix and the size limit in ``MAX_UPLOAD_BYTES``.
        file_path: Path of the file on local disk.
        timeout: Timeout value passed through to ``post_binary``.

    Returns:
        The upload endpoint's response parsed into an ``UploadResponse``.

    Raises:
        FileNotFoundError: If ``file_path`` is not an existing regular file.
        ValueError: If the file is larger than the limit for ``file_kind``.
    """
    path_obj = Path(file_path)
    if not path_obj.is_file():
        raise FileNotFoundError(f"Upload file not found: {path_obj}")

    # Reject oversized files locally instead of sending them over the network.
    size_bytes = path_obj.stat().st_size
    max_size = MAX_UPLOAD_BYTES[file_kind]
    if size_bytes > max_size:
        raise ValueError(
            f"{file_kind} exceeds upload limit: {size_bytes} bytes > {max_size} bytes"
        )

    transport = HTTPTransport(base_url, auth_token)
    try:
        # The open handle itself is passed as the request body, so the file
        # is not read into memory here first.
        with path_obj.open("rb") as f:
            _, response = transport.post_binary(
                f"upload/{file_kind}",
                data=f,
                timeout=timeout,
            )
    finally:
        # The transport is created for this single call; close its session so
        # pooled connections are released whether or not the upload succeeded.
        transport.session.close()
    return UploadResponse(**response)


def upload_obs_duckdb(
    base_url: str,
    auth_token: str | None,
    file_path: str,
    timeout: float | tuple[float, float] = (30.0, 3600.0),
) -> UploadResponse:
    """Send the obs DuckDB file at ``file_path`` and return the upload metadata.

    Delegates to ``_upload_file`` with the file kind fixed to ``"obs_duckdb"``.
    """
    kind: UploadFileKind = "obs_duckdb"
    return _upload_file(
        base_url=base_url,
        auth_token=auth_token,
        file_kind=kind,
        file_path=file_path,
        timeout=timeout,
    )


def upload_vars_h5(
    base_url: str,
    auth_token: str | None,
    file_path: str,
    timeout: float | tuple[float, float] = (30.0, 3600.0),
) -> UploadResponse:
    """Send the vars HDF5 file at ``file_path`` and return the upload metadata.

    Delegates to ``_upload_file`` with the file kind fixed to ``"vars_h5"``.
    """
    kind: UploadFileKind = "vars_h5"
    return _upload_file(
        base_url=base_url,
        auth_token=auth_token,
        file_kind=kind,
        file_path=file_path,
        timeout=timeout,
    )


def submit_annotation_job(
base_url: str,
auth_token: str | None,
Expand Down
50 changes: 49 additions & 1 deletion cytetype/api/schemas.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
from typing import Any, TypeAlias, Literal
from importlib.metadata import PackageNotFoundError, version
from typing import Any, Literal, TypeAlias
from pydantic import BaseModel, Field, model_validator


def _get_client_version() -> str | None:
try:
return version("cytetype")
except PackageNotFoundError:
return None


LLMProvider: TypeAlias = Literal[
"anthropic",
"bedrock",
Expand All @@ -18,6 +26,7 @@
AgentType: TypeAlias = Literal[
"contextualizer", "annotator", "reviewer", "summarizer", "clinician", "chat"
]
UploadFileKind: TypeAlias = Literal["obs_duckdb", "vars_h5"]


class LLMModelConfig(BaseModel):
Expand Down Expand Up @@ -64,6 +73,16 @@ def check_aws_credentials(self) -> "LLMModelConfig":
raise ValueError("Either apiKey or all AWS credentials must be provided")


# Describes the client library making the request: a fixed client type plus
# the installed package version (None when it cannot be determined).
# Documented with comments rather than a docstring so the model's generated
# schema description stays exactly as before.
class ClientInfo(BaseModel):
    clientType: Literal["anndata"] = Field(
        description="The type of client that is using the API",
        default="anndata",
    )
    # Evaluated per instance via the factory, not once at import time.
    clientVersion: str | None = Field(
        description="The version of the client that is using the API",
        default_factory=_get_client_version,
    )


class InputData(BaseModel):
studyInfo: str = Field(
default="",
Expand Down Expand Up @@ -99,6 +118,10 @@ class InputData(BaseModel):
le=50,
description="Number of parallel requests to make to the model",
)
clientInfo: ClientInfo = Field(
default_factory=ClientInfo,
description="Client information",
)

@classmethod
def get_example(cls) -> "InputData":
Expand Down Expand Up @@ -171,6 +194,31 @@ def get_example(cls) -> "InputData":
)


# Optional references to uploaded artifacts; the validator below requires at
# least one of the two to be set.
# NOTE(review): the strings are presumably upload identifiers returned by the
# upload endpoints - confirm against the caller.
class UploadedFiles(BaseModel):
    obs_duckdb: str | None = None
    vars_h5: str | None = None

    @model_validator(mode="after")
    def validate_at_least_one_reference(self) -> "UploadedFiles":
        """Reject instances in which both file references are ``None``."""
        references = (self.obs_duckdb, self.vars_h5)
        if all(reference is None for reference in references):
            raise ValueError("At least one uploaded file reference must be provided")
        return self


# Request model grouping the annotation input with two optional extras:
# a list of LLM configurations and references to uploaded files.
# NOTE(review): presumably the body sent when submitting an annotation job -
# confirm against the submit call, which is not visible here.
class AnnotateRequest(BaseModel):
    # Required annotation input payload.
    input_data: InputData
    # Optional LLM configurations; defaults to None when not supplied.
    llm_configs: list[LLMModelConfig] | None = None
    # Optional uploaded-file references; defaults to None when not supplied.
    uploaded_files: UploadedFiles | None = None


# Metadata describing a completed file upload. All fields are required.
class UploadResponse(BaseModel):
    # Identifier assigned to the upload.
    upload_id: str
    # Which artifact kind was uploaded ("obs_duckdb" or "vars_h5").
    file_kind: UploadFileKind
    file_name: str
    # Size of the uploaded file in bytes.
    size_bytes: int
    # NOTE(review): presumably a timestamp string for when the upload
    # expires - format is not established here; confirm against the API.
    expires_at: str


# New schemas for API responses
class ErrorResponse(BaseModel):
"""Standard error response from CyteType API."""
Expand Down
34 changes: 30 additions & 4 deletions cytetype/api/transport.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import requests
from typing import Any
from typing import Any, BinaryIO

from .exceptions import create_api_exception, NetworkError, TimeoutError
from .schemas import ErrorResponse
Expand All @@ -13,13 +13,13 @@ def __init__(self, base_url: str, auth_token: str | None = None):
self.auth_token = auth_token
self.session = requests.Session()

def _build_headers(self, content_type: bool = False) -> dict[str, str]:
def _build_headers(self, content_type: str | None = None) -> dict[str, str]:
"""Build request headers with auth token."""
headers = {}
if self.auth_token:
headers["Authorization"] = f"Bearer {self.auth_token}"
if content_type:
headers["Content-Type"] = "application/json"
headers["Content-Type"] = content_type
return headers

def _parse_error(self, response: requests.Response) -> None:
Expand Down Expand Up @@ -61,7 +61,33 @@ def post(
response = self.session.post(
url,
json=data,
headers=self._build_headers(content_type=True),
headers=self._build_headers(content_type="application/json"),
timeout=timeout,
)

if not response.ok:
self._parse_error(response)

return response.status_code, response.json()

except requests.RequestException as e:
self._handle_request_error(e)
raise # For type checker

def post_binary(
self,
endpoint: str,
data: bytes | BinaryIO,
timeout: float | tuple[float, float] = (30.0, 3600.0),
) -> tuple[int, dict[str, Any]]:
"""Make POST request with raw binary body (application/octet-stream)."""
url = f"{self.base_url}/{endpoint.lstrip('/')}"

try:
response = self.session.post(
url,
data=data,
headers=self._build_headers(content_type="application/octet-stream"),
timeout=timeout,
)

Expand Down
Loading