Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
eb09dfe
fix(coglet): ensure logs are under 4MiB across bridge
tempusfrangit Feb 18, 2026
e83ba91
feat(coglet): wire file-based output spilling for large IPC frames
tempusfrangit Feb 18, 2026
9e08432
feat(coglet): wire orchestrator FileOutput handling from disk
tempusfrangit Feb 18, 2026
94d2dd9
feat(coglet): stream generator yields over IPC as they happen
tempusfrangit Feb 18, 2026
b2bae4d
feat(coglet): route file outputs to disk via SlotSender
tempusfrangit Feb 18, 2026
9e916df
feat(coglet): base64 data URI fallback for file outputs in orchestrator
tempusfrangit Feb 18, 2026
653bf75
feat(coglet): add optional mime_type to FileOutput protocol
tempusfrangit Feb 18, 2026
5f7f1b6
feat(coglet): wire upload_url from CLI to orchestrator for file uploads
tempusfrangit Feb 18, 2026
09a431e
fix(coglet): fix stub generation and venv setup
tempusfrangit Feb 18, 2026
c1ff72d
chore(coglet): regenerate Python type stubs
tempusfrangit Feb 18, 2026
7629255
fix(coglet): decouple output from Done message to prevent IPC frame o…
tempusfrangit Feb 18, 2026
a46fffe
chore: go fmt
tempusfrangit Feb 19, 2026
b8ca940
fix(test): correct coglet_iterator_path_output assertion and go forma…
tempusfrangit Feb 19, 2026
33365fe
fix(coglet): complete prediction synchronously when no uploads pending
tempusfrangit Feb 19, 2026
2e9303b
chore: update llm docs
tempusfrangit Feb 19, 2026
86a7824
fix(coglet): use notify_one instead of notify_waiters to eliminate race
tempusfrangit Feb 19, 2026
64592f9
fix(build): add source caching, fix output globs, and propagate COG_C…
tempusfrangit Feb 19, 2026
9004237
fix(serve): unify cog serve build path with cog build via ExcludeSource
tempusfrangit Feb 19, 2026
4cc2f96
refactor: remove BuildBase — all dev-mode commands use Build(ExcludeS…
tempusfrangit Feb 19, 2026
ecf89ec
chore: remove unused containerInfo type (lint fix)
tempusfrangit Feb 19, 2026
9be713c
fix(run): skip schema validation for cog run
tempusfrangit Feb 19, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ The CLI code is in the `cmd/cog/` and `pkg/` directories. Support tooling is in

The main commands for working on the CLI are:
- `go run ./cmd/cog` - Runs the Cog CLI directly from source (requires wheel to be built first)
- `mise run build:cog` - Builds the Cog CLI binary, embedding the Python wheel
- `mise run build:cog` - Builds the Cog CLI binary
- `mise run install` - Symlinks the built binary to `/usr/local/bin` (run `build:cog` first), or to a custom path with `PREFIX=/custom/path mise run install`
- `mise run test:go` - Runs all Go unit tests
- `go test ./pkg/...` - Runs tests directly with `go test`
Expand Down Expand Up @@ -180,7 +180,7 @@ COG_BINARY=dist/go/*/cog mise run test:integration
1. Run `mise install` to set up the development environment
2. Run `mise run build:sdk` after making changes to the `./python` directory
3. Run `mise run build:coglet:wheel:linux-x64` after making changes to the `./crates` directory (needed for Docker testing)
4. Run `mise run build:cog` to build the CLI (embeds the SDK wheel; picks up coglet wheel from `dist/`)
4. Run `mise run build:cog` to build the CLI (wheels are picked up from `dist/` at Docker build time, not embedded in the binary)
5. Run `mise run fmt:fix` to format code
6. Run `mise run lint` to check code quality
7. Run `mise run docs:llm` to regenerate `docs/llms.txt` after changing `README.md` or any `docs/*.md` file
Expand Down Expand Up @@ -212,7 +212,7 @@ See `crates/README.md` for detailed architecture documentation.
- `crates/coglet-python/` - PyO3 bindings for Python predictor integration

### Key Design Patterns
1. **Embedded Python Wheel**: The Go binary embeds the Python wheel at build time (`pkg/dockerfile/embed/`)
1. **Local Wheel Resolution**: The CLI discovers SDK and coglet wheels from `dist/` at Docker build time (not embedded in the binary)
2. **Docker SDK Integration**: Uses Docker Go SDK for container operations
3. **Type Safety**: Dataclasses for Python type validation, strongly typed Go interfaces
4. **Compatibility Matrix**: Automated CUDA/PyTorch/TensorFlow compatibility management
Expand Down
18 changes: 18 additions & 0 deletions crates/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

62 changes: 62 additions & 0 deletions crates/coglet-python/coglet/_impl.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# This file is automatically generated by pyo3_stub_gen
# ruff: noqa: E501, F401, F403, F405

import builtins
import typing
from . import _sdk
__all__ = [
"BuildInfo",
"Server",
"server",
]

__build__: BuildInfo
__version__: builtins.str
server: Server
@typing.final
class BuildInfo:
r"""
Frozen build metadata exposed as `coglet.__build__`.
"""
@property
def version(self) -> builtins.str: ...
@property
def git_sha(self) -> builtins.str: ...
@property
def build_time(self) -> builtins.str: ...
@property
def rustc_version(self) -> builtins.str: ...
def __repr__(self) -> builtins.str: ...

@typing.final
class Server:
r"""
The coglet prediction server.

Access via `coglet.server`. Frozen — attributes cannot be set or deleted.

- `coglet.server.active` — `True` when running inside a worker subprocess
- `coglet.server.serve(...)` — start the HTTP prediction server (blocking)
"""
@property
def active(self) -> builtins.bool:
r"""
`True` when running inside a coglet worker subprocess.
"""
def serve(self, predictor_ref: typing.Optional[builtins.str] = None, host: builtins.str = '0.0.0.0', port: builtins.int = 5000, await_explicit_shutdown: builtins.bool = False, is_train: builtins.bool = False, output_temp_dir_base: builtins.str = '/tmp/coglet/output', upload_url: typing.Optional[builtins.str] = None) -> None:
r"""
Start the HTTP prediction server. Blocks until shutdown.
"""
def _run_worker(self) -> None:
r"""
Worker subprocess entry point. Called by the orchestrator.

Sets the active flag, installs log writers and audit hooks,
then enters the worker event loop.
"""
def _is_cancelable(self) -> builtins.bool:
r"""
Returns `True` if the current thread is in a cancelable predict call.
"""
def __repr__(self) -> builtins.str: ...

32 changes: 31 additions & 1 deletion crates/coglet-python/src/bin/stub_gen.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,41 @@
//! Generate Python stub files for coglet.
//!
//! Run with: cargo run --bin stub_gen
//!
//! Custom generate logic: pyo3-stub-gen places classes from the native
//! `coglet._impl` module into the `coglet` parent package, but mypy stubgen
//! overwrites `coglet/__init__.pyi` from the hand-maintained `__init__.py`.
//! We redirect the `coglet` module output to `coglet/_impl.pyi` so the
//! native module types are preserved.

use pyo3_stub_gen::Result;
use std::fs;
use std::io::Write;

fn main() -> Result<()> {
let stub = coglet::stub_info()?;
stub.generate()?;

for (name, module) in &stub.modules {
let normalized = name.replace('-', "_");

let dest = if normalized == "coglet" {
// Native module classes land here — redirect to _impl.pyi
stub.python_root.join("coglet").join("_impl.pyi")
} else {
// Submodules like "coglet._sdk" → coglet/_sdk/__init__.pyi
let path = normalized.replace('.', "/");
stub.python_root.join(&path).join("__init__.pyi")
};

let dir = dest.parent().expect("cannot get parent directory");
if !dir.exists() {
fs::create_dir_all(dir)?;
}

let mut f = fs::File::create(&dest)?;
write!(f, "{module}")?;
eprintln!("Generated stub: {}", dest.display());
}

Ok(())
}
30 changes: 27 additions & 3 deletions crates/coglet-python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ use tracing_subscriber::{EnvFilter, fmt, layer::SubscriberExt, util::SubscriberI
// Define stub info gatherer for generating .pyi files
pyo3_stub_gen::define_stub_info_gatherer!(stub_info);

// Module-level attributes (pyo3-stub-gen can't see m.add() calls).
// Uses "coglet" because that's the module key in StubInfo for the native module.
pyo3_stub_gen::module_variable!("coglet", "__version__", &str);
pyo3_stub_gen::module_variable!("coglet", "__build__", BuildInfo);
pyo3_stub_gen::module_variable!("coglet", "server", CogletServer);

use coglet_core::{
Health, PredictionService, SetupResult, VersionInfo,
transport::{ServerConfig, serve as http_serve},
Expand Down Expand Up @@ -188,7 +194,8 @@ impl CogletServer {
}

/// Start the HTTP prediction server. Blocks until shutdown.
#[pyo3(signature = (predictor_ref=None, host="0.0.0.0".to_string(), port=5000, await_explicit_shutdown=false, is_train=false))]
#[allow(clippy::too_many_arguments)]
#[pyo3(signature = (predictor_ref=None, host="0.0.0.0".to_string(), port=5000, await_explicit_shutdown=false, is_train=false, output_temp_dir_base="/tmp/coglet/output".to_string(), upload_url=None))]
fn serve(
&self,
py: Python<'_>,
Expand All @@ -197,6 +204,8 @@ impl CogletServer {
port: u16,
await_explicit_shutdown: bool,
is_train: bool,
output_temp_dir_base: String,
upload_url: Option<String>,
) -> PyResult<()> {
serve_impl(
py,
Expand All @@ -205,6 +214,8 @@ impl CogletServer {
port,
await_explicit_shutdown,
is_train,
output_temp_dir_base,
upload_url,
)
}

Expand Down Expand Up @@ -254,13 +265,16 @@ impl CogletServer {
}
}

#[allow(clippy::too_many_arguments)]
fn serve_impl(
py: Python<'_>,
predictor_ref: Option<String>,
host: String,
port: u16,
await_explicit_shutdown: bool,
is_train: bool,
_output_temp_dir_base: String,
upload_url: Option<String>,
) -> PyResult<()> {
let (setup_log_tx, setup_log_rx) = tokio::sync::mpsc::unbounded_channel();
init_tracing(false, Some(setup_log_tx));
Expand Down Expand Up @@ -303,7 +317,15 @@ fn serve_impl(
};

info!(predictor_ref = %pred_ref, is_train, "Using subprocess isolation");
serve_subprocess(py, pred_ref, config, version, is_train, setup_log_rx)
serve_subprocess(
py,
pred_ref,
config,
version,
is_train,
setup_log_rx,
upload_url,
)
}

fn serve_subprocess(
Expand All @@ -313,6 +335,7 @@ fn serve_subprocess(
version: VersionInfo,
is_train: bool,
mut setup_log_rx: tokio::sync::mpsc::UnboundedReceiver<String>,
upload_url: Option<String>,
) -> PyResult<()> {
let max_concurrency = read_max_concurrency(py);
info!(
Expand All @@ -322,7 +345,8 @@ fn serve_subprocess(

let orch_config = coglet_core::orchestrator::OrchestratorConfig::new(pred_ref)
.with_num_slots(max_concurrency)
.with_train(is_train);
.with_train(is_train)
.with_upload_url(upload_url);

let service = Arc::new(
PredictionService::new_no_pool()
Expand Down
8 changes: 4 additions & 4 deletions crates/coglet-python/src/log_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,7 @@ mod tests {
fn registry_operations() {
let prediction_id = "pred_123".to_string();
let (tx, _rx) = mpsc::unbounded_channel();
let sender = Arc::new(SlotSender::new(tx));
let sender = Arc::new(SlotSender::new(tx, std::env::temp_dir()));

// Register
register_prediction(prediction_id.clone(), sender.clone());
Expand All @@ -609,7 +609,7 @@ mod tests {
#[test]
fn slot_sender_sends_log() {
let (tx, mut rx) = mpsc::unbounded_channel();
let sender = SlotSender::new(tx);
let sender = SlotSender::new(tx, std::env::temp_dir());

sender.send_log(LogSource::Stdout, "hello").unwrap();

Expand All @@ -626,7 +626,7 @@ mod tests {
#[test]
fn slot_sender_ignores_empty() {
let (tx, mut rx) = mpsc::unbounded_channel();
let sender = SlotSender::new(tx);
let sender = SlotSender::new(tx, std::env::temp_dir());

sender.send_log(LogSource::Stderr, "").unwrap();

Expand All @@ -639,7 +639,7 @@ mod tests {
let (tx, rx) = mpsc::unbounded_channel::<SlotResponse>();
drop(rx); // Close receiver

let sender = SlotSender::new(tx);
let sender = SlotSender::new(tx, std::env::temp_dir());
let result = sender.send_log(LogSource::Stdout, "hello");

assert!(result.is_err());
Expand Down
Loading