Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .release-please-manifest.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
".": "0.39.0"
".": "0.40.0"
}
4 changes: 2 additions & 2 deletions .stats.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
configured_endpoints: 75
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-76f8801298719cc87e9dc4c64b321bcfd432416d76488499d340b4bb6bf81b9b.yml
openapi_spec_hash: ce0b83ef0a5f174461bd7d13a379b636
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-a49c3cc894ca8f25b07985c98bf7999c44ec4005a4f70d21f21a05cd20b4f017.yml
openapi_spec_hash: 2ef0cbb70708d94bff220a17bce88eca
config_hash: 52d213100a0ca1a4b2cdcd2718936b51
27 changes: 27 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,32 @@
# Changelog

## 0.40.0 (2026-04-13)

Full Changelog: [v0.39.0...v0.40.0](https://github.com/togethercomputer/together-typescript/compare/v0.39.0...v0.40.0)

### Features

* **api:** accept string URLs for audio_inputs and source_video in videos ([9313762](https://github.com/togethercomputer/together-typescript/commit/93137622f3c3fe9158c5692a69a7a191aee27d45))
* **api:** add cuda/nvidia driver fields, auto-scaling, OIDC, scheduled capacity to clusters ([881242f](https://github.com/togethercomputer/together-typescript/commit/881242f30f5436353c4c977d4a205df35aedb9d1))
* **api:** add num_workers parameter to evals model/judge requests ([c61a7c1](https://github.com/togethercomputer/together-typescript/commit/c61a7c1cc7eafa0913d853e993992961cc38ebaf))


### Bug Fixes

* **types:** rename cuda_driver_version to cuda_version in clusters ([e83d0c4](https://github.com/togethercomputer/together-typescript/commit/e83d0c40631b352675cc6bc1149e86cfd9dbbcdd))
* **types:** update driver_versions structure, require supported_instance_types in cluster regions ([d1caf1c](https://github.com/togethercomputer/together-typescript/commit/d1caf1cb6f030a67980ec7c5569fcf521745f191))


### Chores

* **internal:** codegen related update ([f9b079e](https://github.com/togethercomputer/together-typescript/commit/f9b079e57f54ed0f4e0d4671d298efef8543874d))
* **internal:** codegen related update ([4ba1386](https://github.com/togethercomputer/together-typescript/commit/4ba1386fb9a20d7cd2a33c07c5e2fbb42e80d9fe))


### Documentation

* improve examples ([bdf968a](https://github.com/togethercomputer/together-typescript/commit/bdf968a7f48622f0865040dd8f9b81d029ce8e3e))

## 0.39.0 (2026-04-03)

Full Changelog: [v0.38.0...v0.39.0](https://github.com/togethercomputer/together-typescript/compare/v0.38.0...v0.39.0)
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "together-ai",
"version": "0.39.0",
"version": "0.40.0",
"description": "The official TypeScript library for the Together API",
"author": "Together <dev-feedback@TogetherAI.com>",
"types": "dist/index.d.ts",
Expand Down
4 changes: 2 additions & 2 deletions src/internal/utils/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
*/
export const readEnv = (env: string): string | undefined => {
if (typeof (globalThis as any).process !== 'undefined') {
return (globalThis as any).process.env?.[env]?.trim() ?? undefined;
return (globalThis as any).process.env?.[env]?.trim() || undefined;
}
if (typeof (globalThis as any).Deno !== 'undefined') {
return (globalThis as any).Deno.env?.get?.(env)?.trim();
return (globalThis as any).Deno.env?.get?.(env)?.trim() || undefined;
}
return undefined;
};
119 changes: 110 additions & 9 deletions src/resources/beta/clusters/clusters.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,7 @@ export interface Cluster {

control_plane_nodes: Array<Cluster.ControlPlaneNode>;

driver_version: 'CUDA_12_5_555' | 'CUDA_12_6_560' | 'CUDA_12_6_565' | 'CUDA_12_8_570';

duration_hours: number;
cuda_version: string;

gpu_type: 'H100_SXM' | 'H200_SXM' | 'RTX_6000_PCI' | 'L40_PCIE' | 'B200_SXM' | 'H100_SXM_INF';

Expand All @@ -88,6 +86,8 @@ export interface Cluster {

num_gpus: number;

nvidia_driver_version: string;

region: string;

/**
Expand All @@ -107,6 +107,20 @@ export interface Cluster {
| 'Deleting';

volumes: Array<Cluster.Volume>;

capacity_pool_id?: string;

created_at?: string;

duration_hours?: number;

install_traefik?: boolean;

reservation_end_time?: string;

reservation_start_time?: string;

slurm_shm_size_gib?: number;
}

export namespace Cluster {
Expand Down Expand Up @@ -142,6 +156,8 @@ export namespace Cluster {
num_gpus: number;

status: string;

instance_id?: string;
}

export interface Volume {
Expand Down Expand Up @@ -170,9 +186,10 @@ export interface ClusterListRegionsResponse {
export namespace ClusterListRegionsResponse {
export interface Region {
/**
* List of supported identifiable driver versions available in the region.
* List of supported identifiable CUDA/NVIDIA driver version pairs available in
* the region.
*/
driver_versions: Array<string>;
driver_versions: Array<Region.DriverVersion>;

/**
* Identifiable name of the region.
Expand All @@ -182,7 +199,25 @@ export namespace ClusterListRegionsResponse {
/**
* List of supported identifiable gpus available in the region.
*/
supported_instance_types?: Array<string>;
supported_instance_types: Array<string>;
}

export namespace Region {
/**
* CUDA/NVIDIA driver version pair available in the region to use in the create
* cluster request.
*/
export interface DriverVersion {
/**
* CUDA driver version.
*/
cuda_version: string;

/**
* NVIDIA driver version.
*/
nvidia_driver_version: string;
}
}
}

Expand All @@ -192,17 +227,17 @@ export interface ClusterCreateParams {
* reservation via the duration_days field. ON_DEMAND billing types will give you
* ownership of the cluster until you delete it.
*/
billing_type: 'RESERVED' | 'ON_DEMAND';
billing_type: 'RESERVED' | 'ON_DEMAND' | 'SCHEDULED_CAPACITY';

/**
* Name of the GPU cluster.
*/
cluster_name: string;

/**
* NVIDIA driver version to use in the cluster.
* CUDA version for this cluster. For example, 12.5
*/
driver_version: 'CUDA_12_5_555' | 'CUDA_12_6_560' | 'CUDA_12_6_565' | 'CUDA_12_8_570';
cuda_version: string;

/**
* Type of GPU to use in the cluster
Expand All @@ -215,12 +250,36 @@ export interface ClusterCreateParams {
*/
num_gpus: number;

/**
* NVIDIA driver version for this cluster. For example, 550. Only some combinations
* of cuda_version and nvidia_driver_version are supported.
*/
nvidia_driver_version: string;

/**
* Region to create the GPU cluster in. Usable regions can be found from
* `client.clusters.list_regions()`
*/
region: string;

/**
* Maximum number of GPUs to which the cluster can be auto-scaled up. This field is
* required if auto_scaled is true.
*/
auto_scale_max_gpus?: number;

/**
* Whether GPU cluster should be auto-scaled based on the workload. By default, it
* is not auto-scaled.
*/
auto_scaled?: boolean;

/**
* ID of the capacity pool to use for the cluster. This field is optional and only
* applicable if the cluster is created from a capacity pool.
*/
capacity_pool_id?: string;

/**
* Type of cluster to create.
*/
Expand All @@ -231,11 +290,47 @@ export interface ClusterCreateParams {
*/
duration_days?: number;

/**
* Whether automated GPU node failover should be enabled for this cluster. By
* default, it is disabled.
*/
gpu_node_failover_enabled?: boolean;

/**
* Whether to install Traefik ingress controller in the cluster. This field is only
* applicable for Kubernetes clusters and is false by default.
*/
install_traefik?: boolean;

/**
* Reservation end time of the cluster. This field is required for
* SCHEDULED_CAPACITY billing to specify the reservation end time for the cluster.
*/
reservation_end_time?: string;

/**
* Reservation start time of the cluster. This field is required for
* SCHEDULED_CAPACITY billing to specify the reservation start time for the
* cluster. If not provided, the cluster will be provisioned immediately.
*/
reservation_start_time?: string;

/**
* Inline configuration to create a shared volume with the cluster creation.
*/
shared_volume?: ClusterCreateParams.SharedVolume;

/**
* Custom Slurm image for Slurm clusters.
*/
slurm_image?: string;

/**
* Shared memory size in GiB for Slurm cluster. This field is required if
* cluster_type is SLURM.
*/
slurm_shm_size_gib?: number;

/**
* ID of an existing volume to use with the cluster creation.
*/
Expand Down Expand Up @@ -275,6 +370,12 @@ export interface ClusterUpdateParams {
* example, 8, 16 or 24
*/
num_gpus?: number;

/**
* Timestamp at which the cluster should be decommissioned. Only accepted for
* prepaid clusters.
*/
reservation_end_time?: string;
}

Clusters.Storage = Storage;
Expand Down
49 changes: 49 additions & 0 deletions src/resources/evals.ts
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,13 @@ export namespace EvalCreateParams {
* Base URL for external judge models. Must be OpenAI-compatible base URL.
*/
external_base_url?: string;

/**
* Number of concurrent workers for inference requests. Overrides the default
* concurrency for this model. Useful for tuning throughput when using proxy
* endpoints (e.g. OpenRouter) or rate-limited external APIs.
*/
num_workers?: number;
}

export interface EvaluationModelRequest {
Expand Down Expand Up @@ -501,6 +508,13 @@ export namespace EvalCreateParams {
* Base URL for external models. Must be OpenAI-compatible base URL
*/
external_base_url?: string;

/**
* Number of concurrent workers for inference requests. Overrides the default
* concurrency for this model. Useful for tuning throughput when using proxy
* endpoints (e.g. OpenRouter) or rate-limited external APIs.
*/
num_workers?: number;
}
}

Expand Down Expand Up @@ -559,6 +573,13 @@ export namespace EvalCreateParams {
* Base URL for external judge models. Must be OpenAI-compatible base URL.
*/
external_base_url?: string;

/**
* Number of concurrent workers for inference requests. Overrides the default
* concurrency for this model. Useful for tuning throughput when using proxy
* endpoints (e.g. OpenRouter) or rate-limited external APIs.
*/
num_workers?: number;
}

export interface EvaluationModelRequest {
Expand Down Expand Up @@ -601,6 +622,13 @@ export namespace EvalCreateParams {
* Base URL for external models. Must be OpenAI-compatible base URL
*/
external_base_url?: string;

/**
* Number of concurrent workers for inference requests. Overrides the default
* concurrency for this model. Useful for tuning throughput when using proxy
* endpoints (e.g. OpenRouter) or rate-limited external APIs.
*/
num_workers?: number;
}
}

Expand Down Expand Up @@ -649,6 +677,13 @@ export namespace EvalCreateParams {
* Base URL for external judge models. Must be OpenAI-compatible base URL.
*/
external_base_url?: string;

/**
* Number of concurrent workers for inference requests. Overrides the default
* concurrency for this model. Useful for tuning throughput when using proxy
* endpoints (e.g. OpenRouter) or rate-limited external APIs.
*/
num_workers?: number;
}

export interface EvaluationModelRequest {
Expand Down Expand Up @@ -691,6 +726,13 @@ export namespace EvalCreateParams {
* Base URL for external models. Must be OpenAI-compatible base URL
*/
external_base_url?: string;

/**
* Number of concurrent workers for inference requests. Overrides the default
* concurrency for this model. Useful for tuning throughput when using proxy
* endpoints (e.g. OpenRouter) or rate-limited external APIs.
*/
num_workers?: number;
}

export interface EvaluationModelRequest {
Expand Down Expand Up @@ -733,6 +775,13 @@ export namespace EvalCreateParams {
* Base URL for external models. Must be OpenAI-compatible base URL
*/
external_base_url?: string;

/**
* Number of concurrent workers for inference requests. Overrides the default
* concurrency for this model. Useful for tuning throughput when using proxy
* endpoints (e.g. OpenRouter) or rate-limited external APIs.
*/
num_workers?: number;
}
}
}
Expand Down
Loading
Loading