Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .release-please-manifest.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
".": "0.39.0"
".": "0.40.0"
}
4 changes: 2 additions & 2 deletions .stats.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
configured_endpoints: 75
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-76f8801298719cc87e9dc4c64b321bcfd432416d76488499d340b4bb6bf81b9b.yml
openapi_spec_hash: ce0b83ef0a5f174461bd7d13a379b636
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-a49c3cc894ca8f25b07985c98bf7999c44ec4005a4f70d21f21a05cd20b4f017.yml
openapi_spec_hash: 2ef0cbb70708d94bff220a17bce88eca
config_hash: 52d213100a0ca1a4b2cdcd2718936b51
27 changes: 27 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,32 @@
# Changelog

## 0.40.0 (2026-04-13)

Full Changelog: [v0.39.0...v0.40.0](https://github.com/togethercomputer/together-typescript/compare/v0.39.0...v0.40.0)

### Features

* **api:** accept string URLs for audio_inputs and source_video in videos ([9313762](https://github.com/togethercomputer/together-typescript/commit/93137622f3c3fe9158c5692a69a7a191aee27d45))
* **api:** add cuda/nvidia driver fields, auto-scaling, OIDC, scheduled capacity to clusters ([881242f](https://github.com/togethercomputer/together-typescript/commit/881242f30f5436353c4c977d4a205df35aedb9d1))
* **api:** add num_workers parameter to evals model/judge requests ([c61a7c1](https://github.com/togethercomputer/together-typescript/commit/c61a7c1cc7eafa0913d853e993992961cc38ebaf))


### Bug Fixes

* **types:** rename cuda_driver_version to cuda_version in clusters ([e83d0c4](https://github.com/togethercomputer/together-typescript/commit/e83d0c40631b352675cc6bc1149e86cfd9dbbcdd))
* **types:** update driver_versions structure, require supported_instance_types in cluster regions ([d1caf1c](https://github.com/togethercomputer/together-typescript/commit/d1caf1cb6f030a67980ec7c5569fcf521745f191))


### Chores

* **internal:** codegen related update ([f9b079e](https://github.com/togethercomputer/together-typescript/commit/f9b079e57f54ed0f4e0d4671d298efef8543874d))
* **internal:** codegen related update ([4ba1386](https://github.com/togethercomputer/together-typescript/commit/4ba1386fb9a20d7cd2a33c07c5e2fbb42e80d9fe))


### Documentation

* improve examples ([bdf968a](https://github.com/togethercomputer/together-typescript/commit/bdf968a7f48622f0865040dd8f9b81d029ce8e3e))

## 0.39.0 (2026-04-03)

Full Changelog: [v0.38.0...v0.39.0](https://github.com/togethercomputer/together-typescript/compare/v0.38.0...v0.39.0)
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "together-ai",
"version": "0.39.0",
"version": "0.40.0",
"description": "The official TypeScript library for the Together API",
"author": "Together <dev-feedback@TogetherAI.com>",
"types": "dist/index.d.ts",
Expand Down
4 changes: 2 additions & 2 deletions src/internal/utils/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
*/
export const readEnv = (env: string): string | undefined => {
if (typeof (globalThis as any).process !== 'undefined') {
return (globalThis as any).process.env?.[env]?.trim() ?? undefined;
return (globalThis as any).process.env?.[env]?.trim() || undefined;
}
if (typeof (globalThis as any).Deno !== 'undefined') {
return (globalThis as any).Deno.env?.get?.(env)?.trim();
return (globalThis as any).Deno.env?.get?.(env)?.trim() || undefined;
}
return undefined;
};
119 changes: 110 additions & 9 deletions src/resources/beta/clusters/clusters.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,7 @@ export interface Cluster {

control_plane_nodes: Array<Cluster.ControlPlaneNode>;

driver_version: 'CUDA_12_5_555' | 'CUDA_12_6_560' | 'CUDA_12_6_565' | 'CUDA_12_8_570';

duration_hours: number;
cuda_version: string;

gpu_type: 'H100_SXM' | 'H200_SXM' | 'RTX_6000_PCI' | 'L40_PCIE' | 'B200_SXM' | 'H100_SXM_INF';

Expand All @@ -88,6 +86,8 @@ export interface Cluster {

num_gpus: number;

nvidia_driver_version: string;

region: string;

/**
Expand All @@ -107,6 +107,20 @@ export interface Cluster {
| 'Deleting';

volumes: Array<Cluster.Volume>;

capacity_pool_id?: string;

created_at?: string;

duration_hours?: number;

install_traefik?: boolean;

reservation_end_time?: string;

reservation_start_time?: string;

slurm_shm_size_gib?: number;
}

export namespace Cluster {
Expand Down Expand Up @@ -142,6 +156,8 @@ export namespace Cluster {
num_gpus: number;

status: string;

instance_id?: string;
}

export interface Volume {
Expand Down Expand Up @@ -170,9 +186,10 @@ export interface ClusterListRegionsResponse {
export namespace ClusterListRegionsResponse {
export interface Region {
/**
* List of supported identifiable driver versions available in the region.
* List of supported identifiable CUDA/NVIDIA driver version pairs available in
* the region.
*/
driver_versions: Array<string>;
driver_versions: Array<Region.DriverVersion>;

/**
* Identifiable name of the region.
Expand All @@ -182,7 +199,25 @@ export namespace ClusterListRegionsResponse {
/**
* List of supported identifiable gpus available in the region.
*/
supported_instance_types?: Array<string>;
supported_instance_types: Array<string>;
}

export namespace Region {
/**
* CUDA/NVIDIA driver version pair available in the region to use in the create
* cluster request.
*/
export interface DriverVersion {
/**
* CUDA driver version.
*/
cuda_version: string;

/**
* NVIDIA driver version.
*/
nvidia_driver_version: string;
}
}
}

Expand All @@ -192,17 +227,17 @@ export interface ClusterCreateParams {
* reservation via the duration_days field. ON_DEMAND billing types will give you
* ownership of the cluster until you delete it.
*/
billing_type: 'RESERVED' | 'ON_DEMAND';
billing_type: 'RESERVED' | 'ON_DEMAND' | 'SCHEDULED_CAPACITY';

/**
* Name of the GPU cluster.
*/
cluster_name: string;

/**
* NVIDIA driver version to use in the cluster.
* CUDA version for this cluster. For example, 12.5
*/
driver_version: 'CUDA_12_5_555' | 'CUDA_12_6_560' | 'CUDA_12_6_565' | 'CUDA_12_8_570';
cuda_version: string;

/**
* Type of GPU to use in the cluster
Expand All @@ -215,12 +250,36 @@ export interface ClusterCreateParams {
*/
num_gpus: number;

/**
* NVIDIA driver version for this cluster. For example, 550. Only some combinations
* of cuda_version and nvidia_driver_version are supported.
*/
nvidia_driver_version: string;

/**
* Region to create the GPU cluster in. Usable regions can be found from
* `client.clusters.list_regions()`
*/
region: string;

/**
* Maximum number of GPUs to which the cluster can be auto-scaled up. This field is
* required if auto_scaled is true.
*/
auto_scale_max_gpus?: number;

/**
* Whether GPU cluster should be auto-scaled based on the workload. By default, it
* is not auto-scaled.
*/
auto_scaled?: boolean;

/**
* ID of the capacity pool to use for the cluster. This field is optional and only
* applicable if the cluster is created from a capacity pool.
*/
capacity_pool_id?: string;

/**
* Type of cluster to create.
*/
Expand All @@ -231,11 +290,47 @@ export interface ClusterCreateParams {
*/
duration_days?: number;

/**
* Whether automated GPU node failover should be enabled for this cluster. By
* default, it is disabled.
*/
gpu_node_failover_enabled?: boolean;

/**
* Whether to install Traefik ingress controller in the cluster. This field is only
* applicable for Kubernetes clusters and is false by default.
*/
install_traefik?: boolean;

/**
* Reservation end time of the cluster. This field is required for
* SCHEDULED_CAPACITY billing to specify the reservation end time for the cluster.
*/
reservation_end_time?: string;

/**
* Reservation start time of the cluster. This field is required for
* SCHEDULED_CAPACITY billing to specify the reservation start time for the
* cluster. If not provided, the cluster will be provisioned immediately.
*/
reservation_start_time?: string;

/**
* Inline configuration to create a shared volume with the cluster creation.
*/
shared_volume?: ClusterCreateParams.SharedVolume;

/**
* Custom Slurm image for Slurm clusters.
*/
slurm_image?: string;

/**
* Shared memory size in GiB for Slurm cluster. This field is required if
* cluster_type is SLURM.
*/
slurm_shm_size_gib?: number;

/**
* ID of an existing volume to use with the cluster creation.
*/
Expand Down Expand Up @@ -275,6 +370,12 @@ export interface ClusterUpdateParams {
* example, 8, 16 or 24
*/
num_gpus?: number;

/**
* Timestamp at which the cluster should be decommissioned. Only accepted for
* prepaid clusters.
*/
reservation_end_time?: string;
}

Clusters.Storage = Storage;
Expand Down
49 changes: 49 additions & 0 deletions src/resources/evals.ts
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,13 @@ export namespace EvalCreateParams {
* Base URL for external judge models. Must be OpenAI-compatible base URL.
*/
external_base_url?: string;

/**
* Number of concurrent workers for inference requests. Overrides the default
* concurrency for this model. Useful for tuning throughput when using proxy
* endpoints (e.g. OpenRouter) or rate-limited external APIs.
*/
num_workers?: number;
}

export interface EvaluationModelRequest {
Expand Down Expand Up @@ -501,6 +508,13 @@ export namespace EvalCreateParams {
* Base URL for external models. Must be OpenAI-compatible base URL
*/
external_base_url?: string;

/**
* Number of concurrent workers for inference requests. Overrides the default
* concurrency for this model. Useful for tuning throughput when using proxy
* endpoints (e.g. OpenRouter) or rate-limited external APIs.
*/
num_workers?: number;
}
}

Expand Down Expand Up @@ -559,6 +573,13 @@ export namespace EvalCreateParams {
* Base URL for external judge models. Must be OpenAI-compatible base URL.
*/
external_base_url?: string;

/**
* Number of concurrent workers for inference requests. Overrides the default
* concurrency for this model. Useful for tuning throughput when using proxy
* endpoints (e.g. OpenRouter) or rate-limited external APIs.
*/
num_workers?: number;
}

export interface EvaluationModelRequest {
Expand Down Expand Up @@ -601,6 +622,13 @@ export namespace EvalCreateParams {
* Base URL for external models. Must be OpenAI-compatible base URL
*/
external_base_url?: string;

/**
* Number of concurrent workers for inference requests. Overrides the default
* concurrency for this model. Useful for tuning throughput when using proxy
* endpoints (e.g. OpenRouter) or rate-limited external APIs.
*/
num_workers?: number;
}
}

Expand Down Expand Up @@ -649,6 +677,13 @@ export namespace EvalCreateParams {
* Base URL for external judge models. Must be OpenAI-compatible base URL.
*/
external_base_url?: string;

/**
* Number of concurrent workers for inference requests. Overrides the default
* concurrency for this model. Useful for tuning throughput when using proxy
* endpoints (e.g. OpenRouter) or rate-limited external APIs.
*/
num_workers?: number;
}

export interface EvaluationModelRequest {
Expand Down Expand Up @@ -691,6 +726,13 @@ export namespace EvalCreateParams {
* Base URL for external models. Must be OpenAI-compatible base URL
*/
external_base_url?: string;

/**
* Number of concurrent workers for inference requests. Overrides the default
* concurrency for this model. Useful for tuning throughput when using proxy
* endpoints (e.g. OpenRouter) or rate-limited external APIs.
*/
num_workers?: number;
}

export interface EvaluationModelRequest {
Expand Down Expand Up @@ -733,6 +775,13 @@ export namespace EvalCreateParams {
* Base URL for external models. Must be OpenAI-compatible base URL
*/
external_base_url?: string;

/**
* Number of concurrent workers for inference requests. Overrides the default
* concurrency for this model. Useful for tuning throughput when using proxy
* endpoints (e.g. OpenRouter) or rate-limited external APIs.
*/
num_workers?: number;
}
}
}
Expand Down
Loading
Loading