From f1a32b57f2e7a3ab78b1ec261ca0a15904c14da9 Mon Sep 17 00:00:00 2001
From: Blaine Kasten
Date: Wed, 4 Feb 2026 17:43:18 -0600
Subject: [PATCH 1/2] Update RL specs

---
 openapi.yaml | 60 ++++++++-----------------------------------------------
 1 file changed, 7 insertions(+), 53 deletions(-)

diff --git a/openapi.yaml b/openapi.yaml
index e5c67eb..20c4450 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -7265,7 +7265,13 @@ components:
   schemas:
     RL.OptimStepBody:
       type: object
-      properties: {}
+      properties:
+        learning_rate:
+          description: Learning rate for this step.
+          type: number
+          default: 0.0001
+        adamw_params:
+          $ref: '#/components/schemas/RL.AdamWOptimizerParams'
     RL.ForwardBackwardBody:
       type: object
       required:
@@ -7462,10 +7468,6 @@ components:
           example: checkpoint-123
         lora_config:
           $ref: '#/components/schemas/RL.LoraConfig'
-        optimizer_config:
-          $ref: '#/components/schemas/RL.OptimizerConfig'
-        lr_scheduler_config:
-          $ref: '#/components/schemas/RL.LRSchedulerConfig'
     RL.TrainingSessionStatus:
       description: Status of the training session
       default: TRAINING_SESSION_STATUS_UNSPECIFIED
@@ -7499,44 +7501,6 @@ components:
           description: Timestamp when the training session was last updated
         lora_config:
           $ref: '#/components/schemas/RL.LoraConfig'
-        optimizer_config:
-          $ref: '#/components/schemas/RL.OptimizerConfig'
-        lr_scheduler_config:
-          $ref: '#/components/schemas/RL.LRSchedulerConfig'
-    RL.LRSchedulerConfig:
-      description: Learning rate scheduler configuration
-      type: object
-      properties:
-        linear:
-          $ref: '#/components/schemas/RL.LinearLRScheduler'
-    RL.LinearLRScheduler:
-      description: Linear learning rate scheduler configuration
-      type: object
-      properties:
-        params:
-          $ref: '#/components/schemas/RL.LinearSchedulerParams'
-    RL.LinearSchedulerParams:
-      description: Linear learning rate scheduler parameters
-      type: object
-      properties:
-        warmup_steps:
-          description: Number of warmup steps
-          type: integer
-          default: 100
-        lr_min:
-          description: Minimum learning rate at the end of linear decay
-          type: number
-          default: 0.0
-    RL.OptimizerConfig:
-      description: Optimizer configuration. If omitted, defaults to AdamW with default parameters.
-      type: object
-      properties:
-        adamw:
-          $ref: '#/components/schemas/RL.AdamWOptimizer'
-        max_grad_norm:
-          description: Maximum gradient norm for gradient clipping. Applies to all optimizer types.
-          type: number
-          default: 1.0
     RL.LoraConfig:
       type: object
       description: LoRA adapter configuration
@@ -7553,20 +7517,10 @@ components:
           type: number
           default: 0.05
           description: Dropout of the LoRA adapter
-    RL.AdamWOptimizer:
-      description: AdamW optimizer configuration
-      type: object
-      properties:
-        params:
-          $ref: '#/components/schemas/RL.AdamWOptimizerParams'
     RL.AdamWOptimizerParams:
       description: AdamW optimizer parameters
       type: object
       properties:
-        lr:
-          description: Learning rate
-          type: number
-          default: 0.0001
         beta1:
           description: First moment decay rate
           type: number

From b1551855b477307a0c29e19a3ba4ee315287c575 Mon Sep 17 00:00:00 2001
From: Blaine Kasten
Date: Wed, 4 Feb 2026 17:46:50 -0600
Subject: [PATCH 2/2] add more

---
 openapi.yaml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/openapi.yaml b/openapi.yaml
index 20c4450..fc56ad5 100644
--- a/openapi.yaml
+++ b/openapi.yaml
@@ -7491,6 +7491,12 @@ components:
           type: string
           example: meta-llama/Meta-Llama-3-8B-Instruct
           description: Base model used for the training session
+        checkpoint_id:
+          description: Checkpoint ID to use for the training session
+          type: string
+        step:
+          description: Current training step
+          type: integer
         created_at:
           type: string
           format: date-time
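
Note for reviewers: with both patches applied, the learning rate and AdamW
parameters are supplied per optimizer step through RL.OptimStepBody rather
than fixed at session creation, and the training session object now carries
checkpoint_id and the current step. A minimal sketch of a request body
conforming to the new RL.OptimStepBody follows; the concrete values are
illustrative assumptions, while the field names, types, and the
learning_rate default of 0.0001 come from the schema in patch 1:

    # Hypothetical optim_step request body (values are examples only)
    learning_rate: 0.00005   # overrides the schema default of 0.0001
    adamw_params:
      beta1: 0.9             # first moment decay rate, per RL.AdamWOptimizerParams

Both fields are optional, so an empty body keeps the defaults. Passing the
learning rate on every step lets the caller compute its own schedule (e.g.
warmup or linear decay client-side), which is presumably why the
RL.OptimizerConfig and RL.LRSchedulerConfig schemas were removed from the
session objects.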