= ({
onClick={() => onNavigate('models')}
isExpanded={isOpen}
/>
+ }
+ label={t('debug')}
+ active={currentView === 'debug'}
+ onClick={() => onNavigate('debug')}
+ isExpanded={isOpen}
+ />
{/* Theme Toggle */}
diff --git a/components/SongList.tsx b/components/SongList.tsx
index 807fadd..2bfba37 100644
--- a/components/SongList.tsx
+++ b/components/SongList.tsx
@@ -641,6 +641,16 @@ const SongItem: React.FC = ({
{getModelDisplayName(song.ditModel)}
+ {song.audioUrl && !song.isGenerating && (() => {
+ const ext = song.audioUrl.split('.').pop()?.toLowerCase();
+ const fmtLabel = ext === 'mp3' ? 'MP3' : ext === 'wav' ? 'WAV' : null;
+ const fmtClass = ext === 'mp3' ? 'from-orange-500 to-amber-500' : 'from-sky-500 to-blue-600';
+ return fmtLabel ? (
+
+ {fmtLabel}
+
+ ) : null;
+ })()}
{song.isPublic === false && (
)}
diff --git a/i18n/translations.ts b/i18n/translations.ts
index 717471c..dd2a38b 100644
--- a/i18n/translations.ts
+++ b/i18n/translations.ts
@@ -8,6 +8,7 @@ export const translations = {
search: 'Search',
models: 'Models',
news: 'News',
+ debug: 'Debug',
// Theme
lightMode: 'Light Mode',
@@ -400,6 +401,25 @@ export const translations = {
sftModelNotFound: 'SFT model not found — go to Models to download it',
sftModelSwitched: 'Switched to SFT model for repaint',
autoSwitchedToSft: 'Auto-switched to SFT model (required for repaint)',
+
+ // Lego mode
+ legoMode: 'Lego',
+ legoModeDescription: 'Generate a new instrument track layered over the backing track',
+ legoTrackLabel: 'Instrument Track',
+ legoTrackPlaceholder: 'Select instrument…',
+ legoBaseModelRequired: 'Lego mode requires the base model (acestep-v15-base)',
+ autoSwitchedToBase: 'Auto-switched to base model (required for lego)',
+
+ // Understand
+ understand: 'Understand',
+ understandTooltip: 'Analyse this audio with AI to extract caption, lyrics, BPM and more',
+ understandApply: 'Apply to form',
+ understandApplyCaption: 'Apply caption',
+ understandApplyLyrics: 'Apply lyrics',
+ understandResult: 'Understand Result',
+ understandRunning: 'Analysing audio…',
+ understandError: 'Analysis failed',
+ understandNotAvailable: 'ace-understand binary not found',
// Search Page
searchSongsPlaceholder: 'Search for songs, playlists, creators, or genres',
@@ -547,6 +567,7 @@ export const translations = {
search: '搜索',
models: '模型',
news: '新闻',
+ debug: '调试',
// Theme
lightMode: '浅色模式',
@@ -939,6 +960,25 @@ export const translations = {
sftModelNotFound: '未找到 SFT 模型 — 前往模型页面下载',
sftModelSwitched: '已切换到 SFT 模型用于重绘',
autoSwitchedToSft: '已自动切换到 SFT 模型(重绘所需)',
+
+ // Lego mode
+ legoMode: 'Lego',
+ legoModeDescription: '在伴奏音轨上生成新的乐器轨道',
+ legoTrackLabel: '乐器轨道',
+ legoTrackPlaceholder: '选择乐器…',
+ legoBaseModelRequired: 'Lego 模式需要基础模型(acestep-v15-base)',
+ autoSwitchedToBase: '已自动切换到基础模型(Lego 模式所需)',
+
+ // Understand
+ understand: '分析',
+ understandTooltip: '用 AI 分析此音频,提取标题、歌词、BPM 等信息',
+ understandApply: '应用到表单',
+ understandApplyCaption: '应用标题',
+ understandApplyLyrics: '应用歌词',
+ understandResult: '分析结果',
+ understandRunning: '正在分析音频…',
+ understandError: '分析失败',
+ understandNotAvailable: '未找到 ace-understand 程序',
// Search Page
searchSongsPlaceholder: '搜索歌曲、播放列表、创作者或风格',
@@ -1086,6 +1126,7 @@ export const translations = {
search: '検索',
models: 'モデル',
news: 'ニュース',
+ debug: 'デバッグ',
// Theme
lightMode: 'ライトモード',
@@ -1478,6 +1519,25 @@ export const translations = {
sftModelNotFound: 'SFT モデルが見つかりません — モデルページでダウンロード',
sftModelSwitched: 'リペイント用に SFT モデルに切り替えました',
autoSwitchedToSft: 'SFT モデルに自動切り替え(リペイントに必要)',
+
+ // Lego mode
+ legoMode: 'Lego',
+ legoModeDescription: 'バッキングトラックに新しい楽器トラックを重ねて生成',
+ legoTrackLabel: '楽器トラック',
+ legoTrackPlaceholder: '楽器を選択…',
+ legoBaseModelRequired: 'Lego モードにはベースモデル(acestep-v15-base)が必要',
+ autoSwitchedToBase: 'ベースモデルに自動切り替え(Lego に必要)',
+
+ // Understand
+ understand: '解析',
+ understandTooltip: 'AI でこの音声を解析し、キャプション、歌詞、BPM などを抽出',
+ understandApply: 'フォームに適用',
+ understandApplyCaption: 'キャプションを適用',
+ understandApplyLyrics: '歌詞を適用',
+ understandResult: '解析結果',
+ understandRunning: '音声を解析中…',
+ understandError: '解析に失敗しました',
+ understandNotAvailable: 'ace-understand バイナリが見つかりません',
// Search Page
searchSongsPlaceholder: '曲、プレイリスト、クリエイター、スタイルを検索',
@@ -1625,6 +1685,7 @@ export const translations = {
search: '검색',
models: '모델',
news: '뉴스',
+ debug: '디버그',
// Theme
lightMode: '라이트 모드',
@@ -2017,6 +2078,25 @@ export const translations = {
sftModelNotFound: 'SFT 모델을 찾을 수 없습니다 — 모델 페이지에서 다운로드하세요',
sftModelSwitched: '리페인트를 위해 SFT 모델로 전환했습니다',
autoSwitchedToSft: 'SFT 모델로 자동 전환됨 (리페인트에 필요)',
+
+ // Lego mode
+ legoMode: 'Lego',
+ legoModeDescription: '기존 반주 트랙 위에 새로운 악기 트랙 생성',
+ legoTrackLabel: '악기 트랙',
+ legoTrackPlaceholder: '악기 선택…',
+ legoBaseModelRequired: 'Lego 모드에는 기본 모델(acestep-v15-base)이 필요합니다',
+ autoSwitchedToBase: '기본 모델로 자동 전환됨 (Lego에 필요)',
+
+ // Understand
+ understand: '분석',
+ understandTooltip: 'AI로 이 오디오를 분석하여 캡션, 가사, BPM 등을 추출',
+ understandApply: '양식에 적용',
+ understandApplyCaption: '캡션 적용',
+ understandApplyLyrics: '가사 적용',
+ understandResult: '분석 결과',
+ understandRunning: '오디오 분석 중…',
+ understandError: '분석 실패',
+ understandNotAvailable: 'ace-understand 바이너리를 찾을 수 없습니다',
// Search Page
searchSongsPlaceholder: '곡, 재생목록, 제작자 또는 스타일 검색',
diff --git a/server/package-lock.json b/server/package-lock.json
index e252cb4..a9087f3 100644
--- a/server/package-lock.json
+++ b/server/package-lock.json
@@ -13,6 +13,7 @@
"cors": "^2.8.5",
"dotenv": "^16.3.1",
"express": "^4.18.2",
+ "express-rate-limit": "^8.3.1",
"helmet": "^8.1.0",
"jsonwebtoken": "^9.0.2",
"multer": "^2.0.2",
@@ -1160,6 +1161,24 @@
"url": "https://opencollective.com/express"
}
},
+ "node_modules/express-rate-limit": {
+ "version": "8.3.1",
+ "resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.3.1.tgz",
+ "integrity": "sha512-D1dKN+cmyPWuvB+G2SREQDzPY1agpBIcTa9sJxOPMCNeH3gwzhqJRDWCXW3gg0y//+LQ/8j52JbMROWyrKdMdw==",
+ "license": "MIT",
+ "dependencies": {
+ "ip-address": "10.1.0"
+ },
+ "engines": {
+ "node": ">= 16"
+ },
+ "funding": {
+ "url": "https://github.com/sponsors/express-rate-limit"
+ },
+ "peerDependencies": {
+ "express": ">= 4.11"
+ }
+ },
"node_modules/file-uri-to-path": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz",
@@ -1397,6 +1416,15 @@
"integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==",
"license": "ISC"
},
+ "node_modules/ip-address": {
+ "version": "10.1.0",
+ "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.1.0.tgz",
+ "integrity": "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==",
+ "license": "MIT",
+ "engines": {
+ "node": ">= 12"
+ }
+ },
"node_modules/ipaddr.js": {
"version": "1.9.1",
"resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz",
diff --git a/server/package.json b/server/package.json
index 027abad..e41890a 100644
--- a/server/package.json
+++ b/server/package.json
@@ -15,6 +15,7 @@
"cors": "^2.8.5",
"dotenv": "^16.3.1",
"express": "^4.18.2",
+ "express-rate-limit": "^8.3.1",
"helmet": "^8.1.0",
"jsonwebtoken": "^9.0.2",
"multer": "^2.0.2",
diff --git a/server/src/config/index.ts b/server/src/config/index.ts
index 7abb41c..3155977 100644
--- a/server/src/config/index.ts
+++ b/server/src/config/index.ts
@@ -51,6 +51,16 @@ function resolveDitVaeBin(): string {
return '';
}
+/** Resolves the ace-understand binary path (reverse pipeline: audio → metadata). */
+function resolveUnderstandBin(): string {
+ if (process.env.ACE_UNDERSTAND_BIN) return resolveFromRoot(process.env.ACE_UNDERSTAND_BIN);
+ for (const name of ['ace-understand', 'ace-understand.exe']) {
+ const p = path.join(APP_ROOT, 'bin', name);
+ if (existsSync(p)) return p;
+ }
+ return '';
+}
+
// ── Model resolution ─────────────────────────────────────────────────────────
/** Resolves the models directory. */
@@ -91,6 +101,40 @@ function resolveDitModel(modelsDir: string): string {
return '';
}
+/**
+ * Resolves the base DiT model (acestep-v15-base-*.gguf).
+ * The base model is mandatory for lego mode — the turbo/sft variants will not work.
+ * Override via ACESTEP_BASE_MODEL in .env.
+ */
+function resolveBaseModel(modelsDir: string): string {
+ if (process.env.ACESTEP_BASE_MODEL) {
+ const p = resolveFromRoot(process.env.ACESTEP_BASE_MODEL);
+ if (existsSync(p)) return p;
+ console.warn(`[config] ACESTEP_BASE_MODEL path not found: ${p} — falling back to auto-detection`);
+ }
+ if (!existsSync(modelsDir)) return '';
+
+ const preference = [
+ 'acestep-v15-base-Q8_0.gguf',
+ 'acestep-v15-base-Q6_K.gguf',
+ 'acestep-v15-base-Q5_K_M.gguf',
+ 'acestep-v15-base-Q4_K_M.gguf',
+ 'acestep-v15-base-BF16.gguf',
+ ];
+ for (const name of preference) {
+ const p = path.join(modelsDir, name);
+ if (existsSync(p)) return p;
+ }
+
+ try {
+ const files = readdirSync(modelsDir).filter(f => f.endsWith('.gguf') && !f.endsWith('.part'));
+ const base = files.find(f => f.startsWith('acestep-v15-base'));
+ if (base) return path.join(modelsDir, base);
+ } catch { /* ignore read errors */ }
+
+ return '';
+}
+
/** Resolves the causal LM model (acestep-5Hz-lm-*.gguf). */
function resolveLmModel(modelsDir: string): string {
if (process.env.LM_MODEL) return resolveFromRoot(process.env.LM_MODEL);
@@ -167,7 +211,9 @@ function resolveVaeModel(modelsDir: string): string {
const modelsDir = resolveModelsDir();
const resolvedLmBin = resolveLmBin();
const resolvedDitVaeBin = resolveDitVaeBin();
+const resolvedUnderstandBin = resolveUnderstandBin();
const resolvedDitModel = resolveDitModel(modelsDir);
+const resolvedBaseModel = resolveBaseModel(modelsDir);
const resolvedLmModel = resolveLmModel(modelsDir);
const resolvedTextEncoderModel = resolveTextEncoderModel(modelsDir);
const resolvedVaeModel = resolveVaeModel(modelsDir);
@@ -177,12 +223,16 @@ if (resolvedLmBin) console.log(`[config] ace-qwen3: ${resolvedL
else console.log('[config] ace-qwen3: not found (set ACE_QWEN3_BIN)');
if (resolvedDitVaeBin) console.log(`[config] dit-vae: ${resolvedDitVaeBin}`);
else console.log('[config] dit-vae: not found (set DIT_VAE_BIN)');
+if (resolvedUnderstandBin) console.log(`[config] ace-understand: ${resolvedUnderstandBin}`);
+else console.log('[config] ace-understand: not found (set ACE_UNDERSTAND_BIN)');
if (resolvedLmModel) console.log(`[config] LM model: ${resolvedLmModel}`);
else console.log('[config] LM model: none (run models.sh)');
if (resolvedTextEncoderModel) console.log(`[config] text encoder: ${resolvedTextEncoderModel}`);
else console.log('[config] text encoder: none (run models.sh)');
if (resolvedDitModel) console.log(`[config] DiT model: ${resolvedDitModel}`);
else console.log('[config] DiT model: none (run models.sh)');
+if (resolvedBaseModel) console.log(`[config] base DiT model: ${resolvedBaseModel}`);
+else console.log('[config] base DiT model: none (download acestep-v15-base for lego mode)');
if (resolvedVaeModel) console.log(`[config] VAE model: ${resolvedVaeModel}`);
else console.log('[config] VAE model: none (run models.sh)');
@@ -203,9 +253,12 @@ export const config = {
// Two-binary spawn mode (acestep.cpp native pipeline)
lmBin: resolvedLmBin,
ditVaeBin: resolvedDitVaeBin,
+ understandBin: resolvedUnderstandBin,
lmModel: resolvedLmModel,
textEncoderModel: resolvedTextEncoderModel,
ditModel: resolvedDitModel,
+ // Base DiT model — required for lego mode (turbo/sft will not work)
+ baseModel: resolvedBaseModel,
vaeModel: resolvedVaeModel,
// HTTP fallback mode
diff --git a/server/src/routes/generate.ts b/server/src/routes/generate.ts
index 5402cb1..47c07fd 100644
--- a/server/src/routes/generate.ts
+++ b/server/src/routes/generate.ts
@@ -2,6 +2,7 @@ import { Router, Response } from 'express';
import multer from 'multer';
import path from 'path';
import { spawn } from 'child_process';
+import rateLimit from 'express-rate-limit';
import { pool } from '../db/pool.js';
import { generateUUID } from '../db/sqlite.js';
import { config } from '../config/index.js';
@@ -14,16 +15,43 @@ import {
checkSpaceHealth,
cleanupJob,
getJobRawResponse,
- downloadAudioToBuffer,
+ getJobLogs,
+ listActiveJobs,
} from '../services/acestep.js';
import { getStorageProvider } from '../services/storage/factory.js';
+// Rate limiter for the debug log polling endpoints (read-only, lightweight)
+const logRateLimiter = rateLimit({
+ windowMs: 60_000,
+ max: 120, // 2 req/s sustained — enough for 1.5s poll intervals
+ standardHeaders: true,
+ legacyHeaders: false,
+ message: { error: 'Too many log requests — please slow down polling' },
+});
+
+// Rate limiter for the job status polling endpoint (performs FS operations on first completion)
+const statusRateLimiter = rateLimit({
+ windowMs: 60_000,
+ max: 120, // 2 req/s sustained — enough for 2s frontend poll intervals
+ standardHeaders: true,
+ legacyHeaders: false,
+ message: { error: 'Too many status requests — please slow down polling' },
+});
+
const router = Router();
// Auto-generate a song title from lyrics or style when none is provided
-function autoTitle(params: { title?: string; lyrics?: string; instrumental?: boolean; style?: string; songDescription?: string }): string {
+function autoTitle(params: { title?: string; lyrics?: string; instrumental?: boolean; style?: string; songDescription?: string; taskType?: string; trackName?: string; sourceAudioTitle?: string }): string {
if (params.title?.trim()) return params.title.trim();
+ // For lego mode: combine source audio name + instrument to make a descriptive title
+ if (params.taskType === 'lego' && params.trackName) {
+ const base = params.sourceAudioTitle
+ ? params.sourceAudioTitle.replace(/\.[^.]+$/, '').replace(/[_-]+/g, ' ').trim()
+ : 'track';
+ return `${base} — ${params.trackName}`;
+ }
+
// Try first meaningful lyric line (skip section markers like [verse], [chorus])
if (!params.instrumental && params.lyrics) {
for (const line of params.lyrics.split('\n')) {
@@ -78,8 +106,8 @@ const audioUpload = multer({
});
interface GenerateBody {
- // Mode
- customMode: boolean;
+ // Mode (kept for backward compatibility; unified mode always uses full-featured panel)
+ customMode?: boolean;
// Simple Mode
songDescription?: string;
@@ -106,7 +134,7 @@ interface GenerateBody {
randomSeed?: boolean;
seed?: number;
thinking?: boolean;
- audioFormat?: 'mp3' | 'flac';
+ audioFormat?: 'mp3' | 'wav';
inferMethod?: 'ode' | 'sde';
shift?: number;
@@ -265,17 +293,11 @@ router.post('/', authMiddleware, async (req: AuthenticatedRequest, res: Response
ditModel,
} = req.body as GenerateBody;
- if (!customMode && !songDescription) {
- res.status(400).json({ error: 'Song description required for simple mode' });
- return;
- }
-
- // In custom mode, at least one content field is required — unless the request
- // is for cover, audio2audio, or repaint mode and a source audio is provided
- // (the source audio itself is the primary input; style/lyrics are optional).
- const requiresSourceAudio = taskType === 'cover' || taskType === 'audio2audio' || taskType === 'repaint';
- if (customMode && !style && !lyrics && !referenceAudioUrl && !(requiresSourceAudio && sourceAudioUrl)) {
- res.status(400).json({ error: 'Style, lyrics, or reference audio required for custom mode' });
+ // At least one content field is required — unless the request is for cover/repaint/lego
+ // and a source audio is provided (the source audio itself is the primary input).
+ const requiresSourceAudio = taskType === 'cover' || taskType === 'audio2audio' || taskType === 'repaint' || taskType === 'lego';
+ if (!songDescription && !style && !lyrics && !referenceAudioUrl && !(requiresSourceAudio && sourceAudioUrl)) {
+ res.status(400).json({ error: 'Please provide a description, style, lyrics, or audio' });
return;
}
@@ -283,7 +305,6 @@ router.post('/', authMiddleware, async (req: AuthenticatedRequest, res: Response
console.log(
`[API] POST /generate:` +
`\n taskType = ${taskType || 'text2music'}` +
- `\n customMode = ${customMode}` +
`\n ditModel = ${ditModel || '(default)'}` +
`\n sourceAudio = ${sourceAudioUrl || 'none'}` +
`\n repaint = [${repaintingStart ?? 'start'}, ${repaintingEnd ?? 'end'}]` +
@@ -292,7 +313,7 @@ router.post('/', authMiddleware, async (req: AuthenticatedRequest, res: Response
);
const params = {
- customMode,
+ customMode: true,
songDescription,
lyrics,
style,
@@ -377,7 +398,7 @@ router.post('/', authMiddleware, async (req: AuthenticatedRequest, res: Response
}
});
-router.get('/status/:jobId', authMiddleware, async (req: AuthenticatedRequest, res: Response) => {
+router.get('/status/:jobId', statusRateLimiter, authMiddleware, async (req: AuthenticatedRequest, res: Response) => {
try {
const jobResult = await pool.query(
`SELECT id, user_id, acestep_task_id, status, params, result, error, created_at
@@ -440,10 +461,18 @@ router.get('/status/:jobId', authMiddleware, async (req: AuthenticatedRequest, r
const songId = generateUUID();
try {
- const { buffer } = await downloadAudioToBuffer(audioUrl);
- const ext = audioUrl.includes('.flac') ? '.flac' : '.mp3';
+ let ext = '.mp3';
+ if (audioUrl.endsWith('.flac')) ext = '.flac';
+ else if (audioUrl.endsWith('.wav')) ext = '.wav';
const storageKey = `${req.user!.id}/${songId}${ext}`;
- await storage.upload(storageKey, buffer, `audio/${ext.slice(1)}`);
+ // Move the intermediate job file directly to its library location to avoid storing
+ // a duplicate copy of the (potentially large) audio file on disk.
+ const { rename, mkdir } = await import('fs/promises');
+ const srcPath = path.join(config.storage.audioDir, audioUrl.slice('/audio/'.length));
+ const dstDir = path.join(config.storage.audioDir, req.user!.id);
+ const dstPath = path.join(dstDir, `${songId}${ext}`);
+ await mkdir(dstDir, { recursive: true });
+ await rename(srcPath, dstPath);
const storedPath = storage.getPublicUrl(storageKey);
await pool.query(
@@ -710,6 +739,35 @@ router.get('/debug/:taskId', authMiddleware, async (req: AuthenticatedRequest, r
}
});
+// ── Debug log endpoints ───────────────────────────────────────────────────────
+
+/** List all in-memory jobs (for the debug panel job selector). */
+router.get('/logs', logRateLimiter, authMiddleware, async (_req: AuthenticatedRequest, res: Response) => {
+ try {
+ res.json({ jobs: listActiveJobs() });
+ } catch (error) {
+ res.status(500).json({ error: (error as Error).message });
+ }
+});
+
+/**
+ * Stream log lines for a specific job.
+ * Query param `after` (integer) returns only lines after that index for efficient polling.
+ */
+router.get('/logs/:jobId', logRateLimiter, authMiddleware, async (req: AuthenticatedRequest, res: Response) => {
+ try {
+ const after = parseInt(req.query.after as string || '0', 10);
+ const result = getJobLogs(req.params.jobId, isNaN(after) ? 0 : after);
+ if (!result) {
+ res.status(404).json({ error: 'Job not found' });
+ return;
+ }
+ res.json(result);
+ } catch (error) {
+ res.status(500).json({ error: (error as Error).message });
+ }
+});
+
// Format endpoint - uses LLM to enhance style/lyrics
// Spawn mode: runs `acestep-generate --mode format` with the prompt/lyrics as args
// HTTP mode: calls ACESTEP_API_URL/format_input
diff --git a/server/src/routes/referenceTrack.ts b/server/src/routes/referenceTrack.ts
index 4510b2f..542bf5c 100644
--- a/server/src/routes/referenceTrack.ts
+++ b/server/src/routes/referenceTrack.ts
@@ -4,16 +4,27 @@ import path from 'path';
import os from 'os';
import { promises as fs } from 'fs';
import { fileURLToPath } from 'url';
+import rateLimit from 'express-rate-limit';
import { pool } from '../db/pool.js';
import { authMiddleware, AuthenticatedRequest } from '../middleware/auth.js';
import { getStorageProvider } from '../services/storage/factory.js';
import { spawn } from 'child_process';
+import { runUnderstand } from '../services/acestep.js';
const router = Router();
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const AUDIO_DIR = path.join(__dirname, '../../public/audio');
+// Per-IP rate limiter for CPU-intensive understand operations (max 6 requests per minute)
+const understandRateLimiter = rateLimit({
+ windowMs: 60_000,
+ max: 6,
+ standardHeaders: true,
+ legacyHeaders: false,
+ message: { error: 'Too many requests — please wait before analysing another track' },
+});
+
const upload = multer({
storage: multer.memoryStorage(),
limits: { fileSize: 50 * 1024 * 1024 }, // 50MB max
@@ -322,4 +333,48 @@ router.delete('/:id', authMiddleware, async (req: AuthenticatedRequest, res: Res
}
});
+// Understand a reference track with ace-understand
+router.post('/:id/understand', understandRateLimiter, authMiddleware, async (req: AuthenticatedRequest, res: Response) => {
+ try {
+ const result = await pool.query(
+ 'SELECT user_id, storage_key FROM reference_tracks WHERE id = $1',
+ [req.params.id]
+ );
+ if (result.rows.length === 0) {
+ res.status(404).json({ error: 'Track not found' });
+ return;
+ }
+ if (result.rows[0].user_id !== req.user!.id) {
+ res.status(403).json({ error: 'Access denied' });
+ return;
+ }
+
+ const audioUrl = `/audio/${result.rows[0].storage_key}`;
+ const understood = await runUnderstand(audioUrl);
+ res.json(understood);
+ } catch (error) {
+ const msg = error instanceof Error ? error.message : 'Failed to understand audio';
+ console.error('Understand reference track error:', error);
+ res.status(500).json({ error: msg });
+ }
+});
+
+// Understand audio by URL (for source/generated audio without a reference track DB entry)
+router.post('/understand-url', understandRateLimiter, authMiddleware, async (req: AuthenticatedRequest, res: Response) => {
+ const { audioUrl } = req.body as { audioUrl?: string };
+ if (!audioUrl || typeof audioUrl !== 'string') {
+ res.status(400).json({ error: 'audioUrl is required' });
+ return;
+ }
+
+ try {
+ const understood = await runUnderstand(audioUrl);
+ res.json(understood);
+ } catch (error) {
+ const msg = error instanceof Error ? error.message : 'Failed to understand audio';
+ console.error('Understand URL error:', error);
+ res.status(500).json({ error: msg });
+ }
+});
+
export default router;
diff --git a/server/src/services/acestep.ts b/server/src/services/acestep.ts
index a6b975c..d62a2b9 100644
--- a/server/src/services/acestep.ts
+++ b/server/src/services/acestep.ts
@@ -9,9 +9,10 @@
*/
import { spawn } from 'child_process';
-import { writeFile, mkdir, readFile } from 'fs/promises';
+import { writeFile, mkdir, readFile, mkdtemp, rm } from 'fs/promises';
import { execFileSync } from 'child_process';
import { existsSync, readdirSync } from 'fs';
+import { tmpdir } from 'os';
import path from 'path';
import { fileURLToPath } from 'url';
import { config } from '../config/index.js';
@@ -41,7 +42,7 @@ function getAudioDuration(filePath: string): number {
// ---------------------------------------------------------------------------
export interface GenerationParams {
- customMode: boolean;
+ customMode?: boolean; // kept for backward compatibility; ignored in unified mode
songDescription?: string;
lyrics: string;
style: string;
@@ -59,7 +60,7 @@ export interface GenerationParams {
seed?: number;
thinking?: boolean;
enhance?: boolean;
- audioFormat?: 'mp3' | 'flac';
+ audioFormat?: 'wav' | 'mp3';
inferMethod?: 'ode' | 'sde';
shift?: number;
lmTemperature?: number;
@@ -82,10 +83,20 @@ export interface GenerationParams {
useAdg?: boolean;
cfgIntervalStart?: number;
cfgIntervalEnd?: number;
+ customTimesteps?: string;
useCotMetas?: boolean;
useCotCaption?: boolean;
useCotLanguage?: boolean;
autogen?: boolean;
+ constrainedDecodingDebug?: boolean;
+ allowLmBatch?: boolean;
+ getScores?: boolean;
+ getLrc?: boolean;
+ scoreScale?: number;
+ lmBatchChunkSize?: number;
+ trackName?: string;
+ completeTrackClasses?: string[];
+ isFormatCaption?: boolean;
ditModel?: string;
}
@@ -118,6 +129,8 @@ interface ActiveJob {
queuePosition?: number;
progress?: number;
stage?: string;
+ /** All raw lines emitted by ace-qwen3 / dit-vae (stdout + stderr), in order. */
+ logs: string[];
}
const activeJobs = new Map();
@@ -308,8 +321,20 @@ function runBinary(
let stdout = '';
let stderr = '';
let lineBuf = '';
+ let stdoutLineBuf = '';
- proc.stdout.on('data', (chunk: Buffer) => { stdout += chunk.toString(); });
+ proc.stdout.on('data', (chunk: Buffer) => {
+ const text = chunk.toString();
+ stdout += text;
+ // Stream stdout lines to onLine as well so they appear in the debug log
+ stdoutLineBuf += text;
+ const lines = stdoutLineBuf.split('\n');
+ stdoutLineBuf = lines.pop() ?? '';
+ for (const line of lines) {
+ const trimmed = line.trim();
+ if (trimmed && onLine) onLine(`[stdout] ${trimmed}`);
+ }
+ });
proc.stderr.on('data', (chunk: Buffer) => {
const text = chunk.toString();
stderr += text;
@@ -324,8 +349,10 @@ function runBinary(
proc.on('close', (code) => {
// Flush any partial last line that didn't end with a newline
+ if (stdoutLineBuf.trim() && onLine) onLine(`[stdout] ${stdoutLineBuf.trim()}`);
if (lineBuf.trim() && onLine) onLine(lineBuf.trim());
lineBuf = '';
+ stdoutLineBuf = '';
if (code === 0) {
resolve({ stdout, stderr });
@@ -382,6 +409,9 @@ function makeLmProgressHandler(job: ActiveJob): (line: string) => void {
const PHASE1_STEP_CEIL = 400;
return (line: string) => {
+ // Always capture the raw line for the debug log
+ job.logs.push(line);
+
// Phase1 LM decode: "[Phase1] step 100, 1 active, 19.0 tok/s"
const p1 = line.match(/^\[Phase1\] step (\d+),.*?([\d.]+) tok\/s/);
if (p1) {
@@ -426,6 +456,9 @@ function makeDitVaeProgressHandler(job: ActiveJob): (line: string) => void {
let ditTotalSteps = 8;
return (line: string) => {
+ // Always capture the raw line for the debug log
+ job.logs.push(line);
+
// DiT starting — capture step count: "[DiT] Starting: T=3470, S=1735, …, steps=8, …"
const ditStart = line.match(/^\[DiT\] Starting:.*?steps=(\d+)/);
if (ditStart) {
@@ -483,18 +516,18 @@ async function runViaSpawn(
const taskType = params.taskType || 'text2music';
const isCover = taskType === 'cover' || taskType === 'audio2audio';
const isRepaint = taskType === 'repaint';
+ const isLego = taskType === 'lego';
// Passthrough: taskType explicitly set, or audio codes provided without
// a source audio file (legacy callers that omit the taskType field).
const isPassthru = taskType === 'passthrough' || Boolean(params.audioCodes && !params.sourceAudioUrl);
// LLM (ace-qwen3) is only needed for plain text-to-music generation.
- // Cover, repaint, and passthrough all skip it.
- const skipLm = isCover || isRepaint || isPassthru;
+ // Cover, repaint, lego, and passthrough all skip it.
+ const skipLm = isCover || isRepaint || isLego || isPassthru;
// ── Debug: log what the UI/API client requested ──────────────────────────
console.log(
`[Job ${jobId}] Request received:` +
`\n mode = ${taskType}` +
- `\n customMode = ${params.customMode}` +
`\n ditModel = ${params.ditModel || '(default)'}` +
`\n sourceAudio = ${params.sourceAudioUrl || 'none'}` +
`\n repaintRegion = [${params.repaintingStart ?? 'start'}, ${params.repaintingEnd ?? 'end'}]` +
@@ -513,7 +546,8 @@ async function runViaSpawn(
// (cover / repaint / passthrough). Only include the fields each binary
// actually understands so the format stays clean and predictable.
const caption = params.style || 'pop music';
- const prompt = params.customMode ? caption : (params.songDescription || caption);
+ // Use song description when provided (user's natural-language intent), falling back to style/caption
+ const prompt = params.songDescription || caption;
// Instrumental: pass the special "[Instrumental]" lyrics marker so the LLM
// skips lyrics generation (as documented in the acestep.cpp README).
const lyrics = params.instrumental ? '[Instrumental]' : (params.lyrics || '');
@@ -535,7 +569,7 @@ async function runViaSpawn(
if (params.timeSignature) requestJson.timesignature = params.timeSignature;
if (skipLm) {
- // ── Cover / repaint / passthrough: ace-qwen3 is skipped ─────────────
+ // ── Cover / repaint / lego / passthrough: ace-qwen3 is skipped ──────
// Add only the mode-specific fields that dit-vae cares about.
if (isPassthru) {
if (!params.audioCodes) {
@@ -554,6 +588,25 @@ async function runViaSpawn(
// Note: sourceAudioUrl is guaranteed here — validated in processGeneration.
requestJson.repainting_start = params.repaintingStart ?? -1;
requestJson.repainting_end = params.repaintingEnd ?? -1;
+ } else if (isLego) {
+ // Lego: generate a new instrument track layered over an existing backing track.
+ // Requires the base model (acestep-v15-base) and --src-audio.
+ // The "lego" field holds the track name (e.g. "guitar", "drums").
+ if (!params.trackName) {
+ throw new Error("task_type='lego' requires a track name (e.g. 'guitar')");
+ }
+ requestJson.lego = params.trackName;
+ // Which existing tracks are "complete" and should not be overwritten.
+ if (params.completeTrackClasses && params.completeTrackClasses.length > 0) {
+ requestJson.complete_track_classes = params.completeTrackClasses;
+ }
+ // Lego has strict parameter requirements per the spec — always enforce them
+ // regardless of what the frontend sent, so the binary never rejects the request.
+ requestJson.inference_steps = 50;
+ requestJson.guidance_scale = 7.0;
+ // shift=1.0 is a hard requirement for lego (the spec example always uses 1.0;
+ // using the normal default of 3.0 causes dit-vae to reject the request).
+ requestJson.shift = 1.0;
}
} else {
// ── Text-to-music: include LM parameters for ace-qwen3 ──────────────
@@ -563,12 +616,15 @@ async function runViaSpawn(
requestJson.lm_top_p = params.lmTopP ?? 0.9;
requestJson.lm_top_k = params.lmTopK ?? 0;
requestJson.lm_negative_prompt = params.lmNegativePrompt || '';
+ requestJson.use_cot_caption = params.useCotCaption ?? true;
}
const requestPath = path.join(tmpDir, 'request.json');
await writeFile(requestPath, JSON.stringify(requestJson, null, 2));
console.log(`[Job ${jobId}] Request JSON written to ${requestPath}:`);
console.log(JSON.stringify(requestJson, null, 2));
+ job.logs.push(`=== Job ${jobId} started — mode: ${taskType} ===`);
+ job.logs.push(`Request JSON: ${JSON.stringify(requestJson, null, 2)}`);
// ── Step 1: ace-qwen3 — LLM (lyrics + audio codes) ────────────────────
// Skipped when:
@@ -590,7 +646,9 @@ async function runViaSpawn(
if (batchSize > 1) lmArgs.push('--batch', String(batchSize));
lmArgs.push(...parseExtraArgs(process.env.ACE_QWEN3_EXTRA_ARGS));
- console.log(`[Job ${jobId}] Running ace-qwen3:\n ${lmBin} ${lmArgs.join(' ')}`);
+ const lmCmd = `${lmBin} ${lmArgs.join(' ')}`;
+ console.log(`[Job ${jobId}] Running ace-qwen3:\n ${lmCmd}`);
+ job.logs.push(`\n--- Running ace-qwen3 ---\n$ ${lmCmd}`);
await runBinary(lmBin, lmArgs, 'ace-qwen3', undefined, makeLmProgressHandler(job));
// Collect enriched JSON files produced by ace-qwen3:
@@ -618,9 +676,25 @@ async function runViaSpawn(
const ditVaeBin = config.acestep.ditVaeBin!;
const textEncoderModel = config.acestep.textEncoderModel;
- const ditModel = resolveParamDitModel(params.ditModel);
const vaeModel = config.acestep.vaeModel;
+ // Lego mode mandates the base DiT model — no other variant will work.
+ // Override whatever the frontend sent and fail early with a clear message
+ // if the base model has not been downloaded yet.
+ let ditModel: string;
+ if (isLego) {
+ const baseModel = config.acestep.baseModel;
+ if (!baseModel) {
+ throw new Error(
+ 'Lego mode requires the base DiT model (acestep-v15-base) ' +
+ '— download it via the Model Manager first'
+ );
+ }
+ ditModel = baseModel;
+ } else {
+ ditModel = resolveParamDitModel(params.ditModel);
+ }
+
if (!textEncoderModel) throw new Error('Text-encoder model not found — run models.sh first');
if (!ditModel) throw new Error('DiT model not found — run models.sh first');
if (!vaeModel) throw new Error('VAE model not found — run models.sh first');
@@ -655,20 +729,30 @@ async function runViaSpawn(
ditArgs.push('--lora-scale', String(loraState.scale));
}
+ // WAV format: pass --wav so the binary outputs WAV; MP3 (default): no flag,
+ // the binary outputs MP3 natively (upstream acestep-cpp has native MP3 support).
+ const wantWav = (params.audioFormat === 'wav');
+ if (wantWav) {
+ ditArgs.push('--wav');
+ }
+
ditArgs.push(...parseExtraArgs(process.env.DIT_VAE_EXTRA_ARGS));
- console.log(`[Job ${jobId}] Running dit-vae:\n ${ditVaeBin} ${ditArgs.join(' ')}`);
+ const ditCmd = `${ditVaeBin} ${ditArgs.join(' ')}`;
+ console.log(`[Job ${jobId}] Running dit-vae:\n ${ditCmd}`);
+ job.logs.push(`\n--- Running dit-vae ---\n$ ${ditCmd}`);
await runBinary(ditVaeBin, ditArgs, 'dit-vae', undefined, makeDitVaeProgressHandler(job));
- // ── Collect generated WAV files ─────────────────────────────────────────
- // dit-vae places output WAVs alongside each enriched JSON:
- // request0.json → request00.wav, request01.wav, …
- // request1.json → request10.wav, request11.wav, …
+ // ── Collect generated audio files ──────────────────────────────────────
+ // dit-vae places output files alongside each enriched JSON:
+ // With --wav: request0.json → request00.wav, request01.wav, …
+ // Without --wav: request0.json → request00.mp3, request01.mp3, …
const { copyFile, rm } = await import('fs/promises');
+ const finalExt = wantWav ? 'wav' : 'mp3';
let rawAudioPaths: string[] = [];
try {
rawAudioPaths = readdirSync(tmpDir)
- .filter(f => /^request\d+\.wav$/.test(f))
+ .filter(f => new RegExp(`^request\\d+\\.${finalExt}$`).test(f))
.sort()
.map(f => path.join(tmpDir, f));
} catch { /* ignore */ }
@@ -677,10 +761,10 @@ async function runViaSpawn(
throw new Error('dit-vae produced no audio files');
}
- // Move WAVs to AUDIO_DIR with a stable, job-scoped name
+ // Copy files to AUDIO_DIR with a stable, job-scoped name
const audioPaths: string[] = [];
for (let i = 0; i < rawAudioPaths.length; i++) {
- const dest = path.join(AUDIO_DIR, `${jobId}_${i}.wav`);
+ const dest = path.join(AUDIO_DIR, `${jobId}_${i}.${finalExt}`);
await copyFile(rawAudioPaths[i], dest);
audioPaths.push(dest);
}
@@ -706,12 +790,18 @@ async function runViaSpawn(
status: 'succeeded',
};
job.rawResponse = enrichedMeta;
+ job.logs.push(`\n=== Job ${jobId} completed successfully — ${audioUrls.length} file(s): ${audioUrls.join(', ')} ===`);
console.log(`[Job ${jobId}] Completed successfully with ${audioUrls.length} audio file(s): ${audioUrls.join(', ')}`);
// Clean up tmp directory
await rm(tmpDir, { recursive: true, force: true }).catch(() => { /* best-effort */ });
} catch (err) {
+ // Append error to the debug log before re-throwing
+ if (activeJobs.has(jobId)) {
+ const j = activeJobs.get(jobId)!;
+ j.logs.push(`\n=== Job ${jobId} FAILED: ${(err as Error).message} ===`);
+ }
// Best-effort cleanup on failure
try {
const { rm } = await import('fs/promises');
@@ -727,7 +817,7 @@ async function runViaSpawn(
function buildHttpRequest(params: GenerationParams): Record<string, unknown> {
const caption = params.style || 'pop music';
- const prompt = params.customMode ? caption : (params.songDescription || caption);
+ const prompt = params.songDescription || caption;
const lyrics = params.instrumental ? '' : (params.lyrics || '');
const isThinking = params.thinking ?? false;
const isEnhance = params.enhance ?? false;
@@ -918,6 +1008,7 @@ export async function generateMusicViaAPI(params: GenerationParams): Promise<{ j
startTime: Date.now(),
status: 'queued',
queuePosition: jobQueue.length + 1,
+ logs: [],
};
activeJobs.set(jobId, job);
@@ -945,7 +1036,6 @@ async function processGeneration(
console.log(
`[Job ${jobId}] Starting generation (${mode} mode):` +
`\n taskType = ${params.taskType || 'text2music'}` +
- `\n customMode = ${params.customMode}` +
`\n ditModel = ${params.ditModel || '(default)'}` +
`\n sourceAudio = ${params.sourceAudioUrl || 'none'}` +
`\n audioCodes = ${params.audioCodes ? '[provided]' : 'none'}`
@@ -966,6 +1056,13 @@ async function processGeneration(
return;
}
+ if (params.taskType === 'lego' && !params.sourceAudioUrl) {
+ job.status = 'failed';
+ job.error = "task_type='lego' requires a source audio (--src-audio)";
+ console.error(`[Job ${jobId}] Validation failed: ${job.error}`);
+ return;
+ }
+
try {
job.stage = 'Generating music...';
if (useSpawnMode(params)) {
@@ -1014,6 +1111,113 @@ export function getJobRawResponse(jobId: string): unknown | null {
return activeJobs.get(jobId)?.rawResponse ?? null;
}
+/**
+ * Returns the captured log lines for a job (all raw output from ace-qwen3 + dit-vae).
+ * Optionally accepts an `after` offset to return only new lines since the last poll.
+ */
+export function getJobLogs(jobId: string, after = 0): { lines: string[]; total: number; status: string } | null {
+ const job = activeJobs.get(jobId);
+ if (!job) return null;
+ return {
+ lines: job.logs.slice(after),
+ total: job.logs.length,
+ status: job.status,
+ };
+}
+
+/**
+ * Returns a summary of all in-memory jobs (most recent first), for the debug log list.
+ */
+export function listActiveJobs(): Array<{ jobId: string; status: string; startTime: number; stage?: string; logCount: number }> {
+ const result: Array<{ jobId: string; status: string; startTime: number; stage?: string; logCount: number }> = [];
+ for (const [jobId, job] of activeJobs) {
+ result.push({ jobId, status: job.status, startTime: job.startTime, stage: job.stage, logCount: job.logs.length });
+ }
+ return result.sort((a, b) => b.startTime - a.startTime);
+}
+
+// ---------------------------------------------------------------------------
+// Ace Understand — reverse pipeline: audio → metadata + lyrics
+// ---------------------------------------------------------------------------
+
+export interface UnderstandResult {
+ caption?: string;
+ lyrics?: string;
+ bpm?: number;
+ duration?: number;
+ keyscale?: string;
+ timesignature?: string;
+ vocal_language?: string;
+ seed?: number;
+ inference_steps?: number;
+ guidance_scale?: number;
+ shift?: number;
+ audio_cover_strength?: number;
+ repainting_start?: number;
+ repainting_end?: number;
+ lm_temperature?: number;
+ lm_cfg_scale?: number;
+ lm_top_p?: number;
+ lm_top_k?: number;
+ lm_negative_prompt?: string;
+ use_cot_caption?: boolean;
+ audio_codes?: string;
+ [key: string]: unknown;
+}
+
+/**
+ * Run ace-understand on a source audio file and return the parsed result JSON.
+ *
+ * The binary performs a reverse pipeline: VAE-encodes the audio, FSQ-tokenises
+ * the latent, then uses the LM to generate metadata (caption, lyrics, bpm, etc.)
+ * — the same fields that ace-qwen3 would fill for generation.
+ */
+export async function runUnderstand(audioUrl: string): Promise<UnderstandResult> {
+ const understandBin = config.acestep.understandBin;
+ if (!understandBin) {
+ throw new Error('ace-understand binary not found — rebuild acestep.cpp or set ACE_UNDERSTAND_BIN');
+ }
+
+ const lmModel = config.acestep.lmModel;
+ const ditModel = config.acestep.ditModel;
+ const vaeModel = config.acestep.vaeModel;
+
+ if (!lmModel) throw new Error('LM model not found — run models.sh first');
+ if (!ditModel) throw new Error('DiT model not found — run models.sh first');
+ if (!vaeModel) throw new Error('VAE model not found — run models.sh first');
+
+ const srcAudioPath = resolveAudioPath(audioUrl);
+ if (!existsSync(srcAudioPath)) {
+ throw new Error(`Audio file not found: ${srcAudioPath}`);
+ }
+
+ // Write output JSON to a temp file so we can parse it reliably.
+ const tmpDir = await mkdtemp(path.join(tmpdir(), 'ace-understand-'));
+ const outJsonPath = path.join(tmpDir, 'understand.json');
+
+ try {
+ const args: string[] = [
+ '--src-audio', srcAudioPath,
+ '--dit', ditModel,
+ '--vae', vaeModel,
+ '--model', lmModel,
+ '-o', outJsonPath,
+ ];
+
+ console.log(`[understand] Running ace-understand:\n ${understandBin} ${args.join(' ')}`);
+
+ await runBinary(understandBin, args, 'ace-understand');
+
+ // Read and parse the output JSON
+ const raw = await readFile(outJsonPath, 'utf-8');
+ const result: UnderstandResult = JSON.parse(raw);
+ console.log('[understand] Result:', JSON.stringify(result, null, 2));
+ return result;
+ } finally {
+ await rm(tmpDir, { recursive: true, force: true }).catch(() => { /* best-effort */ });
+ }
+}
+
export async function discoverEndpoints(): Promise {
const mode = useSpawnMode() ? 'spawn' : 'http';
return {
diff --git a/services/api.ts b/services/api.ts
index 20a5436..9483dbc 100644
--- a/services/api.ts
+++ b/services/api.ts
@@ -266,7 +266,7 @@ export interface GenerationParams {
randomSeed?: boolean;
seed?: number;
thinking?: boolean;
- audioFormat?: 'mp3' | 'flac';
+ audioFormat?: 'wav' | 'mp3';
inferMethod?: 'ode' | 'sde';
shift?: number;
@@ -418,6 +418,12 @@ export const generateApi = {
scale: number;
path: string;
}> => api('/api/lora/status', { token }),
+
+ understandReferenceTrack: (trackId: string, token: string): Promise<Record<string, unknown>> =>
+ api(`/api/reference-tracks/${trackId}/understand`, { method: 'POST', token }),
+
+ understandAudioUrl: (audioUrl: string, token: string): Promise<Record<string, unknown>> =>
+ api('/api/reference-tracks/understand-url', { method: 'POST', body: { audioUrl }, token }),
};
// Users API
diff --git a/types.ts b/types.ts
index 8f3927a..cca52c0 100644
--- a/types.ts
+++ b/types.ts
@@ -79,7 +79,7 @@ export interface GenerationParams {
seed: number;
thinking: boolean;
enhance?: boolean;
- audioFormat: 'mp3' | 'flac';
+ audioFormat: 'wav' | 'mp3';
inferMethod: 'ode' | 'sde';
shift: number;
@@ -152,4 +152,4 @@ export interface UserProfile {
}
// Simplified views for ACE-Step UI
-export type View = 'create' | 'library' | 'models' | 'profile' | 'song' | 'playlist' | 'search' | 'news';
+export type View = 'create' | 'library' | 'models' | 'profile' | 'song' | 'playlist' | 'search' | 'news' | 'debug';