From 75311b5fa1827b8e6d984f3ee1b4cbe91dc26ddc Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 12 Mar 2026 15:08:17 +0000 Subject: [PATCH 01/22] Initial plan From 05391fd3475460d79526afee2c990db628bb123c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 12 Mar 2026 15:30:17 +0000 Subject: [PATCH 02/22] Changes before error encountered Co-authored-by: lmangani <1423657+lmangani@users.noreply.github.com> --- components/CreatePanel.tsx | 259 ++++++++++++++++++++++++++-- i18n/translations.ts | 76 ++++++++ server/src/config/index.ts | 14 ++ server/src/routes/referenceTrack.ts | 45 +++++ server/src/services/acestep.ts | 118 ++++++++++++- services/api.ts | 6 + 6 files changed, 504 insertions(+), 14 deletions(-) diff --git a/components/CreatePanel.tsx b/components/CreatePanel.tsx index b9a2a8f..90ac5e1 100644 --- a/components/CreatePanel.tsx +++ b/components/CreatePanel.tsx @@ -242,6 +242,9 @@ export const CreatePanel: React.FC = ({ // The SFT model GGUF file to download when not present (Q8_0 is the default quality tier) const SFT_MODEL_FILE = 'acestep-v15-sft-Q8_0.gguf'; + // The base DiT model name โ€” required for lego mode + const BASE_MODEL_NAME = 'acestep-v15-base'; + // Fallback model list when backend is unavailable const availableModels = useMemo(() => { if (fetchedModels.length > 0) { @@ -280,11 +283,22 @@ export const CreatePanel: React.FC = ({ return modelId.includes('sft'); }; + // Check if model is the base variant (required for lego) + const isBaseModel = (modelId: string): boolean => { + return modelId === BASE_MODEL_NAME || modelId.startsWith('acestep-v15-base'); + }; + // SFT model download/availability state for repaint mode type SftStatus = 'idle' | 'checking' | 'available' | 'downloading' | 'unavailable'; const [sftStatus, setSftStatus] = useState('idle'); const sftSseRef = useRef(null); + // Understand state โ€” per audio 
target + type UnderstandStatus = 'idle' | 'running' | 'done' | 'error'; + const [understandStatus, setUnderstandStatus] = useState>({ reference: 'idle', source: 'idle' }); + const [understandResult, setUnderstandResult] = useState | null>>({ reference: null, source: null }); + const [understandError, setUnderstandError] = useState>({ reference: null, source: null }); + const [isUploadingReference, setIsUploadingReference] = useState(false); const [isUploadingSource, setIsUploadingSource] = useState(false); const [isTranscribingReference, setIsTranscribingReference] = useState(false); @@ -668,6 +682,20 @@ export const CreatePanel: React.FC = ({ localStorage.setItem('ace-model', prevModelBeforeRepaintRef.current); prevModelBeforeRepaintRef.current = null; } + } else if (taskType === 'lego') { + // Entering lego mode: switch to base model if not already on one + if (!isBaseModel(selectedModel)) { + prevModelBeforeRepaintRef.current = selectedModel; + setSelectedModel(BASE_MODEL_NAME); + localStorage.setItem('ace-model', BASE_MODEL_NAME); + } + } else if (prevTaskType === 'lego') { + // Leaving lego mode: restore previous model if it was switched + if (prevModelBeforeRepaintRef.current && isBaseModel(selectedModel)) { + setSelectedModel(prevModelBeforeRepaintRef.current); + localStorage.setItem('ace-model', prevModelBeforeRepaintRef.current); + prevModelBeforeRepaintRef.current = null; + } } }, [taskType, checkAndEnsureSftModel]); @@ -940,6 +968,34 @@ export const CreatePanel: React.FC = ({ setIsTranscribingReference(false); }; + /** Run ace-understand on the audio at the given URL and store the result. 
*/ + const handleUnderstand = async (target: 'reference' | 'source', audioUrl: string) => { + if (!token || !audioUrl) return; + setUnderstandStatus(prev => ({ ...prev, [target]: 'running' })); + setUnderstandResult(prev => ({ ...prev, [target]: null })); + setUnderstandError(prev => ({ ...prev, [target]: null })); + try { + const result = await generateApi.understandAudioUrl(audioUrl, token); + setUnderstandResult(prev => ({ ...prev, [target]: result })); + setUnderstandStatus(prev => ({ ...prev, [target]: 'done' })); + } catch (err) { + const msg = err instanceof Error ? err.message : 'Analysis failed'; + setUnderstandError(prev => ({ ...prev, [target]: msg })); + setUnderstandStatus(prev => ({ ...prev, [target]: 'error' })); + } + }; + + /** Apply understand result fields to the generation form. */ + const applyUnderstandResult = (result: Record) => { + if (typeof result.caption === 'string' && result.caption) setStyle(result.caption); + if (typeof result.lyrics === 'string' && result.lyrics) setLyrics(result.lyrics); + if (typeof result.bpm === 'number' && result.bpm > 0) setBpm(result.bpm); + if (typeof result.duration === 'number' && result.duration > 0) setDuration(Math.round(result.duration)); + if (typeof result.keyscale === 'string' && result.keyscale) setKeyScale(result.keyscale); + if (typeof result.timesignature === 'string' && result.timesignature) setTimeSignature(result.timesignature); + if (typeof result.vocal_language === 'string' && result.vocal_language) setVocalLanguage(result.vocal_language); + }; + const deleteReferenceTrack = async (trackId: string) => { if (!token) return; try { @@ -1018,14 +1074,14 @@ export const CreatePanel: React.FC = ({ return `${minutes}:${String(seconds).padStart(2, '0')}`; }; - /** Clear the source audio and reset task type if it was cover/repaint. */ + /** Clear the source audio and reset task type if it was cover/repaint/lego. 
*/ const handleClearSourceAudio = () => { setSourceAudioUrl(''); setSourceAudioTitle(''); setSourcePlaying(false); setSourceTime(0); setSourceDuration(0); - if (taskType === 'cover' || taskType === 'repaint') setTaskType('text2music'); + if (taskType === 'cover' || taskType === 'repaint' || taskType === 'lego') setTaskType('text2music'); }; /** @@ -1413,7 +1469,7 @@ export const CreatePanel: React.FC = ({
- {t('cover')} / {t('repaintMode')} + {t('cover')} / {t('repaintMode')} / {t('legoMode')} optional @@ -1423,6 +1479,7 @@ export const CreatePanel: React.FC = ({
{/* Source audio mini-player */} {sourceAudioUrl && ( + <>
+ {/* Understand button */} +
+ {/* Understand result panel */} + {understandStatus.source !== 'idle' && ( +
+ {understandStatus.source === 'running' && {t('understandRunning')}} + {understandStatus.source === 'error' && {t('understandError')}: {understandError.source}} + {understandStatus.source === 'done' && understandResult.source && ( + <> +
{t('understandResult')}
+ {understandResult.source.caption &&
🎵 {String(understandResult.source.caption).slice(0, 80)}{String(understandResult.source.caption).length > 80 ? '…' : ''}
} +
+ {understandResult.source.bpm && BPM: {String(understandResult.source.bpm)}} + {understandResult.source.keyscale && Key: {String(understandResult.source.keyscale)}} + {understandResult.source.duration && Duration: {Math.round(Number(understandResult.source.duration))}s} +
+ + + )} +
+ )} + )} {/* Cover / Repaint mode controls โ€” shown when source audio is loaded */} @@ -1485,7 +1582,7 @@ export const CreatePanel: React.FC = ({ type="button" onClick={() => setTaskType('cover')} className={`flex-1 py-1.5 rounded-md text-[11px] font-medium transition-all ${ - taskType !== 'repaint' + taskType !== 'repaint' && taskType !== 'lego' ? 'bg-white dark:bg-zinc-700 text-zinc-900 dark:text-white shadow-sm' : 'text-zinc-500 dark:text-zinc-400 hover:text-zinc-700 dark:hover:text-zinc-200' }`} @@ -1503,15 +1600,26 @@ export const CreatePanel: React.FC = ({ > {t('repaintMode')} +
{/* Mode description */}

- {taskType === 'repaint' ? t('repaintModeDescription') : t('coverModeDescription')} + {taskType === 'repaint' ? t('repaintModeDescription') : taskType === 'lego' ? t('legoModeDescription') : t('coverModeDescription')}

{/* Cover strength slider (cover mode only) */} - {taskType !== 'repaint' && ( + {taskType !== 'repaint' && taskType !== 'lego' && (
= ({
)} + {/* Lego track selector (lego mode only) */} + {taskType === 'lego' && ( +
+ + +

{t('legoBaseModelRequired')}

+
+ )} + {/* SFT model status banner (repaint only) */} {taskType === 'repaint' && sftStatus !== 'idle' && (
= ({
{/* Reference Audio Player */} {audioTab === 'reference' && referenceAudioUrl && ( + <>
+ {/* Understand button */} +
+ {/* Understand result panel (reference) */} + {understandStatus.reference !== 'idle' && ( +
+ {understandStatus.reference === 'running' && {t('understandRunning')}} + {understandStatus.reference === 'error' && {t('understandError')}: {understandError.reference}} + {understandStatus.reference === 'done' && understandResult.reference && ( + <> +
{t('understandResult')}
+ {understandResult.reference.caption &&
🎵 {String(understandResult.reference.caption).slice(0, 80)}{String(understandResult.reference.caption).length > 80 ? '…' : ''}
} +
+ {understandResult.reference.bpm && BPM: {String(understandResult.reference.bpm)}} + {understandResult.reference.keyscale && Key: {String(understandResult.reference.keyscale)}} + {understandResult.reference.duration && Duration: {Math.round(Number(understandResult.reference.duration))}s} +
+ + + )} +
+ )} + )} {/* Source/Cover Audio Player */} {audioTab === 'source' && sourceAudioUrl && ( + <>
+ {/* Understand button */} + + {/* Understand result panel (source) */} + {understandStatus.source !== 'idle' && ( +
+ {understandStatus.source === 'running' && {t('understandRunning')}} + {understandStatus.source === 'error' && {t('understandError')}: {understandError.source}} + {understandStatus.source === 'done' && understandResult.source && ( + <> +
{t('understandResult')}
+ {understandResult.source.caption &&
🎵 {String(understandResult.source.caption).slice(0, 80)}{String(understandResult.source.caption).length > 80 ? '…' : ''}
} +
+ {understandResult.source.bpm && BPM: {String(understandResult.source.bpm)}} + {understandResult.source.keyscale && Key: {String(understandResult.source.keyscale)}} + {understandResult.source.duration && Duration: {Math.round(Number(understandResult.source.duration))}s} +
+ + + )} +
+ )} + )} - {/* Cover / Repaint mode toggle (shown when source audio is loaded) */} + {/* Cover / Repaint / Lego mode toggle (shown when source audio is loaded) */} {audioTab === 'source' && sourceAudioUrl && (
- {/* Mode toggle: Cover vs Repaint */} + {/* Mode toggle: Cover vs Repaint vs Lego */}
+
{/* Mode description */}

- {taskType === 'repaint' ? t('repaintModeDescription') : t('coverModeDescription')} + {taskType === 'repaint' ? t('repaintModeDescription') : taskType === 'lego' ? t('legoModeDescription') : t('coverModeDescription')}

{/* Cover strength slider (only in cover mode) */} - {taskType !== 'repaint' && ( + {taskType !== 'repaint' && taskType !== 'lego' && (
= ({
)} + {/* Lego track selector (lego mode only) */} + {taskType === 'lego' && ( +
+ + +

{t('legoBaseModelRequired')}

+
+ )} + {/* SFT model status banner (shown when repaint mode active) */} {taskType === 'repaint' && sftStatus !== 'idle' && (
{ + try { + const result = await pool.query( + 'SELECT user_id, storage_key FROM reference_tracks WHERE id = $1', + [req.params.id] + ); + if (result.rows.length === 0) { + res.status(404).json({ error: 'Track not found' }); + return; + } + if (result.rows[0].user_id !== req.user!.id) { + res.status(403).json({ error: 'Access denied' }); + return; + } + + const audioUrl = `/audio/${result.rows[0].storage_key}`; + const understood = await runUnderstand(audioUrl); + res.json(understood); + } catch (error) { + const msg = error instanceof Error ? error.message : 'Failed to understand audio'; + console.error('Understand reference track error:', error); + res.status(500).json({ error: msg }); + } +}); + +// Understand audio by URL (for source/generated audio without a reference track DB entry) +router.post('/understand-url', authMiddleware, async (req: AuthenticatedRequest, res: Response) => { + const { audioUrl } = req.body as { audioUrl?: string }; + if (!audioUrl || typeof audioUrl !== 'string') { + res.status(400).json({ error: 'audioUrl is required' }); + return; + } + + try { + const understood = await runUnderstand(audioUrl); + res.json(understood); + } catch (error) { + const msg = error instanceof Error ? error.message : 'Failed to understand audio'; + console.error('Understand URL error:', error); + res.status(500).json({ error: msg }); + } +}); + export default router; diff --git a/server/src/services/acestep.ts b/server/src/services/acestep.ts index a6b975c..8b8ae5a 100644 --- a/server/src/services/acestep.ts +++ b/server/src/services/acestep.ts @@ -483,12 +483,13 @@ async function runViaSpawn( const taskType = params.taskType || 'text2music'; const isCover = taskType === 'cover' || taskType === 'audio2audio'; const isRepaint = taskType === 'repaint'; + const isLego = taskType === 'lego'; // Passthrough: taskType explicitly set, or audio codes provided without // a source audio file (legacy callers that omit the taskType field). 
const isPassthru = taskType === 'passthrough' || Boolean(params.audioCodes && !params.sourceAudioUrl); // LLM (ace-qwen3) is only needed for plain text-to-music generation. - // Cover, repaint, and passthrough all skip it. - const skipLm = isCover || isRepaint || isPassthru; + // Cover, repaint, lego, and passthrough all skip it. + const skipLm = isCover || isRepaint || isLego || isPassthru; // โ”€โ”€ Debug: log what the UI/API client requested โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ console.log( @@ -535,7 +536,7 @@ async function runViaSpawn( if (params.timeSignature) requestJson.timesignature = params.timeSignature; if (skipLm) { - // โ”€โ”€ Cover / repaint / passthrough: ace-qwen3 is skipped โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + // โ”€โ”€ Cover / repaint / lego / passthrough: ace-qwen3 is skipped โ”€โ”€โ”€โ”€โ”€โ”€ // Add only the mode-specific fields that dit-vae cares about. if (isPassthru) { if (!params.audioCodes) { @@ -554,6 +555,26 @@ async function runViaSpawn( // Note: sourceAudioUrl is guaranteed here โ€” validated in processGeneration. requestJson.repainting_start = params.repaintingStart ?? -1; requestJson.repainting_end = params.repaintingEnd ?? -1; + } else if (isLego) { + // Lego: generate a new instrument track layered over an existing backing track. + // Requires the base model (acestep-v15-base) and --src-audio. + // The "lego" field holds the track name (e.g. "guitar", "drums"). + if (!params.trackName) { + throw new Error("task_type='lego' requires a track name (e.g. 'guitar')"); + } + requestJson.lego = params.trackName; + // Lego forces all DiT steps to use source context (audio_cover_strength=1.0 + // per the README โ€” dit-vae applies this internally when lego is set). + // Use recommended base-model settings if the caller hasn't specified them. 
+ if (!params.inferenceSteps || params.inferenceSteps <= 8) { + requestJson.inference_steps = 50; + } + if (!params.guidanceScale || params.guidanceScale <= 0) { + requestJson.guidance_scale = 7.0; + } + if (!params.shift || params.shift >= 3.0) { + requestJson.shift = 1.0; + } } } else { // โ”€โ”€ Text-to-music: include LM parameters for ace-qwen3 โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ @@ -966,6 +987,13 @@ async function processGeneration( return; } + if (params.taskType === 'lego' && !params.sourceAudioUrl) { + job.status = 'failed'; + job.error = "task_type='lego' requires a source audio (--src-audio)"; + console.error(`[Job ${jobId}] Validation failed: ${job.error}`); + return; + } + try { job.stage = 'Generating music...'; if (useSpawnMode(params)) { @@ -1014,6 +1042,90 @@ export function getJobRawResponse(jobId: string): unknown | null { return activeJobs.get(jobId)?.rawResponse ?? null; } +// --------------------------------------------------------------------------- +// Ace Understand โ€” reverse pipeline: audio โ†’ metadata + lyrics +// --------------------------------------------------------------------------- + +export interface UnderstandResult { + caption?: string; + lyrics?: string; + bpm?: number; + duration?: number; + keyscale?: string; + timesignature?: string; + vocal_language?: string; + seed?: number; + inference_steps?: number; + guidance_scale?: number; + shift?: number; + audio_cover_strength?: number; + repainting_start?: number; + repainting_end?: number; + lm_temperature?: number; + lm_cfg_scale?: number; + lm_top_p?: number; + lm_top_k?: number; + lm_negative_prompt?: string; + use_cot_caption?: boolean; + audio_codes?: string; + [key: string]: unknown; +} + +/** + * Run ace-understand on a source audio file and return the parsed result JSON. + * + * The binary performs a reverse pipeline: VAE-encodes the audio, FSQ-tokenises + * the latent, then uses the LM to generate metadata (caption, lyrics, bpm, etc.) 
+ * โ€” the same fields that ace-qwen3 would fill for generation. + */ +export async function runUnderstand(audioUrl: string): Promise { + const understandBin = config.acestep.understandBin; + if (!understandBin) { + throw new Error('ace-understand binary not found โ€” rebuild acestep.cpp or set ACE_UNDERSTAND_BIN'); + } + + const lmModel = config.acestep.lmModel; + const ditModel = config.acestep.ditModel; + const vaeModel = config.acestep.vaeModel; + + if (!lmModel) throw new Error('LM model not found โ€” run models.sh first'); + if (!ditModel) throw new Error('DiT model not found โ€” run models.sh first'); + if (!vaeModel) throw new Error('VAE model not found โ€” run models.sh first'); + + const srcAudioPath = resolveAudioPath(audioUrl); + if (!existsSync(srcAudioPath)) { + throw new Error(`Audio file not found: ${srcAudioPath}`); + } + + // Write output JSON to a temp file so we can parse it reliably. + const { mkdtemp, rm, readFile: fsReadFile } = await import('fs/promises'); + const { tmpdir } = await import('os'); + const tmpDir = await mkdtemp(path.join(tmpdir(), 'ace-understand-')); + const outJsonPath = path.join(tmpDir, 'understand.json'); + + try { + const args: string[] = [ + '--src-audio', srcAudioPath, + '--dit', ditModel, + '--vae', vaeModel, + '--model', lmModel, + '-o', outJsonPath, + ]; + + console.log(`[understand] Running ace-understand:\n ${understandBin} ${args.join(' ')}`); + + await runBinary(understandBin, args, 'ace-understand'); + + // Read and parse the output JSON + const raw = await fsReadFile(outJsonPath, 'utf-8'); + const result: UnderstandResult = JSON.parse(raw); + console.log('[understand] Result:', JSON.stringify(result, null, 2)); + return result; + } finally { + await rm(tmpDir, { recursive: true, force: true }).catch(() => { /* best-effort */ }); + } +} + export async function discoverEndpoints(): Promise { const mode = useSpawnMode() ? 
'spawn' : 'http'; return { diff --git a/services/api.ts b/services/api.ts index 20a5436..8a56c0d 100644 --- a/services/api.ts +++ b/services/api.ts @@ -418,6 +418,12 @@ export const generateApi = { scale: number; path: string; }> => api('/api/lora/status', { token }), + + understandReferenceTrack: (trackId: string, token: string): Promise> => + api(`/api/reference-tracks/${trackId}/understand`, { method: 'POST', token }), + + understandAudioUrl: (audioUrl: string, token: string): Promise> => + api('/api/reference-tracks/understand-url', { method: 'POST', body: { audioUrl }, token }), }; // Users API From d515840a7080b31e3971e9628c17f3a0eb830591 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 12 Mar 2026 16:15:57 +0000 Subject: [PATCH 03/22] Complete LEGO mode, Ace Understand, and security fixes Co-authored-by: lmangani <1423657+lmangani@users.noreply.github.com> --- components/CreatePanel.tsx | 2 +- server/package-lock.json | 28 ++++++++++++++++++++++++++++ server/package.json | 1 + server/src/routes/generate.ts | 4 ++-- server/src/routes/referenceTrack.ts | 16 +++++++++++++++- server/src/services/acestep.ts | 25 ++++++++++++++++--------- 6 files changed, 63 insertions(+), 13 deletions(-) diff --git a/components/CreatePanel.tsx b/components/CreatePanel.tsx index 90ac5e1..669cb57 100644 --- a/components/CreatePanel.tsx +++ b/components/CreatePanel.tsx @@ -285,7 +285,7 @@ export const CreatePanel: React.FC = ({ // Check if model is the base variant (required for lego) const isBaseModel = (modelId: string): boolean => { - return modelId === BASE_MODEL_NAME || modelId.startsWith('acestep-v15-base'); + return modelId.startsWith('acestep-v15-base'); }; // SFT model download/availability state for repaint mode diff --git a/server/package-lock.json b/server/package-lock.json index e252cb4..a9087f3 100644 --- a/server/package-lock.json +++ b/server/package-lock.json @@ -13,6 +13,7 @@ "cors": "^2.8.5", "dotenv": 
"^16.3.1", "express": "^4.18.2", + "express-rate-limit": "^8.3.1", "helmet": "^8.1.0", "jsonwebtoken": "^9.0.2", "multer": "^2.0.2", @@ -1160,6 +1161,24 @@ "url": "https://opencollective.com/express" } }, + "node_modules/express-rate-limit": { + "version": "8.3.1", + "resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.3.1.tgz", + "integrity": "sha512-D1dKN+cmyPWuvB+G2SREQDzPY1agpBIcTa9sJxOPMCNeH3gwzhqJRDWCXW3gg0y//+LQ/8j52JbMROWyrKdMdw==", + "license": "MIT", + "dependencies": { + "ip-address": "10.1.0" + }, + "engines": { + "node": ">= 16" + }, + "funding": { + "url": "https://github.com/sponsors/express-rate-limit" + }, + "peerDependencies": { + "express": ">= 4.11" + } + }, "node_modules/file-uri-to-path": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz", @@ -1397,6 +1416,15 @@ "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==", "license": "ISC" }, + "node_modules/ip-address": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.1.0.tgz", + "integrity": "sha512-XXADHxXmvT9+CRxhXg56LJovE+bmWnEWB78LB83VZTprKTmaC5QfruXocxzTZ2Kl0DNwKuBdlIhjL8LeY8Sf8Q==", + "license": "MIT", + "engines": { + "node": ">= 12" + } + }, "node_modules/ipaddr.js": { "version": "1.9.1", "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", diff --git a/server/package.json b/server/package.json index 027abad..e41890a 100644 --- a/server/package.json +++ b/server/package.json @@ -15,6 +15,7 @@ "cors": "^2.8.5", "dotenv": "^16.3.1", "express": "^4.18.2", + "express-rate-limit": "^8.3.1", "helmet": "^8.1.0", "jsonwebtoken": "^9.0.2", "multer": "^2.0.2", diff --git a/server/src/routes/generate.ts b/server/src/routes/generate.ts index 5402cb1..84ece95 100644 --- a/server/src/routes/generate.ts +++ b/server/src/routes/generate.ts @@ -271,9 +271,9 @@ router.post('/', authMiddleware, async 
(req: AuthenticatedRequest, res: Response } // In custom mode, at least one content field is required โ€” unless the request - // is for cover, audio2audio, or repaint mode and a source audio is provided + // is for cover, audio2audio, repaint, or lego mode and a source audio is provided // (the source audio itself is the primary input; style/lyrics are optional). - const requiresSourceAudio = taskType === 'cover' || taskType === 'audio2audio' || taskType === 'repaint'; + const requiresSourceAudio = taskType === 'cover' || taskType === 'audio2audio' || taskType === 'repaint' || taskType === 'lego'; if (customMode && !style && !lyrics && !referenceAudioUrl && !(requiresSourceAudio && sourceAudioUrl)) { res.status(400).json({ error: 'Style, lyrics, or reference audio required for custom mode' }); return; diff --git a/server/src/routes/referenceTrack.ts b/server/src/routes/referenceTrack.ts index df32845..dfd6996 100644 --- a/server/src/routes/referenceTrack.ts +++ b/server/src/routes/referenceTrack.ts @@ -4,6 +4,7 @@ import path from 'path'; import os from 'os'; import { promises as fs } from 'fs'; import { fileURLToPath } from 'url'; +import rateLimit from 'express-rate-limit'; import { pool } from '../db/pool.js'; import { authMiddleware, AuthenticatedRequest } from '../middleware/auth.js'; import { getStorageProvider } from '../services/storage/factory.js'; @@ -15,6 +16,19 @@ const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); const AUDIO_DIR = path.join(__dirname, '../../public/audio'); +// Per-IP rate limiter for CPU-intensive understand operations (max 6 requests per minute) +const understandRateLimiter = rateLimit({ + windowMs: 60_000, + max: 6, + standardHeaders: true, + legacyHeaders: false, + message: { error: 'Too many requests โ€” please wait before analysing another track' }, +}); + +// Apply rate limiting to all understand routes +router.use('/understand-url', understandRateLimiter); 
+router.use('/:id/understand', understandRateLimiter); + const upload = multer({ storage: multer.memoryStorage(), limits: { fileSize: 50 * 1024 * 1024 }, // 50MB max @@ -350,7 +364,7 @@ router.post('/:id/understand', authMiddleware, async (req: AuthenticatedRequest, }); // Understand audio by URL (for source/generated audio without a reference track DB entry) -router.post('/understand-url', authMiddleware, async (req: AuthenticatedRequest, res: Response) => { +router.post('/understand-url', understandRateLimiter, authMiddleware, async (req: AuthenticatedRequest, res: Response) => { const { audioUrl } = req.body as { audioUrl?: string }; if (!audioUrl || typeof audioUrl !== 'string') { res.status(400).json({ error: 'audioUrl is required' }); diff --git a/server/src/services/acestep.ts b/server/src/services/acestep.ts index 8b8ae5a..5a1c360 100644 --- a/server/src/services/acestep.ts +++ b/server/src/services/acestep.ts @@ -9,9 +9,10 @@ */ import { spawn } from 'child_process'; -import { writeFile, mkdir, readFile } from 'fs/promises'; +import { writeFile, mkdir, readFile, mkdtemp, rm } from 'fs/promises'; import { execFileSync } from 'child_process'; import { existsSync, readdirSync } from 'fs'; +import { tmpdir } from 'os'; import path from 'path'; import { fileURLToPath } from 'url'; import { config } from '../config/index.js'; @@ -82,10 +83,20 @@ export interface GenerationParams { useAdg?: boolean; cfgIntervalStart?: number; cfgIntervalEnd?: number; + customTimesteps?: string; useCotMetas?: boolean; useCotCaption?: boolean; useCotLanguage?: boolean; autogen?: boolean; + constrainedDecodingDebug?: boolean; + allowLmBatch?: boolean; + getScores?: boolean; + getLrc?: boolean; + scoreScale?: number; + lmBatchChunkSize?: number; + trackName?: string; + completeTrackClasses?: string[]; + isFormatCaption?: boolean; ditModel?: string; } @@ -563,16 +574,14 @@ async function runViaSpawn( throw new Error("task_type='lego' requires a track name (e.g. 
'guitar')"); } requestJson.lego = params.trackName; - // Lego forces all DiT steps to use source context (audio_cover_strength=1.0 - // per the README โ€” dit-vae applies this internally when lego is set). // Use recommended base-model settings if the caller hasn't specified them. - if (!params.inferenceSteps || params.inferenceSteps <= 8) { + if (params.inferenceSteps === undefined || params.inferenceSteps === null) { requestJson.inference_steps = 50; } - if (!params.guidanceScale || params.guidanceScale <= 0) { + if (params.guidanceScale === undefined || params.guidanceScale === null) { requestJson.guidance_scale = 7.0; } - if (!params.shift || params.shift >= 3.0) { + if (params.shift === undefined || params.shift === null) { requestJson.shift = 1.0; } } @@ -1098,8 +1107,6 @@ export async function runUnderstand(audioUrl: string): Promise } // Write output JSON to a temp file so we can parse it reliably. - const { mkdtemp, rm, readFile: fsReadFile } = await import('fs/promises'); - const { tmpdir } = await import('os'); const tmpDir = await mkdtemp(path.join(tmpdir(), 'ace-understand-')); const outJsonPath = path.join(tmpDir, 'understand.json'); @@ -1117,7 +1124,7 @@ export async function runUnderstand(audioUrl: string): Promise await runBinary(understandBin, args, 'ace-understand'); // Read and parse the output JSON - const raw = await fsReadFile(outJsonPath, 'utf-8'); + const raw = await readFile(outJsonPath, 'utf-8'); const result: UnderstandResult = JSON.parse(raw); console.log('[understand] Result:', JSON.stringify(result, null, 2)); return result; From ccca16f8d2eb0293f25a9bddae2b3097c8c905c0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 12 Mar 2026 17:15:30 +0000 Subject: [PATCH 04/22] fix: add --wav flag to all dit-vae invocations Co-authored-by: lmangani <1423657+lmangani@users.noreply.github.com> --- server/src/routes/referenceTrack.ts | 6 +----- server/src/services/acestep.ts | 4 ++++ 2 
files changed, 5 insertions(+), 5 deletions(-) diff --git a/server/src/routes/referenceTrack.ts b/server/src/routes/referenceTrack.ts index dfd6996..542bf5c 100644 --- a/server/src/routes/referenceTrack.ts +++ b/server/src/routes/referenceTrack.ts @@ -25,10 +25,6 @@ const understandRateLimiter = rateLimit({ message: { error: 'Too many requests โ€” please wait before analysing another track' }, }); -// Apply rate limiting to all understand routes -router.use('/understand-url', understandRateLimiter); -router.use('/:id/understand', understandRateLimiter); - const upload = multer({ storage: multer.memoryStorage(), limits: { fileSize: 50 * 1024 * 1024 }, // 50MB max @@ -338,7 +334,7 @@ router.delete('/:id', authMiddleware, async (req: AuthenticatedRequest, res: Res }); // Understand a reference track with ace-understand -router.post('/:id/understand', authMiddleware, async (req: AuthenticatedRequest, res: Response) => { +router.post('/:id/understand', understandRateLimiter, authMiddleware, async (req: AuthenticatedRequest, res: Response) => { try { const result = await pool.query( 'SELECT user_id, storage_key FROM reference_tracks WHERE id = $1', diff --git a/server/src/services/acestep.ts b/server/src/services/acestep.ts index 5a1c360..aeec6ff 100644 --- a/server/src/services/acestep.ts +++ b/server/src/services/acestep.ts @@ -685,6 +685,10 @@ async function runViaSpawn( ditArgs.push('--lora-scale', String(loraState.scale)); } + // Always request WAV output (instead of the default FLAC) so the + // file-collection glob (*.wav) stays consistent across all modes. 
+ ditArgs.push('--wav'); + ditArgs.push(...parseExtraArgs(process.env.DIT_VAE_EXTRA_ARGS)); console.log(`[Job ${jobId}] Running dit-vae:\n ${ditVaeBin} ${ditArgs.join(' ')}`); From 50657f0b0988d8b20518e8b5139fd7cbd5b2a66e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 12 Mar 2026 17:26:38 +0000 Subject: [PATCH 05/22] fix: move LEGO controls out of Advanced Settings into inline Cover/Repaint/Lego block Co-authored-by: lmangani <1423657+lmangani@users.noreply.github.com> --- components/CreatePanel.tsx | 99 ++++++++++++++++++---------------- server/src/services/acestep.ts | 4 ++ 2 files changed, 56 insertions(+), 47 deletions(-) diff --git a/components/CreatePanel.tsx b/components/CreatePanel.tsx index 669cb57..aa50ef8 100644 --- a/components/CreatePanel.tsx +++ b/components/CreatePanel.tsx @@ -214,6 +214,11 @@ export const CreatePanel: React.FC = ({ const [trackName, setTrackName] = useState(''); const [completeTrackClasses, setCompleteTrackClasses] = useState(''); const [isFormatCaption, setIsFormatCaption] = useState(false); + // Parsed array โ€” memoised so the split doesn't run on every render + const completeTrackClassesParsed = useMemo( + () => completeTrackClasses.split(',').map(s => s.trim()).filter(Boolean), + [completeTrackClasses] + ); const [maxDurationWithLm, setMaxDurationWithLm] = useState(240); const [maxDurationWithoutLm, setMaxDurationWithoutLm] = useState(240); @@ -1224,13 +1229,7 @@ export const CreatePanel: React.FC = ({ scoreScale, lmBatchChunkSize, trackName: trackName.trim() || undefined, - completeTrackClasses: (() => { - const parsed = completeTrackClasses - .split(',') - .map((item) => item.trim()) - .filter(Boolean); - return parsed.length ? parsed : undefined; - })(), + completeTrackClasses: completeTrackClassesParsed.length ? 
completeTrackClassesParsed : undefined, isFormatCaption, loraLoaded, }); @@ -1687,6 +1686,29 @@ export const CreatePanel: React.FC = ({ ))} + {/* Which existing tracks to preserve */} +
+ +
+ {TRACK_NAMES.map(name => { + const isChecked = completeTrackClassesParsed.includes(name); + return ( + + ); + })} +
+

{t('legoBaseModelRequired')}

)} @@ -2182,6 +2204,29 @@ export const CreatePanel: React.FC = ({ ))} + {/* Which existing tracks to preserve */} +
+ +
+ {TRACK_NAMES.map(name => { + const isChecked = completeTrackClassesParsed.includes(name); + return ( + + ); + })} +
+

{t('legoBaseModelRequired')}

)} @@ -3052,46 +3097,6 @@ export const CreatePanel: React.FC = ({ -
- - -
- -
- -
- {TRACK_NAMES.map(name => { - const selected = completeTrackClasses.split(',').map(s => s.trim()).filter(Boolean); - const isChecked = selected.includes(name); - return ( - - ); - })} -
-
-
- {/* Mode Toggle */} -
- - -
- {/* Model Selection */}
- {/* SIMPLE MODE */} - {!customMode && ( -
- {/* Song Description */} -
-
- - {t('describeYourSong')} - - -
-