Skip to content

Commit 0b22ca2

Browse files
authored
update gradio (#98)
1 parent 1b47091 commit 0b22ca2

1 file changed

Lines changed: 19 additions & 17 deletions

File tree

gradio_demo.py

Lines changed: 19 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -184,9 +184,10 @@ def load_examples_from_jsonl():
184 184

185 185
# Model configuration
186 186
SYSTEM_PROMPT = "You are a speech synthesizer that generates natural, realistic, and human-like conversational audio from dialogue text."
187-
MODEL_PATH = "fnlp/MOSS-TTSD-v0.5"
188-
SPT_CONFIG_PATH = "XY_Tokenizer/config/xy_tokenizer_32k_config.yaml"
189-
SPT_CHECKPOINT_PATH = "XY_Tokenizer/weights/xy_tokenizer.ckpt"
187+
MODEL_PATH = "fnlp/MOSS-TTSD-v0.7"
188+
# Align SPT config/weights with CLI inference
189+
SPT_CONFIG_PATH = "XY_Tokenizer/config/MOSS_TTSD_tokenizer.yaml"
190+
SPT_CHECKPOINT_PATH = "XY_Tokenizer/weights/MOSS_TTSD_tokenizer"
190 191
MAX_CHANNELS = 8
191 192

192 193
# Global variables for caching loaded models
@@ -251,29 +252,29 @@ def process_single_audio_generation(
251 252

252 253
# Handle different audio input modes (mutually exclusive)
253 254
if audio_mode == "Single":
254-
# Use single audio mode
255+
# Strict single-audio requirement
256+
if not prompt_audio_single:
257+
return (
258+
None,
259+
"Error: In Single mode, please provide one prompt_audio and its text.",
260+
)
255 261
item["prompt_audio"] = prompt_audio_single
256 262
item["prompt_text"] = prompt_text_single
257-
elif audio_mode == "Role" and prompt_audio_1 and prompt_audio_2:
258-
# Use role audio mode (requires both audio files)
263+
elif audio_mode == "Role":
264+
# Strict role-audio requirement: both speakers must be provided
265+
if not (prompt_audio_1 and prompt_audio_2):
266+
return (
267+
None,
268+
"Error: In Role mode, please provide both Role1 and Role2 reference audios.",
269+
)
259 270
item["prompt_audio_speaker1"] = prompt_audio_1
260 271
item["prompt_text_speaker1"] = prompt_text_1 if prompt_text_1 else ""
261 272
item["prompt_audio_speaker2"] = prompt_audio_2
262 273
item["prompt_text_speaker2"] = prompt_text_2 if prompt_text_2 else ""
263-
elif audio_mode == "Role" and prompt_audio_1:
264-
# Only Role 1 audio provided, treat as single audio
265-
print("Only Role 1 audio provided, treating as single audio.")
266-
item["prompt_audio"] = prompt_audio_1
267-
item["prompt_text"] = prompt_text_1 if prompt_text_1 else ""
268-
elif audio_mode == "Role" and prompt_audio_2:
269-
# Only Role 2 audio provided, treat as single audio
270-
print("Only Role 2 audio provided, treating as single audio.")
271-
item["prompt_audio"] = prompt_audio_2
272-
item["prompt_text"] = prompt_text_2 if prompt_text_2 else ""
273 274
else:
274 275
return (
275 276
None,
276-
"Error: Please select a mode and provide corresponding audio files\n- Single Audio Mode: Provide one audio file and corresponding text\n- Role Mode: Provide audio files for Role1 and Role2",
277+
"Error: Please select an audio input mode (Single or Role).",
277 278
)
278 279

279 280
# Set random seed to ensure reproducible results
@@ -290,6 +291,7 @@ def process_single_audio_generation(
290 291
system_prompt=SYSTEM_PROMPT,
291 292
start_idx=0,
292 293
use_normalize=use_normalize,
294+
silence_duration=0.1,
293 295
)
294 296

295 297
# Check results

0 commit comments

Comments
 (0)