Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions livekit-agents/livekit/agents/stt/stt.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,22 @@ class SpeechData:
speaker_id: str | None = None
is_primary_speaker: bool | None = None
words: list[TimedString] | None = None
input_language: LanguageCode | None = None
"""the detected/input language spoken by the user. populated by STT services that support
translation, where ``language`` holds the target language and ``input_language`` holds the
original spoken language"""
input_text: str | None = None
"""the original transcription in the input language, when translation is active"""

def __post_init__(self) -> None:
    """Normalize the language fields after dataclass initialization.

    Any of ``language`` / ``input_language`` that was supplied as a plain
    ``str`` is wrapped in ``LanguageCode``. Values that are already a
    ``LanguageCode``, or that are ``None``, are left untouched.
    """
    for field_name in ("language", "input_language"):
        current = getattr(self, field_name)
        # ``None`` is not a ``str``, so unset fields fall through unchanged.
        if isinstance(current, str) and not isinstance(current, LanguageCode):
            setattr(self, field_name, LanguageCode(current))


@dataclass
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1103,14 +1103,22 @@ def _process_gladia_message(self, data: dict) -> None:
target_language = translation_data.get("target_language", "")
language = translated_utterance.get("language", target_language)

original_utterance = translation_data.get("utterance", {})
original_language = original_utterance.get("language", "")
original_text = original_utterance.get("text", "") or None

# Get the translated text
translated_text = translated_utterance.get("text", "").strip()
words = translated_utterance.get("words", [])

if translated_text and language:
# Create speech data for the translation
speech_data = stt.SpeechData(
language=LanguageCode(language), # Use the target language
language=LanguageCode(language),
input_language=LanguageCode(original_language)
if original_language
else None,
input_text=original_text,
start_time=translated_utterance.get("start", 0) + self.start_time_offset,
end_time=translated_utterance.get("end", 0) + self.start_time_offset,
confidence=translated_utterance.get("confidence", 1.0),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,15 @@
See https://docs.livekit.io/agents/integrations/stt/soniox/ for more information.
"""

from .stt import STT, ContextGeneralItem, ContextObject, ContextTranslationTerm, STTOptions
from .models import SonioxLanguages, SonioxRTModels
from .stt import (
STT,
ContextGeneralItem,
ContextObject,
ContextTranslationTerm,
STTOptions,
TranslationConfig,
)
from .version import __version__

__all__ = [
Expand All @@ -26,6 +34,9 @@
"ContextObject",
"ContextGeneralItem",
"ContextTranslationTerm",
"TranslationConfig",
"SonioxLanguages",
"SonioxRTModels",
"__version__",
]

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from typing import Literal

# Two-letter language codes accepted as a ``SonioxLanguages`` value.
# NOTE(review): presumably the ISO 639-1 codes supported by the Soniox API;
# verify against the Soniox documentation when adding or removing entries.
# Keep the entries in alphabetical order so additions are easy to review.
SonioxLanguages = Literal[
    "af",
    "ar",
    "az",
    "be",
    "bg",
    "bn",
    "bs",
    "ca",
    "cs",
    "cy",
    "da",
    "de",
    "el",
    "en",
    "es",
    "et",
    "eu",
    "fa",
    "fi",
    "fr",
    "gl",
    "gu",
    "he",
    "hi",
    "hr",
    "hu",
    "id",
    "it",
    "ja",
    "kk",
    "kn",
    "ko",
    "lt",
    "lv",
    "mk",
    "ml",
    "mr",
    "ms",
    "nl",
    "no",
    "pa",
    "pl",
    "pt",
    "ro",
    "ru",
    "sk",
    "sl",
    "sq",
    "sr",
    "sv",
    "sw",
    "ta",
    "te",
    "th",
    "tl",
    "tr",
    "uk",
    "ur",
    "vi",
    "zh",
]

# Model identifiers accepted as a ``SonioxRTModels`` value.
# NOTE(review): presumably Soniox real-time STT models — confirm against the Soniox docs.
SonioxRTModels = Literal["stt-rt-v4", "stt-rt-v3", "stt-rt-v3-preview", "stt-rt-preview-v2"]
Loading
Loading