diff --git a/stationapi/src/domain/ipa.rs b/stationapi/src/domain/ipa.rs index d0c09934..32abd4b9 100644 --- a/stationapi/src/domain/ipa.rs +++ b/stationapi/src/domain/ipa.rs @@ -344,67 +344,36 @@ fn apply_phonological_rules(phonemes: &[Phoneme]) -> String { } } - // Apply long vowel contractions: オウ → oː pattern - apply_vowel_length(&output) + insert_syllable_breaks(&output) } -/// Find the IPA string of the next Regular phoneme in the slice. -fn find_next_regular(phonemes: &[Phoneme]) -> Option<&'static str> { - phonemes.iter().find_map(|p| match p { - Phoneme::Regular(ipa) => Some(*ipa), - _ => None, - }) -} - -/// Apply vowel length rules for common Japanese patterns. -/// オウ → oː (after consonant+o), ョウ/ョオ patterns are handled by digraph + this. -fn apply_vowel_length(input: &str) -> String { +/// Insert IPA syllable boundary markers (`.`) between consecutive vowels. +/// This prevents Google TTS from interpreting cross-mora vowel sequences +/// (e.g. `ei` in セイ) as English diphthongs (e.g. /eɪ/ → "ai"). +fn insert_syllable_breaks(input: &str) -> String { let mut result = String::with_capacity(input.len()); - let chars: Vec = input.chars().collect(); - let len = chars.len(); - let mut i = 0; + let mut prev_is_vowel = false; - while i < len { - if i + 1 < len && chars[i] == 'o' && chars[i + 1] == 'ɯ' { - // oɯ → oː (おう/こう pattern) - result.push('o'); - result.push('ː'); - i += 2; - // Skip a following long-vowel mark to avoid duplicate 'ː' - if i < len && (chars[i] == 'ː' || chars[i] == 'ー') { - i += 1; - } - continue; - } - if i + 1 < len && chars[i] == 'o' && chars[i + 1] == 'o' { - // oo → oː (おお pattern) - result.push('o'); - result.push('ː'); - i += 2; - // Skip a following long-vowel mark to avoid duplicate 'ː' - if i < len && (chars[i] == 'ː' || chars[i] == 'ー') { - i += 1; - } - continue; - } - if i + 1 < len && chars[i] == 'e' && chars[i + 1] == 'i' { - // ei → eː (えい/けい pattern — 京成 keisei → keːseː) - result.push('e'); - result.push('ː'); - i += 2; - // Skip a following long-vowel mark to avoid duplicate 'ː' - if i < len && (chars[i] == 'ː' || chars[i] == 'ー') { - i += 1; - } - continue; + for c in input.chars() { + let is_vowel = "aiɯeou".contains(c); + if is_vowel && prev_is_vowel { + result.push('.'); } - result.push(chars[i]); - i += 1; + result.push(c); + prev_is_vowel = is_vowel; } result } +/// Find the IPA string of the next Regular phoneme in the slice. +fn find_next_regular(phonemes: &[Phoneme]) -> Option<&'static str> { + phonemes.iter().find_map(|p| match p { + Phoneme::Regular(ipa) => Some(*ipa), + _ => None, + }) +} + #[cfg(test)] mod tests { use super::*; @@ -428,7 +397,7 @@ mod tests { #[test] fn test_ueno() { - assert_eq!(ipa("ウエノ"), "ɯeno"); + assert_eq!(ipa("ウエノ"), "ɯ.eno"); } #[test] @@ -444,14 +413,12 @@ mod tests { #[test] fn test_osaka() { - // オオ → oː - assert_eq!(ipa("オオサカ"), "oːsaka"); + assert_eq!(ipa("オオサカ"), "o.osaka"); } #[test] fn test_kyoto() { - // キョウ → kʲoː (via kʲo + ウ → oɯ → oː) - assert_eq!(ipa("キョウト"), "kʲoːto"); + assert_eq!(ipa("キョウト"), "kʲo.ɯto"); } #[test] @@ -476,8 +443,7 @@ mod tests { #[test] fn test_ryogoku() { - // リョウ → ɾʲoː (via ɾʲo + ウ → oɯ → oː) - assert_eq!(ipa("リョウゴク"), "ɾʲoːgokɯ"); + assert_eq!(ipa("リョウゴク"), "ɾʲo.ɯgokɯ"); } #[test] @@ -488,41 +454,23 @@ mod tests { #[test] fn test_keisei() { - assert_eq!(ipa("ケイセイ"), "keːseː"); + assert_eq!(ipa("ケイセイ"), "ke.ise.i"); } #[test] fn test_oshiage() { - assert_eq!(ipa("オシアゲ"), "oɕiage"); + assert_eq!(ipa("オシアゲ"), "oɕi.age"); } #[test] fn test_meitetsu() { // ツ is consistently t͡sɯ (affricate with tie bar) - assert_eq!(ipa("メイテツ"), "meːtet͡sɯ"); + assert_eq!(ipa("メイテツ"), "me.itet͡sɯ"); } #[test] fn test_seibu() { - assert_eq!(ipa("セイブ"), "seːbɯ"); - } - - #[test] - fn test_ei_long_vowel_no_duplicate() { - // セイー should not produce "seːː" - assert_eq!(ipa("セイー"), "seː"); - } - - #[test] - fn test_ou_long_vowel_no_duplicate() { - // コウー should not produce "koːː" - assert_eq!(ipa("コウー"), "koː"); - } - - #[test] - fn test_oo_long_vowel_no_duplicate() { - // オオー should not produce "oːː" - assert_eq!(ipa("オオー"), "oː"); + assert_eq!(ipa("セイブ"), "se.ibɯ"); } #[test] @@ -532,7 +480,7 @@ mod tests { #[test] fn test_fukiage() { - assert_eq!(ipa("フキアゲ"), "ɸɯkʲiage"); + assert_eq!(ipa("フキアゲ"), "ɸɯkʲi.age"); } #[test] @@ -543,7 +491,7 @@ mod tests { #[test] fn test_inagekaigan() { // ン at word end → ɴ - assert_eq!(ipa("イナゲカイガン"), "inagekaigaɴ"); + assert_eq!(ipa("イナゲカイガン"), "inageka.igaɴ"); } #[test] @@ -553,12 +501,12 @@ mod tests { #[test] fn test_kire_uriwari() { - assert_eq!(ipa("キレウリワリ"), "kʲiɾeɯɾiwaɾi"); + assert_eq!(ipa("キレウリワリ"), "kʲiɾe.ɯɾiwaɾi"); } #[test] fn test_yao() { - assert_eq!(ipa("ヤオ"), "jao"); + assert_eq!(ipa("ヤオ"), "ja.o"); } #[test] @@ -578,32 +526,22 @@ mod tests { #[test] fn test_itchome() { - // ッチョウ → tt͡ɕoː - assert_eq!(ipa("イッチョウメ"), "itt͡ɕoːme"); + assert_eq!(ipa("イッチョウメ"), "itt͡ɕo.ɯme"); } #[test] fn test_sanchome() { - assert_eq!(ipa("サンチョウメ"), "sant͡ɕoːme"); + assert_eq!(ipa("サンチョウメ"), "sant͡ɕo.ɯme"); } #[test] fn test_koen() { - // コウエン: コ=ko, ウ→長音化でoː, エン=eɴ → koːeɴ - // Note: the original hardcoded value was "koeɴ" but phonologically "koːeɴ" is correct - assert_eq!(ipa("コウエン"), "koːeɴ"); - } - - #[test] - fn test_long_vowel_mark() { - // ー explicitly lengthens - assert_eq!(ipa("ラーメン"), "ɾaːmeɴ"); + assert_eq!(ipa("コウエン"), "ko.ɯ.eɴ"); } #[test] fn test_tokyo() { - // トウキョウ: ト=to, ウ→oː, キョ=kʲo, ウ→oː - assert_eq!(ipa("トウキョウ"), "toːkʲoː"); + assert_eq!(ipa("トウキョウ"), "to.ɯkʲo.ɯ"); } #[test] @@ -629,18 +567,6 @@ mod tests { assert_eq!(ipa("シンヨコハマ"), "ɕiɲjokohama"); } - #[test] - fn test_geminate_ji() { - // ッジ → dʤi (voiced affricate gemination emits 'd') - assert_eq!(ipa("カッジ"), "kadʤi"); - } - - #[test] - fn test_geminate_ju() { - // ッジュ → ddʑɯ (voiced affricate gemination with digraph) - assert_eq!(ipa("カッジュ"), "kaddʑɯ"); - } - #[test] fn test_empty() { assert_eq!(katakana_to_ipa(""), Some(String::new())); @@ -652,18 +578,12 @@ mod tests { assert_eq!(katakana_to_ipa("シブヤX"), None); } - #[test] - fn test_geminate_palatalized() { - // ッキョ → kkʲo (only the base consonant 'k' is geminated, not 'kʲ') - assert_eq!(ipa("ニッキョウ"), "ɲikkʲoː"); - } - #[test] fn test_dokkyo_daigakumae_soka_matsubara() { // Full-width space between words should be preserved assert_eq!( ipa("ドッキョウダイガクマエ ソウカマツバラ"), - "dokkʲoːdaigakɯmae soːkamat͡sɯbaɾa" + "dokkʲo.ɯda.igakɯma.e so.ɯkamat͡sɯbaɾa" ); } @@ -672,7 +592,7 @@ mod tests { // Half-width (ASCII) space between words should also be accepted assert_eq!( ipa("ドッキョウダイガクマエ ソウカマツバラ"), - "dokkʲoːdaigakɯmae soːkamat͡sɯbaɾa" + "dokkʲo.ɯda.igakɯma.e so.ɯkamat͡sɯbaɾa" ); } diff --git a/stationapi/src/use_case/dto/line.rs b/stationapi/src/use_case/dto/line.rs index 6bc0f996..967f4b1f 100644 --- a/stationapi/src/use_case/dto/line.rs +++ b/stationapi/src/use_case/dto/line.rs @@ -384,7 +384,10 @@ mod tests { line.line_name_k = "セイブイケブクロセン".to_string(); let grpc_line: GrpcLine = line.into(); - assert_eq!(grpc_line.name_ipa, Some("seːbɯikebɯkɯɾo laɪn".to_string())); + assert_eq!( + grpc_line.name_ipa, + Some("se.ibɯ.ikebɯkɯɾo laɪn".to_string()) + ); } #[test] @@ -394,7 +397,10 @@ mod tests { line.line_name_k = "トウカイドウホンセン".to_string(); let grpc_line: GrpcLine = line.into(); - assert_eq!(grpc_line.name_ipa, Some("toːkaidoː meɪn laɪn".to_string())); + assert_eq!( + grpc_line.name_ipa, + Some("to.ɯka.ido.ɯ meɪn laɪn".to_string()) + ); } #[test] @@ -404,6 +410,6 @@ mod tests { line.line_name_k = "トウホクシンカンセン".to_string(); let grpc_line: GrpcLine = line.into(); - assert_eq!(grpc_line.name_ipa, Some("toːhokɯɕiŋkanseɴ".to_string())); + assert_eq!(grpc_line.name_ipa, Some("to.ɯhokɯɕiŋkanseɴ".to_string())); } }