From feeac30b48be80b1937ef44c9a2091ee10b1ec87 Mon Sep 17 00:00:00 2001 From: Tsubasa SEKIGUCHI Date: Fri, 6 Mar 2026 13:33:45 +0000 Subject: [PATCH 1/4] =?UTF-8?q?fix:=20IPA=E6=AF=8D=E9=9F=B3=E9=95=B7?= =?UTF-8?q?=E9=9F=B3=E5=8C=96=E3=83=AB=E3=83=BC=E3=83=AB=E3=82=92=E5=89=8A?= =?UTF-8?q?=E9=99=A4=E3=81=97Google=20TTS=E3=81=AE=E8=AA=A4=E8=AA=AD?= =?UTF-8?q?=E3=82=92=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit apply_vowel_length関数(ei→eː, oɯ→oː, oo→oː)を削除。 Google TTSがeːを英語の/eɪ/と解釈し「名鉄」が「まいてつ」と 発音される問題を修正。カタカナの音素をそのまま出力するようにした。 Co-Authored-By: Claude Opus 4.6 --- stationapi/src/domain/ipa.rs | 94 +++++++----------------------------- 1 file changed, 18 insertions(+), 76 deletions(-) diff --git a/stationapi/src/domain/ipa.rs b/stationapi/src/domain/ipa.rs index d0c09934..39797c70 100644 --- a/stationapi/src/domain/ipa.rs +++ b/stationapi/src/domain/ipa.rs @@ -344,8 +344,7 @@ fn apply_phonological_rules(phonemes: &[Phoneme]) -> String { } } - // Apply long vowel contractions: オウ → oː pattern - apply_vowel_length(&output) + output } /// Find the IPA string of the next Regular phoneme in the slice. @@ -356,54 +355,6 @@ fn find_next_regular(phonemes: &[Phoneme]) -> Option<&'static str> { }) } -/// Apply vowel length rules for common Japanese patterns. -/// オウ → oː (after consonant+o), ョウ/ョオ patterns are handled by digraph + this. -fn apply_vowel_length(input: &str) -> String { - let mut result = String::with_capacity(input.len()); - let chars: Vec = input.chars().collect(); - let len = chars.len(); - let mut i = 0; - - while i < len { - if i + 1 < len && chars[i] == 'o' && chars[i + 1] == 'ɯ' { - // oɯ → oː (おう/こう pattern) - result.push('o'); - result.push('ː'); - i += 2; - // Skip a following long-vowel mark to avoid duplicate 'ː' - if i < len && (chars[i] == 'ː' || chars[i] == 'ー') { - i += 1; - } - continue; - } - if i + 1 < len && chars[i] == 'o' && chars[i + 1] == 'o' { - // oo → oː (おお pattern) - result.push('o'); - result.push('ː'); - i += 2; - // Skip a following long-vowel mark to avoid duplicate 'ː' - if i < len && (chars[i] == 'ː' || chars[i] == 'ー') { - i += 1; - } - continue; - } - if i + 1 < len && chars[i] == 'e' && chars[i + 1] == 'i' { - // ei → eː (えい/けい pattern — 京成 keisei → keːseː) - result.push('e'); - result.push('ː'); - i += 2; - // Skip a following long-vowel mark to avoid duplicate 'ː' - if i < len && (chars[i] == 'ː' || chars[i] == 'ー') { - i += 1; - } - continue; - } - result.push(chars[i]); - i += 1; - } - - result -} #[cfg(test)] mod tests { @@ -444,14 +395,12 @@ mod tests { #[test] fn test_osaka() { - // オオ → oː - assert_eq!(ipa("オオサカ"), "oːsaka"); + assert_eq!(ipa("オオサカ"), "oosaka"); } #[test] fn test_kyoto() { - // キョウ → kʲoː (via kʲo + ウ → oɯ → oː) - assert_eq!(ipa("キョウト"), "kʲoːto"); + assert_eq!(ipa("キョウト"), "kʲoɯto"); } #[test] @@ -476,8 +425,7 @@ mod tests { #[test] fn test_ryogoku() { - // リョウ → ɾʲoː (via ɾʲo + ウ → oɯ → oː) - assert_eq!(ipa("リョウゴク"), "ɾʲoːgokɯ"); + assert_eq!(ipa("リョウゴク"), "ɾʲoɯgokɯ"); } #[test] @@ -488,7 +436,7 @@ mod tests { #[test] fn test_keisei() { - assert_eq!(ipa("ケイセイ"), "keːseː"); + assert_eq!(ipa("ケイセイ"), "keisei"); } #[test] @@ -499,30 +447,28 @@ mod tests { #[test] fn test_meitetsu() { // ツ is consistently t͡sɯ (affricate with tie bar) - assert_eq!(ipa("メイテツ"), "meːtet͡sɯ"); + assert_eq!(ipa("メイテツ"), "meitet͡sɯ"); } #[test] fn test_seibu() { - assert_eq!(ipa("セイブ"), "seːbɯ"); + assert_eq!(ipa("セイブ"), "seibɯ"); } #[test] fn test_ei_long_vowel_no_duplicate() { - // セイー should not produce "seːː" - assert_eq!(ipa("セイー"), "seː"); + // セイー should produce "seiː" (ー lengthens the preceding イ) + assert_eq!(ipa("セイー"), "seiː"); } #[test] fn test_ou_long_vowel_no_duplicate() { - // コウー should not produce "koːː" - assert_eq!(ipa("コウー"), "koː"); + assert_eq!(ipa("コウー"), "koɯː"); } #[test] fn test_oo_long_vowel_no_duplicate() { - // オオー should not produce "oːː" - assert_eq!(ipa("オオー"), "oː"); + assert_eq!(ipa("オオー"), "ooː"); } #[test] @@ -578,20 +524,17 @@ mod tests { #[test] fn test_itchome() { - // ッチョウ → tt͡ɕoː - assert_eq!(ipa("イッチョウメ"), "itt͡ɕoːme"); + assert_eq!(ipa("イッチョウメ"), "itt͡ɕoɯme"); } #[test] fn test_sanchome() { - assert_eq!(ipa("サンチョウメ"), "sant͡ɕoːme"); + assert_eq!(ipa("サンチョウメ"), "sant͡ɕoɯme"); } #[test] fn test_koen() { - // コウエン: コ=ko, ウ→長音化でoː, エン=eɴ → koːeɴ - // Note: the original hardcoded value was "koeɴ" but phonologically "koːeɴ" is correct - assert_eq!(ipa("コウエン"), "koːeɴ"); + assert_eq!(ipa("コウエン"), "koɯeɴ"); } #[test] @@ -602,8 +545,7 @@ mod tests { #[test] fn test_tokyo() { - // トウキョウ: ト=to, ウ→oː, キョ=kʲo, ウ→oː - assert_eq!(ipa("トウキョウ"), "toːkʲoː"); + assert_eq!(ipa("トウキョウ"), "toɯkʲoɯ"); } #[test] @@ -655,7 +597,7 @@ mod tests { #[test] fn test_geminate_palatalized() { // ッキョ → kkʲo (only the base consonant 'k' is geminated, not 'kʲ') - assert_eq!(ipa("ニッキョウ"), "ɲikkʲoː"); + assert_eq!(ipa("ニッキョウ"), "ɲikkʲoɯ"); } #[test] @@ -663,7 +605,7 @@ mod tests { // Full-width space between words should be preserved assert_eq!( ipa("ドッキョウダイガクマエ ソウカマツバラ"), - "dokkʲoːdaigakɯmae soːkamat͡sɯbaɾa" + "dokkʲoɯdaigakɯmae soɯkamat͡sɯbaɾa" ); } @@ -672,7 +614,7 @@ mod tests { // Half-width (ASCII) space between words should also be accepted assert_eq!( ipa("ドッキョウダイガクマエ ソウカマツバラ"), - "dokkʲoːdaigakɯmae soːkamat͡sɯbaɾa" + "dokkʲoɯdaigakɯmae soɯkamat͡sɯbaɾa" ); } From 75805971c0ba063bccdcbb3d3de899c8c05a1a8f Mon Sep 17 00:00:00 2001 From: Tsubasa SEKIGUCHI Date: Fri, 6 Mar 2026 13:38:52 +0000 Subject: [PATCH 2/4] =?UTF-8?q?fix:=20fmt=E3=81=AE=E4=BD=99=E5=88=86?= =?UTF-8?q?=E3=81=AA=E7=A9=BA=E8=A1=8C=E3=81=A8Line=20DTO=E3=83=86?= =?UTF-8?q?=E3=82=B9=E3=83=88=E3=81=AE=E6=9C=9F=E5=BE=85=E5=80=A4=E3=82=92?= =?UTF-8?q?=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- stationapi/src/domain/ipa.rs | 1 - stationapi/src/use_case/dto/line.rs | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/stationapi/src/domain/ipa.rs b/stationapi/src/domain/ipa.rs index 39797c70..7511f092 100644 --- a/stationapi/src/domain/ipa.rs +++ b/stationapi/src/domain/ipa.rs @@ -355,7 +355,6 @@ fn find_next_regular(phonemes: &[Phoneme]) -> Option<&'static str> { }) } - #[cfg(test)] mod tests { use super::*; diff --git a/stationapi/src/use_case/dto/line.rs b/stationapi/src/use_case/dto/line.rs index 6bc0f996..2aeb5eb5 100644 --- a/stationapi/src/use_case/dto/line.rs +++ b/stationapi/src/use_case/dto/line.rs @@ -384,7 +384,7 @@ mod tests { line.line_name_k = "セイブイケブクロセン".to_string(); let grpc_line: GrpcLine = line.into(); - assert_eq!(grpc_line.name_ipa, Some("seːbɯikebɯkɯɾo laɪn".to_string())); + assert_eq!(grpc_line.name_ipa, Some("seibɯikebɯkɯɾo laɪn".to_string())); } #[test] @@ -394,7 +394,7 @@ mod tests { line.line_name_k = "トウカイドウホンセン".to_string(); let grpc_line: GrpcLine = line.into(); - assert_eq!(grpc_line.name_ipa, Some("toːkaidoː meɪn laɪn".to_string())); + assert_eq!(grpc_line.name_ipa, Some("toɯkaidoɯ meɪn laɪn".to_string())); } #[test] @@ -404,6 +404,6 @@ mod tests { line.line_name_k = "トウホクシンカンセン".to_string(); let grpc_line: GrpcLine = line.into(); - assert_eq!(grpc_line.name_ipa, Some("toːhokɯɕiŋkanseɴ".to_string())); + assert_eq!(grpc_line.name_ipa, Some("toɯhokɯɕiŋkanseɴ".to_string())); } } From 1b00af044b4df3845cc7da4f0ee3bf75309349f5 Mon Sep 17 00:00:00 2001 From: Tsubasa SEKIGUCHI Date: Fri, 6 Mar 2026 13:50:04 +0000 Subject: [PATCH 3/4] =?UTF-8?q?fix:=20=E9=80=A3=E7=B6=9A=E6=AF=8D=E9=9F=B3?= =?UTF-8?q?=E9=96=93=E3=81=ABIPA=E9=9F=B3=E7=AF=80=E5=8C=BA=E5=88=87?= =?UTF-8?q?=E3=82=8A=E3=82=92=E6=8C=BF=E5=85=A5=E3=81=97Google=20TTS?= =?UTF-8?q?=E3=81=AE=E4=BA=8C=E9=87=8D=E6=AF=8D=E9=9F=B3=E8=AA=A4=E8=AA=AD?= =?UTF-8?q?=E3=82=92=E9=98=B2=E6=AD=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Google TTSが連続母音(ei, ou等)を英語の二重母音として解釈し 「西武」が「サイブ」と発音される問題を修正。 insert_syllable_breaks関数で母音間に「.」を挿入し、 各母音が独立した音節であることをTTSに伝える。 Co-Authored-By: Claude Opus 4.6 --- stationapi/src/domain/ipa.rs | 65 +++++++++++++++++++---------- stationapi/src/use_case/dto/line.rs | 12 ++++-- 2 files changed, 51 insertions(+), 26 deletions(-) diff --git a/stationapi/src/domain/ipa.rs b/stationapi/src/domain/ipa.rs index 7511f092..cd34545f 100644 --- a/stationapi/src/domain/ipa.rs +++ b/stationapi/src/domain/ipa.rs @@ -344,7 +344,26 @@ fn apply_phonological_rules(phonemes: &[Phoneme]) -> String { } } - output + insert_syllable_breaks(&output) +} + +/// Insert IPA syllable boundary markers (`.`) between consecutive vowels. +/// This prevents Google TTS from interpreting cross-mora vowel sequences +/// (e.g. `ei` in セイ) as English diphthongs (e.g. /eɪ/ → "ai"). +fn insert_syllable_breaks(input: &str) -> String { + let mut result = String::with_capacity(input.len()); + let mut prev_is_vowel = false; + + for c in input.chars() { + let is_vowel = "aiɯeou".contains(c); + if is_vowel && prev_is_vowel { + result.push('.'); + } + result.push(c); + prev_is_vowel = is_vowel; + } + + result } /// Find the IPA string of the next Regular phoneme in the slice. @@ -378,7 +397,7 @@ mod tests { #[test] fn test_ueno() { - assert_eq!(ipa("ウエノ"), "ɯeno"); + assert_eq!(ipa("ウエノ"), "ɯ.eno"); } #[test] @@ -394,12 +413,12 @@ mod tests { #[test] fn test_osaka() { - assert_eq!(ipa("オオサカ"), "oosaka"); + assert_eq!(ipa("オオサカ"), "o.osaka"); } #[test] fn test_kyoto() { - assert_eq!(ipa("キョウト"), "kʲoɯto"); + assert_eq!(ipa("キョウト"), "kʲo.ɯto"); } #[test] @@ -424,7 +443,7 @@ mod tests { #[test] fn test_ryogoku() { - assert_eq!(ipa("リョウゴク"), "ɾʲoɯgokɯ"); + assert_eq!(ipa("リョウゴク"), "ɾʲo.ɯgokɯ"); } #[test] @@ -435,39 +454,39 @@ mod tests { #[test] fn test_keisei() { - assert_eq!(ipa("ケイセイ"), "keisei"); + assert_eq!(ipa("ケイセイ"), "ke.ise.i"); } #[test] fn test_oshiage() { - assert_eq!(ipa("オシアゲ"), "oɕiage"); + assert_eq!(ipa("オシアゲ"), "oɕi.age"); } #[test] fn test_meitetsu() { // ツ is consistently t͡sɯ (affricate with tie bar) - assert_eq!(ipa("メイテツ"), "meitet͡sɯ"); + assert_eq!(ipa("メイテツ"), "me.itet͡sɯ"); } #[test] fn test_seibu() { - assert_eq!(ipa("セイブ"), "seibɯ"); + assert_eq!(ipa("セイブ"), "se.ibɯ"); } #[test] fn test_ei_long_vowel_no_duplicate() { // セイー should produce "seiː" (ー lengthens the preceding イ) - assert_eq!(ipa("セイー"), "seiː"); + assert_eq!(ipa("セイー"), "se.iː"); } #[test] fn test_ou_long_vowel_no_duplicate() { - assert_eq!(ipa("コウー"), "koɯː"); + assert_eq!(ipa("コウー"), "ko.ɯː"); } #[test] fn test_oo_long_vowel_no_duplicate() { - assert_eq!(ipa("オオー"), "ooː"); + assert_eq!(ipa("オオー"), "o.oː"); } #[test] @@ -477,7 +496,7 @@ mod tests { #[test] fn test_fukiage() { - assert_eq!(ipa("フキアゲ"), "ɸɯkʲiage"); + assert_eq!(ipa("フキアゲ"), "ɸɯkʲi.age"); } #[test] @@ -488,7 +507,7 @@ mod tests { #[test] fn test_inagekaigan() { // ン at word end → ɴ - assert_eq!(ipa("イナゲカイガン"), "inagekaigaɴ"); + assert_eq!(ipa("イナゲカイガン"), "inageka.igaɴ"); } #[test] @@ -498,12 +517,12 @@ mod tests { #[test] fn test_kire_uriwari() { - assert_eq!(ipa("キレウリワリ"), "kʲiɾeɯɾiwaɾi"); + assert_eq!(ipa("キレウリワリ"), "kʲiɾe.ɯɾiwaɾi"); } #[test] fn test_yao() { - assert_eq!(ipa("ヤオ"), "jao"); + assert_eq!(ipa("ヤオ"), "ja.o"); } #[test] @@ -523,17 +542,17 @@ mod tests { #[test] fn test_itchome() { - assert_eq!(ipa("イッチョウメ"), "itt͡ɕoɯme"); + assert_eq!(ipa("イッチョウメ"), "itt͡ɕo.ɯme"); } #[test] fn test_sanchome() { - assert_eq!(ipa("サンチョウメ"), "sant͡ɕoɯme"); + assert_eq!(ipa("サンチョウメ"), "sant͡ɕo.ɯme"); } #[test] fn test_koen() { - assert_eq!(ipa("コウエン"), "koɯeɴ"); + assert_eq!(ipa("コウエン"), "ko.ɯ.eɴ"); } #[test] @@ -544,7 +563,7 @@ mod tests { #[test] fn test_tokyo() { - assert_eq!(ipa("トウキョウ"), "toɯkʲoɯ"); + assert_eq!(ipa("トウキョウ"), "to.ɯkʲo.ɯ"); } #[test] @@ -596,7 +615,7 @@ mod tests { #[test] fn test_geminate_palatalized() { // ッキョ → kkʲo (only the base consonant 'k' is geminated, not 'kʲ') - assert_eq!(ipa("ニッキョウ"), "ɲikkʲoɯ"); + assert_eq!(ipa("ニッキョウ"), "ɲikkʲo.ɯ"); } #[test] @@ -604,7 +623,7 @@ mod tests { // Full-width space between words should be preserved assert_eq!( ipa("ドッキョウダイガクマエ ソウカマツバラ"), - "dokkʲoɯdaigakɯmae soɯkamat͡sɯbaɾa" + "dokkʲo.ɯda.igakɯma.e so.ɯkamat͡sɯbaɾa" ); } @@ -613,7 +632,7 @@ mod tests { // Half-width (ASCII) space between words should also be accepted assert_eq!( ipa("ドッキョウダイガクマエ ソウカマツバラ"), - "dokkʲoɯdaigakɯmae soɯkamat͡sɯbaɾa" + "dokkʲo.ɯda.igakɯma.e so.ɯkamat͡sɯbaɾa" ); } diff --git a/stationapi/src/use_case/dto/line.rs b/stationapi/src/use_case/dto/line.rs index 2aeb5eb5..967f4b1f 100644 --- a/stationapi/src/use_case/dto/line.rs +++ b/stationapi/src/use_case/dto/line.rs @@ -384,7 +384,10 @@ mod tests { line.line_name_k = "セイブイケブクロセン".to_string(); let grpc_line: GrpcLine = line.into(); - assert_eq!(grpc_line.name_ipa, Some("seibɯikebɯkɯɾo laɪn".to_string())); + assert_eq!( + grpc_line.name_ipa, + Some("se.ibɯ.ikebɯkɯɾo laɪn".to_string()) + ); } #[test] @@ -394,7 +397,10 @@ mod tests { line.line_name_k = "トウカイドウホンセン".to_string(); let grpc_line: GrpcLine = line.into(); - assert_eq!(grpc_line.name_ipa, Some("toɯkaidoɯ meɪn laɪn".to_string())); + assert_eq!( + grpc_line.name_ipa, + Some("to.ɯka.ido.ɯ meɪn laɪn".to_string()) + ); } #[test] @@ -404,6 +410,6 @@ mod tests { line.line_name_k = "トウホクシンカンセン".to_string(); let grpc_line: GrpcLine = line.into(); - assert_eq!(grpc_line.name_ipa, Some("toɯhokɯɕiŋkanseɴ".to_string())); + assert_eq!(grpc_line.name_ipa, Some("to.ɯhokɯɕiŋkanseɴ".to_string())); } } From 3b86968b49b47c98c7049cc306cab17bf5a3a5cd Mon Sep 17 00:00:00 2001 From: Tsubasa SEKIGUCHI Date: Fri, 6 Mar 2026 13:52:01 +0000 Subject: [PATCH 4/4] =?UTF-8?q?test:=20=E5=AE=9F=E5=9C=A8=E3=81=97?= =?UTF-8?q?=E3=81=AA=E3=81=84=E9=A7=85=E5=90=8D=E3=83=BB=E8=B7=AF=E7=B7=9A?= =?UTF-8?q?=E5=90=8D=E3=81=AE=E3=83=86=E3=82=B9=E3=83=88=E3=82=B1=E3=83=BC?= =?UTF-8?q?=E3=82=B9=E3=82=92=E5=89=8A=E9=99=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit セイー、コウー、オオー、ラーメン、カッジ、カッジュ、ニッキョウ など架空の組み合わせのテストを削除。 ニッキョウのパターンはドッキョウダイガクマエのテストでカバー済み。 Co-Authored-By: Claude Opus 4.6 --- stationapi/src/domain/ipa.rs | 40 ------------------------------------ 1 file changed, 40 deletions(-) diff --git a/stationapi/src/domain/ipa.rs b/stationapi/src/domain/ipa.rs index cd34545f..32abd4b9 100644 --- a/stationapi/src/domain/ipa.rs +++ b/stationapi/src/domain/ipa.rs @@ -473,22 +473,6 @@ mod tests { assert_eq!(ipa("セイブ"), "se.ibɯ"); } - #[test] - fn test_ei_long_vowel_no_duplicate() { - // セイー should produce "seiː" (ー lengthens the preceding イ) - assert_eq!(ipa("セイー"), "se.iː"); - } - - #[test] - fn test_ou_long_vowel_no_duplicate() { - assert_eq!(ipa("コウー"), "ko.ɯː"); - } - - #[test] - fn test_oo_long_vowel_no_duplicate() { - assert_eq!(ipa("オオー"), "o.oː"); - } - #[test] fn test_toride() { assert_eq!(ipa("トリデ"), "toɾide"); @@ -555,12 +539,6 @@ mod tests { assert_eq!(ipa("コウエン"), "ko.ɯ.eɴ"); } - #[test] - fn test_long_vowel_mark() { - // ー explicitly lengthens - assert_eq!(ipa("ラーメン"), "ɾaːmeɴ"); - } - #[test] fn test_tokyo() { assert_eq!(ipa("トウキョウ"), "to.ɯkʲo.ɯ"); @@ -589,18 +567,6 @@ mod tests { assert_eq!(ipa("シンヨコハマ"), "ɕiɲjokohama"); } - #[test] - fn test_geminate_ji() { - // ッジ → dʤi (voiced affricate gemination emits 'd') - assert_eq!(ipa("カッジ"), "kadʤi"); - } - - #[test] - fn test_geminate_ju() { - // ッジュ → ddʑɯ (voiced affricate gemination with digraph) - assert_eq!(ipa("カッジュ"), "kaddʑɯ"); - } - #[test] fn test_empty() { assert_eq!(katakana_to_ipa(""), Some(String::new())); @@ -612,12 +578,6 @@ mod tests { assert_eq!(katakana_to_ipa("シブヤX"), None); } - #[test] - fn test_geminate_palatalized() { - // ッキョ → kkʲo (only the base consonant 'k' is geminated, not 'kʲ') - assert_eq!(ipa("ニッキョウ"), "ɲikkʲo.ɯ"); - } - #[test] fn test_dokkyo_daigakumae_soka_matsubara() { // Full-width space between words should be preserved