From fa3d16de3487b99cc8d39b3d0fbe42beafd7a737 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 6 Mar 2026 12:34:16 +0000 Subject: [PATCH 1/4] =?UTF-8?q?fix:=20use=20mid=20front=20vowel=20[e]=20in?= =?UTF-8?q?stead=20of=20[=C9=9B]=20in=20IPA=20to=20fix=20Google=20TTS=20pr?= =?UTF-8?q?onunciation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The open-mid front vowel ɛ was causing Google TTS to misinterpret Japanese エ-row sounds. For example, セイブ (Seibu/西武) generated IPA "sɛibɯ" which TTS read as "さいぶ" instead of "せいぶ". Changes: - Replace all ɛ mappings with e (mid front vowel) for エ-row kana - Add ei → eː vowel contraction rule (parallel to existing oɯ → oː) so エイ sequences produce long [eː] as in natural Japanese speech https://claude.ai/code/session_0199yco9qEy3UF4UoBegNEzN --- stationapi/src/domain/ipa.rs | 81 ++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 37 deletions(-) diff --git a/stationapi/src/domain/ipa.rs b/stationapi/src/domain/ipa.rs index fa65402c..19a6e23c 100644 --- a/stationapi/src/domain/ipa.rs +++ b/stationapi/src/domain/ipa.rs @@ -120,43 +120,43 @@ fn lookup_single(c: char) -> Option { 'ア' | 'ァ' => return Some(Phoneme::Regular("a")), 'イ' | 'ィ' => return Some(Phoneme::Regular("i")), 'ウ' | 'ゥ' => return Some(Phoneme::Regular("ɯ")), - 'エ' | 'ェ' => return Some(Phoneme::Regular("ɛ")), + 'エ' | 'ェ' => return Some(Phoneme::Regular("e")), 'オ' | 'ォ' => return Some(Phoneme::Regular("o")), // カ行 'カ' => "ka", 'キ' => "kʲi", 'ク' => "kɯ", - 'ケ' => "kɛ", + 'ケ' => "ke", 'コ' => "ko", // サ行 'サ' => "sa", 'シ' => "ɕi", 'ス' => "sɯ", - 'セ' => "sɛ", + 'セ' => "se", 'ソ' => "so", // タ行 'タ' => "ta", 'チ' => "t͡ɕi", 'ツ' => "t͡sɯ", - 'テ' => "tɛ", + 'テ' => "te", 'ト' => "to", // ナ行 'ナ' => "na", 'ニ' => "ɲi", 'ヌ' => "nɯ", - 'ネ' => "nɛ", + 'ネ' => "ne", 'ノ' => "no", // ハ行 'ハ' => "ha", 'ヒ' => "çi", 'フ' => "ɸɯ", - 'ヘ' => "hɛ", + 'ヘ' => "he", 'ホ' => "ho", // マ行 'マ' => "ma", 'ミ' => "mi", 'ム' => "mɯ", - 'メ' => "mɛ", + 'メ' => "me", 'モ' => "mo", // ヤ行 'ヤ' | 'ャ' => "ja", @@ -166,42 +166,42 @@ fn lookup_single(c: char) -> Option { 'ラ' => "ɾa", 'リ' => "ɾi", 'ル' => "ɾɯ", - 'レ' => "ɾɛ", + 'レ' => "ɾe", 'ロ' => "ɾo", // ワ行 'ワ' => "wa", 'ヰ' => "i", - 'ヱ' => "ɛ", + 'ヱ' => "e", 'ヲ' => "o", // ガ行 'ガ' => "ga", 'ギ' => "gi", 'グ' => "gɯ", - 'ゲ' => "gɛ", + 'ゲ' => "ge", 'ゴ' => "go", // ザ行 'ザ' => "za", 'ジ' => "ʤi", 'ズ' => "zɯ", - 'ゼ' => "zɛ", + 'ゼ' => "ze", 'ゾ' => "zo", // ダ行 'ダ' => "da", 'ヂ' => "dʑi", 'ヅ' => "dzɯ", - 'デ' => "dɛ", + 'デ' => "de", 'ド' => "do", // バ行 'バ' => "ba", 'ビ' => "bi", 'ブ' => "bɯ", - 'ベ' => "bɛ", + 'ベ' => "be", 'ボ' => "bo", // パ行 'パ' => "pa", 'ピ' => "pi", 'プ' => "pɯ", - 'ペ' => "pɛ", + 'ペ' => "pe", 'ポ' => "po", // 特殊 'ン' => return Some(Phoneme::MoraicNasal), @@ -229,7 +229,7 @@ fn split_onset(ipa: &str) -> (&str, &str) { // Find where the first vowel-like character starts let vowel_start = ipa .char_indices() - .find(|(_, c)| "aiɯɛeouəɐ".contains(*c)) + .find(|(_, c)| "aiɯeouəɐ".contains(*c)) .map(|(i, _)| i) .unwrap_or(ipa.len()); ipa.split_at(vowel_start) @@ -248,7 +248,7 @@ fn last_vowel(ipa: &str) -> Option<&'static str> { 'a' => return Some("a"), 'i' => return Some("i"), 'ɯ' => return Some("ɯ"), - 'ɛ' => return Some("ɛ"), + 'e' => return Some("e"), 'o' => return Some("o"), 'u' => return Some("u"), _ => continue, @@ -379,6 +379,13 @@ fn apply_vowel_length(input: &str) -> String { i += 2; continue; } + if i + 1 < len && chars[i] == 'e' && chars[i + 1] == 'i' { + // ei → eː (えい/けい pattern — 京成 keisei → keːseː) + result.push('e'); + result.push('ː'); + i += 2; + continue; + } result.push(chars[i]); i += 1; } @@ -409,12 +416,12 @@ mod tests { #[test] fn test_ueno() { - assert_eq!(ipa("ウエノ"), "ɯɛno"); + assert_eq!(ipa("ウエノ"), "ɯeno"); } #[test] fn test_ikebukuro() { - assert_eq!(ipa("イケブクロ"), "ikɛbɯkɯɾo"); + assert_eq!(ipa("イケブクロ"), "ikebɯkɯɾo"); } #[test] @@ -469,54 +476,54 @@ mod tests { #[test] fn test_keisei() { - assert_eq!(ipa("ケイセイ"), "kɛisɛi"); + assert_eq!(ipa("ケイセイ"), "keːseː"); } #[test] fn test_oshiage() { - assert_eq!(ipa("オシアゲ"), "oɕiagɛ"); + assert_eq!(ipa("オシアゲ"), "oɕiage"); } #[test] fn test_meitetsu() { // ツ is consistently t͡sɯ (affricate with tie bar) - assert_eq!(ipa("メイテツ"), "mɛitɛt͡sɯ"); + assert_eq!(ipa("メイテツ"), "meːtet͡sɯ"); } #[test] fn test_seibu() { - assert_eq!(ipa("セイブ"), "sɛibɯ"); + assert_eq!(ipa("セイブ"), "seːbɯ"); } #[test] fn test_toride() { - assert_eq!(ipa("トリデ"), "toɾidɛ"); + assert_eq!(ipa("トリデ"), "toɾide"); } #[test] fn test_fukiage() { - assert_eq!(ipa("フキアゲ"), "ɸɯkʲiagɛ"); + assert_eq!(ipa("フキアゲ"), "ɸɯkʲiage"); } #[test] fn test_fuse() { - assert_eq!(ipa("フセ"), "ɸɯsɛ"); + assert_eq!(ipa("フセ"), "ɸɯse"); } #[test] fn test_inagekaigan() { // ン at word end → ɴ - assert_eq!(ipa("イナゲカイガン"), "inagɛkaigaɴ"); + assert_eq!(ipa("イナゲカイガン"), "inagekaigaɴ"); } #[test] fn test_inage() { - assert_eq!(ipa("イナゲ"), "inagɛ"); + assert_eq!(ipa("イナゲ"), "inage"); } #[test] fn test_kire_uriwari() { - assert_eq!(ipa("キレウリワリ"), "kʲiɾɛɯɾiwaɾi"); + assert_eq!(ipa("キレウリワリ"), "kʲiɾeɯɾiwaɾi"); } #[test] @@ -526,41 +533,41 @@ mod tests { #[test] fn test_mejiro() { - assert_eq!(ipa("メジロ"), "mɛʤiɾo"); + assert_eq!(ipa("メジロ"), "meʤiɾo"); } #[test] fn test_isesaki() { - assert_eq!(ipa("イセサキ"), "isɛsakʲi"); + assert_eq!(ipa("イセサキ"), "isesakʲi"); } #[test] fn test_ube() { - assert_eq!(ipa("ウベ"), "ɯbɛ"); + assert_eq!(ipa("ウベ"), "ɯbe"); } #[test] fn test_itchome() { // ッチョウ → tt͡ɕoː - assert_eq!(ipa("イッチョウメ"), "itt͡ɕoːmɛ"); + assert_eq!(ipa("イッチョウメ"), "itt͡ɕoːme"); } #[test] fn test_sanchome() { - assert_eq!(ipa("サンチョウメ"), "sant͡ɕoːmɛ"); + assert_eq!(ipa("サンチョウメ"), "sant͡ɕoːme"); } #[test] fn test_koen() { // コウエン: コ=ko, ウ→長音化でoː, エン=eɴ → koːeɴ // Note: the original hardcoded value was "koeɴ" but phonologically "koːeɴ" is correct - assert_eq!(ipa("コウエン"), "koːɛɴ"); + assert_eq!(ipa("コウエン"), "koːeɴ"); } #[test] fn test_long_vowel_mark() { // ー explicitly lengthens - assert_eq!(ipa("ラーメン"), "ɾaːmɛɴ"); + assert_eq!(ipa("ラーメン"), "ɾaːmeɴ"); } #[test] @@ -626,7 +633,7 @@ mod tests { // Full-width space between words should be preserved assert_eq!( ipa("ドッキョウダイガクマエ ソウカマツバラ"), - "dokkʲoːdaigakɯmaɛ soːkamat͡sɯbaɾa" + "dokkʲoːdaigakɯmae soːkamat͡sɯbaɾa" ); } @@ -635,7 +642,7 @@ mod tests { // Half-width (ASCII) space between words should also be accepted assert_eq!( ipa("ドッキョウダイガクマエ ソウカマツバラ"), - "dokkʲoːdaigakɯmaɛ soːkamat͡sɯbaɾa" + "dokkʲoːdaigakɯmae soːkamat͡sɯbaɾa" ); } From 2d1a027a0a5018f8cfba5a5abb8402093231d76a Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 6 Mar 2026 12:46:29 +0000 Subject: [PATCH 2/4] =?UTF-8?q?fix:=20update=20line=20DTO=20test=20expecta?= =?UTF-8?q?tions=20to=20use=20[e]=20instead=20of=20[=C9=9B]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update the new name_ipa tests added in dev to match the ɛ→e vowel change. https://claude.ai/code/session_0199yco9qEy3UF4UoBegNEzN --- stationapi/src/use_case/dto/line.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stationapi/src/use_case/dto/line.rs b/stationapi/src/use_case/dto/line.rs index cf090db3..6bc0f996 100644 --- a/stationapi/src/use_case/dto/line.rs +++ b/stationapi/src/use_case/dto/line.rs @@ -384,7 +384,7 @@ mod tests { line.line_name_k = "セイブイケブクロセン".to_string(); let grpc_line: GrpcLine = line.into(); - assert_eq!(grpc_line.name_ipa, Some("sɛibɯikɛbɯkɯɾo laɪn".to_string())); + assert_eq!(grpc_line.name_ipa, Some("seːbɯikebɯkɯɾo laɪn".to_string())); } #[test] @@ -404,6 +404,6 @@ mod tests { line.line_name_k = "トウホクシンカンセン".to_string(); let grpc_line: GrpcLine = line.into(); - assert_eq!(grpc_line.name_ipa, Some("toːhokɯɕiŋkansɛɴ".to_string())); + assert_eq!(grpc_line.name_ipa, Some("toːhokɯɕiŋkanseɴ".to_string())); } } From fec36decebd49e06034078b2f4f236fe5a27c8be Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 6 Mar 2026 12:55:24 +0000 Subject: [PATCH 3/4] =?UTF-8?q?fix:=20prevent=20duplicate=20=CB=90=20in=20?= =?UTF-8?q?ei=E2=86=92e=CB=90=20vowel=20contraction?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When input like "eiː" is processed by apply_vowel_length, the ei→eː rule already appends ː, then the trailing ː would be pushed again. Skip a following long-vowel mark after the contraction to avoid "eːː". https://claude.ai/code/session_0199yco9qEy3UF4UoBegNEzN --- stationapi/src/domain/ipa.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/stationapi/src/domain/ipa.rs b/stationapi/src/domain/ipa.rs index 19a6e23c..2084c579 100644 --- a/stationapi/src/domain/ipa.rs +++ b/stationapi/src/domain/ipa.rs @@ -384,6 +384,10 @@ fn apply_vowel_length(input: &str) -> String { result.push('e'); result.push('ː'); i += 2; + // Skip a following long-vowel mark to avoid duplicate 'ː' + if i < len && (chars[i] == 'ː' || chars[i] == 'ー') { + i += 1; + } continue; } result.push(chars[i]); @@ -495,6 +499,12 @@ mod tests { assert_eq!(ipa("セイブ"), "seːbɯ"); } + #[test] + fn test_ei_long_vowel_no_duplicate() { + // セイー should not produce "seːː" + assert_eq!(ipa("セイー"), "seː"); + } + #[test] fn test_toride() { assert_eq!(ipa("トリデ"), "toɾide"); From 4b073c1c191f64c4e05dc5a22f0b209dee63cc29 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 6 Mar 2026 13:01:38 +0000 Subject: [PATCH 4/4] =?UTF-8?q?fix:=20prevent=20duplicate=20=CB=90=20in=20?= =?UTF-8?q?o=C9=AF=E2=86=92o=CB=90=20and=20oo=E2=86=92o=CB=90=20vowel=20co?= =?UTF-8?q?ntractions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Same issue as the ei→eː branch: if the input already has a trailing long-vowel mark after the digraph, the contraction would produce "oːː". Add the same skip-next-ː guard to both branches. https://claude.ai/code/session_0199yco9qEy3UF4UoBegNEzN --- stationapi/src/domain/ipa.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/stationapi/src/domain/ipa.rs b/stationapi/src/domain/ipa.rs index 2084c579..d0c09934 100644 --- a/stationapi/src/domain/ipa.rs +++ b/stationapi/src/domain/ipa.rs @@ -370,6 +370,10 @@ fn apply_vowel_length(input: &str) -> String { result.push('o'); result.push('ː'); i += 2; + // Skip a following long-vowel mark to avoid duplicate 'ː' + if i < len && (chars[i] == 'ː' || chars[i] == 'ー') { + i += 1; + } continue; } if i + 1 < len && chars[i] == 'o' && chars[i + 1] == 'o' { @@ -377,6 +381,10 @@ fn apply_vowel_length(input: &str) -> String { result.push('o'); result.push('ː'); i += 2; + // Skip a following long-vowel mark to avoid duplicate 'ː' + if i < len && (chars[i] == 'ː' || chars[i] == 'ー') { + i += 1; + } continue; } if i + 1 < len && chars[i] == 'e' && chars[i + 1] == 'i' { @@ -505,6 +513,18 @@ mod tests { assert_eq!(ipa("セイー"), "seː"); } + #[test] + fn test_ou_long_vowel_no_duplicate() { + // コウー should not produce "koːː" + assert_eq!(ipa("コウー"), "koː"); + } + + #[test] + fn test_oo_long_vowel_no_duplicate() { + // オオー should not produce "oːː" + assert_eq!(ipa("オオー"), "oː"); + } + #[test] fn test_toride() { assert_eq!(ipa("トリデ"), "toɾide");