From fa3d16de3487b99cc8d39b3d0fbe42beafd7a737 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 6 Mar 2026 12:34:16 +0000
Subject: [PATCH 1/4] =?UTF-8?q?fix:=20use=20mid=20front=20vowel=20[e]=20in?=
 =?UTF-8?q?stead=20of=20[=C9=9B]=20in=20IPA=20to=20fix=20Google=20TTS=20pr?=
 =?UTF-8?q?onunciation?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The open-mid front vowel ɛ was causing Google TTS to misinterpret
Japanese エ-row sounds. For example, セイブ (Seibu/西武) generated
IPA "sɛibɯ" which TTS read as "さいぶ" instead of "せいぶ".

Changes:
- Replace all ɛ mappings with e (mid front vowel) for エ-row kana
- Add ei → eː vowel contraction rule (parallel to existing oɯ → oː)
  so エイ sequences produce long [eː] as in natural Japanese speech

https://claude.ai/code/session_0199yco9qEy3UF4UoBegNEzN
---
 stationapi/src/domain/ipa.rs | 81 ++++++++++++++++++++----------------
 1 file changed, 44 insertions(+), 37 deletions(-)
diff --git a/stationapi/src/domain/ipa.rs b/stationapi/src/domain/ipa.rs
index fa65402c..19a6e23c 100644
--- a/stationapi/src/domain/ipa.rs
+++ b/stationapi/src/domain/ipa.rs
@@ -120,43 +120,43 @@ fn lookup_single(c: char) -> Option<Phoneme> {
         'ア' | 'ァ' => return Some(Phoneme::Regular("a")),
         'イ' | 'ィ' => return Some(Phoneme::Regular("i")),
         'ウ' | 'ゥ' => return Some(Phoneme::Regular("ɯ")),
-        'エ' | 'ェ' => return Some(Phoneme::Regular("ɛ")),
+        'エ' | 'ェ' => return Some(Phoneme::Regular("e")),
         'オ' | 'ォ' => return Some(Phoneme::Regular("o")),
         // カ行
         'カ' => "ka",
         'キ' => "kʲi",
         'ク' => "kɯ",
-        'ケ' => "kɛ",
+        'ケ' => "ke",
         'コ' => "ko",
         // サ行
         'サ' => "sa",
         'シ' => "ɕi",
         'ス' => "sɯ",
-        'セ' => "sɛ",
+        'セ' => "se",
         'ソ' => "so",
         // タ行
         'タ' => "ta",
         'チ' => "t͡ɕi",
         'ツ' => "t͡sɯ",
-        'テ' => "tɛ",
+        'テ' => "te",
         'ト' => "to",
         // ナ行
         'ナ' => "na",
         'ニ' => "ɲi",
         'ヌ' => "nɯ",
-        'ネ' => "nɛ",
+        'ネ' => "ne",
         'ノ' => "no",
         // ハ行
         'ハ' => "ha",
         'ヒ' => "çi",
         'フ' => "ɸɯ",
-        'ヘ' => "hɛ",
+        'ヘ' => "he",
         'ホ' => "ho",
         // マ行
         'マ' => "ma",
         'ミ' => "mi",
         'ム' => "mɯ",
-        'メ' => "mɛ",
+        'メ' => "me",
         'モ' => "mo",
         // ヤ行
         'ヤ' | 'ャ' => "ja",
@@ -166,42 +166,42 @@ fn lookup_single(c: char) -> Option<Phoneme> {
         'ラ' => "ɾa",
         'リ' => "ɾi",
         'ル' => "ɾɯ",
-        'レ' => "ɾɛ",
+        'レ' => "ɾe",
         'ロ' => "ɾo",
         // ワ行
         'ワ' => "wa",
         'ヰ' => "i",
-        'ヱ' => "ɛ",
+        'ヱ' => "e",
         'ヲ' => "o",
         // ガ行
         'ガ' => "ga",
         'ギ' => "gi",
         'グ' => "gɯ",
-        'ゲ' => "gɛ",
+        'ゲ' => "ge",
         'ゴ' => "go",
         // ザ行
         'ザ' => "za",
         'ジ' => "ʤi",
         'ズ' => "zɯ",
-        'ゼ' => "zɛ",
+        'ゼ' => "ze",
         'ゾ' => "zo",
         // ダ行
         'ダ' => "da",
         'ヂ' => "dʑi",
         'ヅ' => "dzɯ",
-        'デ' => "dɛ",
+        'デ' => "de",
         'ド' => "do",
         // バ行
         'バ' => "ba",
         'ビ' => "bi",
         'ブ' => "bɯ",
-        'ベ' => "bɛ",
+        'ベ' => "be",
         'ボ' => "bo",
         // パ行
         'パ' => "pa",
         'ピ' => "pi",
         'プ' => "pɯ",
-        'ペ' => "pɛ",
+        'ペ' => "pe",
         'ポ' => "po",
         // 特殊
         'ン' => return Some(Phoneme::MoraicNasal),
@@ -229,7 +229,7 @@ fn split_onset(ipa: &str) -> (&str, &str) {
     // Find where the first vowel-like character starts
     let vowel_start = ipa
         .char_indices()
-        .find(|(_, c)| "aiɯɛeouəɐ".contains(*c))
+        .find(|(_, c)| "aiɯeouəɐ".contains(*c))
         .map(|(i, _)| i)
         .unwrap_or(ipa.len());
     ipa.split_at(vowel_start)
@@ -248,7 +248,7 @@ fn last_vowel(ipa: &str) -> Option<&'static str> {
             'a' => return Some("a"),
             'i' => return Some("i"),
             'ɯ' => return Some("ɯ"),
-            'ɛ' => return Some("ɛ"),
+            'e' => return Some("e"),
             'o' => return Some("o"),
             'u' => return Some("u"),
             _ => continue,
@@ -379,6 +379,13 @@ fn apply_vowel_length(input: &str) -> String {
             i += 2;
             continue;
         }
+        if i + 1 < len && chars[i] == 'e' && chars[i + 1] == 'i' {
+            // ei → eː (えい/けい pattern — 京成 keisei → keːseː)
+            result.push('e');
+            result.push('ː');
+            i += 2;
+            continue;
+        }
         result.push(chars[i]);
         i += 1;
     }
@@ -409,12 +416,12 @@ mod tests {
 
     #[test]
     fn test_ueno() {
-        assert_eq!(ipa("ウエノ"), "ɯɛno");
+        assert_eq!(ipa("ウエノ"), "ɯeno");
     }
 
     #[test]
     fn test_ikebukuro() {
-        assert_eq!(ipa("イケブクロ"), "ikɛbɯkɯɾo");
+        assert_eq!(ipa("イケブクロ"), "ikebɯkɯɾo");
     }
 
     #[test]
@@ -469,54 +476,54 @@ mod tests {
 
     #[test]
     fn test_keisei() {
-        assert_eq!(ipa("ケイセイ"), "kɛisɛi");
+        assert_eq!(ipa("ケイセイ"), "keːseː");
     }
 
     #[test]
     fn test_oshiage() {
-        assert_eq!(ipa("オシアゲ"), "oɕiagɛ");
+        assert_eq!(ipa("オシアゲ"), "oɕiage");
     }
 
     #[test]
     fn test_meitetsu() {
         // ツ is consistently t͡sɯ (affricate with tie bar)
-        assert_eq!(ipa("メイテツ"), "mɛitɛt͡sɯ");
+        assert_eq!(ipa("メイテツ"), "meːtet͡sɯ");
     }
 
     #[test]
     fn test_seibu() {
-        assert_eq!(ipa("セイブ"), "sɛibɯ");
+        assert_eq!(ipa("セイブ"), "seːbɯ");
     }
 
     #[test]
     fn test_toride() {
-        assert_eq!(ipa("トリデ"), "toɾidɛ");
+        assert_eq!(ipa("トリデ"), "toɾide");
     }
 
     #[test]
     fn test_fukiage() {
-        assert_eq!(ipa("フキアゲ"), "ɸɯkʲiagɛ");
+        assert_eq!(ipa("フキアゲ"), "ɸɯkʲiage");
     }
 
     #[test]
     fn test_fuse() {
-        assert_eq!(ipa("フセ"), "ɸɯsɛ");
+        assert_eq!(ipa("フセ"), "ɸɯse");
     }
 
     #[test]
     fn test_inagekaigan() {
         // ン at word end → ɴ
-        assert_eq!(ipa("イナゲカイガン"), "inagɛkaigaɴ");
+        assert_eq!(ipa("イナゲカイガン"), "inagekaigaɴ");
     }
 
     #[test]
     fn test_inage() {
-        assert_eq!(ipa("イナゲ"), "inagɛ");
+        assert_eq!(ipa("イナゲ"), "inage");
     }
 
     #[test]
     fn test_kire_uriwari() {
-        assert_eq!(ipa("キレウリワリ"), "kʲiɾɛɯɾiwaɾi");
+        assert_eq!(ipa("キレウリワリ"), "kʲiɾeɯɾiwaɾi");
     }
 
     #[test]
@@ -526,41 +533,41 @@ mod tests {
 
     #[test]
     fn test_mejiro() {
-        assert_eq!(ipa("メジロ"), "mɛʤiɾo");
+        assert_eq!(ipa("メジロ"), "meʤiɾo");
     }
 
     #[test]
     fn test_isesaki() {
-        assert_eq!(ipa("イセサキ"), "isɛsakʲi");
+        assert_eq!(ipa("イセサキ"), "isesakʲi");
     }
 
     #[test]
     fn test_ube() {
-        assert_eq!(ipa("ウベ"), "ɯbɛ");
+        assert_eq!(ipa("ウベ"), "ɯbe");
     }
 
     #[test]
     fn test_itchome() {
         // ッチョウ → tt͡ɕoː
-        assert_eq!(ipa("イッチョウメ"), "itt͡ɕoːmɛ");
+        assert_eq!(ipa("イッチョウメ"), "itt͡ɕoːme");
     }
 
     #[test]
     fn test_sanchome() {
-        assert_eq!(ipa("サンチョウメ"), "sant͡ɕoːmɛ");
+        assert_eq!(ipa("サンチョウメ"), "sant͡ɕoːme");
     }
 
     #[test]
     fn test_koen() {
         // コウエン: コ=ko, ウ→長音化でoː, エン=eɴ → koːeɴ
         // Note: the original hardcoded value was "koeɴ" but phonologically "koːeɴ" is correct
-        assert_eq!(ipa("コウエン"), "koːɛɴ");
+        assert_eq!(ipa("コウエン"), "koːeɴ");
     }
 
     #[test]
     fn test_long_vowel_mark() {
         // ー explicitly lengthens
-        assert_eq!(ipa("ラーメン"), "ɾaːmɛɴ");
+        assert_eq!(ipa("ラーメン"), "ɾaːmeɴ");
     }
 
     #[test]
@@ -626,7 +633,7 @@ mod tests {
         // Full-width space between words should be preserved
         assert_eq!(
             ipa("ドッキョウダイガクマエ　ソウカマツバラ"),
-            "dokkʲoːdaigakɯmaɛ soːkamat͡sɯbaɾa"
+            "dokkʲoːdaigakɯmae soːkamat͡sɯbaɾa"
         );
     }
 
@@ -635,7 +642,7 @@ mod tests {
         // Half-width (ASCII) space between words should also be accepted
         assert_eq!(
             ipa("ドッキョウダイガクマエ ソウカマツバラ"),
-            "dokkʲoːdaigakɯmaɛ soːkamat͡sɯbaɾa"
+            "dokkʲoːdaigakɯmae soːkamat͡sɯbaɾa"
         );
     }
 

From 2d1a027a0a5018f8cfba5a5abb8402093231d76a Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 6 Mar 2026 12:46:29 +0000
Subject: [PATCH 2/4] =?UTF-8?q?fix:=20update=20line=20DTO=20test=20expecta?=
 =?UTF-8?q?tions=20to=20use=20[e]=20instead=20of=20[=C9=9B]?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Update the new name_ipa tests added in dev to match the ɛ→e vowel change
and the ei→eː contraction (セイブ now yields "seːbɯ" instead of "sɛibɯ").

https://claude.ai/code/session_0199yco9qEy3UF4UoBegNEzN
---
 stationapi/src/use_case/dto/line.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/stationapi/src/use_case/dto/line.rs b/stationapi/src/use_case/dto/line.rs
index cf090db3..6bc0f996 100644
--- a/stationapi/src/use_case/dto/line.rs
+++ b/stationapi/src/use_case/dto/line.rs
@@ -384,7 +384,7 @@ mod tests {
         line.line_name_k = "セイブイケブクロセン".to_string();
         let grpc_line: GrpcLine = line.into();
 
-        assert_eq!(grpc_line.name_ipa, Some("sɛibɯikɛbɯkɯɾo laɪn".to_string()));
+        assert_eq!(grpc_line.name_ipa, Some("seːbɯikebɯkɯɾo laɪn".to_string()));
     }
 
     #[test]
@@ -404,6 +404,6 @@ mod tests {
         line.line_name_k = "トウホクシンカンセン".to_string();
         let grpc_line: GrpcLine = line.into();
 
-        assert_eq!(grpc_line.name_ipa, Some("toːhokɯɕiŋkansɛɴ".to_string()));
+        assert_eq!(grpc_line.name_ipa, Some("toːhokɯɕiŋkanseɴ".to_string()));
     }
 }

From fec36decebd49e06034078b2f4f236fe5a27c8be Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 6 Mar 2026 12:55:24 +0000
Subject: [PATCH 3/4] =?UTF-8?q?fix:=20prevent=20duplicate=20=CB=90=20in=20?=
 =?UTF-8?q?ei=E2=86=92e=CB=90=20vowel=20contraction?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When input like "eiː" is processed by apply_vowel_length, the ei→eː
rule emits its own ː and the trailing ː from the input is then pushed
as well, yielding "eːː". Skip a long-vowel mark (ː or ー) that directly
follows the contraction so the output stays "eː".

https://claude.ai/code/session_0199yco9qEy3UF4UoBegNEzN
---
 stationapi/src/domain/ipa.rs | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/stationapi/src/domain/ipa.rs b/stationapi/src/domain/ipa.rs
index 19a6e23c..2084c579 100644
--- a/stationapi/src/domain/ipa.rs
+++ b/stationapi/src/domain/ipa.rs
@@ -384,6 +384,10 @@ fn apply_vowel_length(input: &str) -> String {
             result.push('e');
             result.push('ː');
             i += 2;
+            // Skip a following long-vowel mark to avoid duplicate 'ː'
+            if i < len && (chars[i] == 'ː' || chars[i] == 'ー') {
+                i += 1;
+            }
             continue;
         }
         result.push(chars[i]);
@@ -495,6 +499,12 @@ mod tests {
         assert_eq!(ipa("セイブ"), "seːbɯ");
     }
 
+    #[test]
+    fn test_ei_long_vowel_no_duplicate() {
+        // セイー should not produce "seːː"
+        assert_eq!(ipa("セイー"), "seː");
+    }
+
     #[test]
     fn test_toride() {
         assert_eq!(ipa("トリデ"), "toɾide");

From 4b073c1c191f64c4e05dc5a22f0b209dee63cc29 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 6 Mar 2026 13:01:38 +0000
Subject: [PATCH 4/4] =?UTF-8?q?fix:=20prevent=20duplicate=20=CB=90=20in=20?=
 =?UTF-8?q?o=C9=AF=E2=86=92o=CB=90=20and=20oo=E2=86=92o=CB=90=20vowel=20co?=
 =?UTF-8?q?ntractions?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Same issue as the ei→eː branch: if the input already has a long-vowel
mark directly after the vowel pair, the contraction would produce "oːː".
Add the same skip-next-ː guard to the oɯ→oː and oo→oː branches.

https://claude.ai/code/session_0199yco9qEy3UF4UoBegNEzN
---
 stationapi/src/domain/ipa.rs | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/stationapi/src/domain/ipa.rs b/stationapi/src/domain/ipa.rs
index 2084c579..d0c09934 100644
--- a/stationapi/src/domain/ipa.rs
+++ b/stationapi/src/domain/ipa.rs
@@ -370,6 +370,10 @@ fn apply_vowel_length(input: &str) -> String {
             result.push('o');
             result.push('ː');
             i += 2;
+            // Skip a following long-vowel mark to avoid duplicate 'ː'
+            if i < len && (chars[i] == 'ː' || chars[i] == 'ー') {
+                i += 1;
+            }
             continue;
         }
         if i + 1 < len && chars[i] == 'o' && chars[i + 1] == 'o' {
@@ -377,6 +381,10 @@ fn apply_vowel_length(input: &str) -> String {
             result.push('o');
             result.push('ː');
             i += 2;
+            // Skip a following long-vowel mark to avoid duplicate 'ː'
+            if i < len && (chars[i] == 'ː' || chars[i] == 'ー') {
+                i += 1;
+            }
             continue;
         }
         if i + 1 < len && chars[i] == 'e' && chars[i + 1] == 'i' {
@@ -505,6 +513,18 @@ mod tests {
         assert_eq!(ipa("セイー"), "seː");
     }
 
+    #[test]
+    fn test_ou_long_vowel_no_duplicate() {
+        // コウー should not produce "koːː"
+        assert_eq!(ipa("コウー"), "koː");
+    }
+
+    #[test]
+    fn test_oo_long_vowel_no_duplicate() {
+        // オオー should not produce "oːː"
+        assert_eq!(ipa("オオー"), "oː");
+    }
+
     #[test]
     fn test_toride() {
         assert_eq!(ipa("トリデ"), "toɾide");