TrainLCD · TinyKitten · Mar 7, 2026 · Mar 6, 2026 · Mar 6, 2026 · Mar 6, 2026
@@ -344,67 +344,36 @@ fn apply_phonological_rules(phonemes: &[Phoneme]) -> String {
         }
     }
 
-    // Apply long vowel contractions: オウ → oː pattern
-    apply_vowel_length(&output)
+    insert_syllable_breaks(&output)
 }
 
-/// Find the IPA string of the next Regular phoneme in the slice.
-fn find_next_regular(phonemes: &[Phoneme]) -> Option<&'static str> {
-    phonemes.iter().find_map(|p| match p {
-        Phoneme::Regular(ipa) => Some(*ipa),
-        _ => None,
-    })
-}
-
-/// Apply vowel length rules for common Japanese patterns.
-/// オウ → oː (after consonant+o), ョウ/ョオ patterns are handled by digraph + this.
-fn apply_vowel_length(input: &str) -> String {
+/// Insert IPA syllable boundary markers (`.`) between consecutive vowels.
+/// This prevents Google TTS from interpreting cross-mora vowel sequences
+/// (e.g. `ei` in セイ) as English diphthongs (e.g. /eɪ/ → "ai").
+fn insert_syllable_breaks(input: &str) -> String {
     let mut result = String::with_capacity(input.len());
-    let chars: Vec<char> = input.chars().collect();
-    let len = chars.len();
-    let mut i = 0;
+    let mut prev_is_vowel = false;
 
-    while i < len {
-        if i + 1 < len && chars[i] == 'o' && chars[i + 1] == 'ɯ' {
-            // oɯ → oː (おう/こう pattern)
-            result.push('o');
-            result.push('ː');
-            i += 2;
-            // Skip a following long-vowel mark to avoid duplicate 'ː'
-            if i < len && (chars[i] == 'ː' || chars[i] == 'ー') {
-                i += 1;
-            }
-            continue;
-        }
-        if i + 1 < len && chars[i] == 'o' && chars[i + 1] == 'o' {
-            // oo → oː (おお pattern)
-            result.push('o');
-            result.push('ː');
-            i += 2;
-            // Skip a following long-vowel mark to avoid duplicate 'ː'
-            if i < len && (chars[i] == 'ː' || chars[i] == 'ー') {
-                i += 1;
-            }
-            continue;
-        }
-        if i + 1 < len && chars[i] == 'e' && chars[i + 1] == 'i' {
-            // ei → eː (えい/けい pattern — 京成 keisei → keːseː)
-            result.push('e');
-            result.push('ː');
-            i += 2;
-            // Skip a following long-vowel mark to avoid duplicate 'ː'
-            if i < len && (chars[i] == 'ː' || chars[i] == 'ー') {
-                i += 1;
-            }
-            continue;
+    for c in input.chars() {
+        let is_vowel = "aiɯeou".contains(c);
+        if is_vowel && prev_is_vowel {
+            result.push('.');
         }
-        result.push(chars[i]);
-        i += 1;
+        result.push(c);
+        prev_is_vowel = is_vowel;
     }
 
     result
 }
 
+/// Find the IPA string of the next Regular phoneme in the slice.
+fn find_next_regular(phonemes: &[Phoneme]) -> Option<&'static str> {
+    phonemes.iter().find_map(|p| match p {
+        Phoneme::Regular(ipa) => Some(*ipa),
+        _ => None,
+    })
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -428,7 +397,7 @@ mod tests {
 
     #[test]
     fn test_ueno() {
-        assert_eq!(ipa("ウエノ"), "ɯeno");
+        assert_eq!(ipa("ウエノ"), "ɯ.eno");
     }
 
     #[test]
@@ -444,14 +413,12 @@ mod tests {
 
     #[test]
     fn test_osaka() {
-        // オオ → oː
-        assert_eq!(ipa("オオサカ"), "oːsaka");
+        assert_eq!(ipa("オオサカ"), "o.osaka");
     }
 
     #[test]
     fn test_kyoto() {
-        // キョウ → kʲoː (via kʲo + ウ → oɯ → oː)
-        assert_eq!(ipa("キョウト"), "kʲoːto");
+        assert_eq!(ipa("キョウト"), "kʲo.ɯto");
     }
 
     #[test]
@@ -476,8 +443,7 @@ mod tests {
 
     #[test]
     fn test_ryogoku() {
-        // リョウ → ɾʲoː (via ɾʲo + ウ → oɯ → oː)
-        assert_eq!(ipa("リョウゴク"), "ɾʲoːgokɯ");
+        assert_eq!(ipa("リョウゴク"), "ɾʲo.ɯgokɯ");
     }
 
     #[test]
@@ -488,41 +454,23 @@ mod tests {
 
     #[test]
     fn test_keisei() {
-        assert_eq!(ipa("ケイセイ"), "keːseː");
+        assert_eq!(ipa("ケイセイ"), "ke.ise.i");
     }
 
     #[test]
     fn test_oshiage() {
-        assert_eq!(ipa("オシアゲ"), "oɕiage");
+        assert_eq!(ipa("オシアゲ"), "oɕi.age");
     }
 
     #[test]
     fn test_meitetsu() {
         // ツ is consistently t͡sɯ (affricate with tie bar)
-        assert_eq!(ipa("メイテツ"), "meːtet͡sɯ");
+        assert_eq!(ipa("メイテツ"), "me.itet͡sɯ");
     }
 
     #[test]
     fn test_seibu() {
-        assert_eq!(ipa("セイブ"), "seːbɯ");
-    }
-
-    #[test]
-    fn test_ei_long_vowel_no_duplicate() {
-        // セイー should not produce "seːː"
-        assert_eq!(ipa("セイー"), "seː");
-    }
-
-    #[test]
-    fn test_ou_long_vowel_no_duplicate() {
-        // コウー should not produce "koːː"
-        assert_eq!(ipa("コウー"), "koː");
-    }
-
-    #[test]
-    fn test_oo_long_vowel_no_duplicate() {
-        // オオー should not produce "oːː"
-        assert_eq!(ipa("オオー"), "oː");
+        assert_eq!(ipa("セイブ"), "se.ibɯ");
     }
 
     #[test]
@@ -532,7 +480,7 @@ mod tests {
 
     #[test]
     fn test_fukiage() {
-        assert_eq!(ipa("フキアゲ"), "ɸɯkʲiage");
+        assert_eq!(ipa("フキアゲ"), "ɸɯkʲi.age");
     }
 
     #[test]
@@ -543,7 +491,7 @@ mod tests {
     #[test]
     fn test_inagekaigan() {
         // ン at word end → ɴ
-        assert_eq!(ipa("イナゲカイガン"), "inagekaigaɴ");
+        assert_eq!(ipa("イナゲカイガン"), "inageka.igaɴ");
     }
 
     #[test]
@@ -553,12 +501,12 @@ mod tests {
 
     #[test]
     fn test_kire_uriwari() {
-        assert_eq!(ipa("キレウリワリ"), "kʲiɾeɯɾiwaɾi");
+        assert_eq!(ipa("キレウリワリ"), "kʲiɾe.ɯɾiwaɾi");
     }
 
     #[test]
     fn test_yao() {
-        assert_eq!(ipa("ヤオ"), "jao");
+        assert_eq!(ipa("ヤオ"), "ja.o");
     }
 
     #[test]
@@ -578,32 +526,22 @@ mod tests {
 
     #[test]
     fn test_itchome() {
-        // ッチョウ → tt͡ɕoː
-        assert_eq!(ipa("イッチョウメ"), "itt͡ɕoːme");
+        assert_eq!(ipa("イッチョウメ"), "itt͡ɕo.ɯme");
     }
 
     #[test]
     fn test_sanchome() {
-        assert_eq!(ipa("サンチョウメ"), "sant͡ɕoːme");
+        assert_eq!(ipa("サンチョウメ"), "sant͡ɕo.ɯme");
     }
 
     #[test]
     fn test_koen() {
-        // コウエン: コ=ko, ウ→長音化でoː, エン=eɴ → koːeɴ
-        // Note: the original hardcoded value was "koeɴ" but phonologically "koːeɴ" is correct
-        assert_eq!(ipa("コウエン"), "koːeɴ");
-    }
-
-    #[test]
-    fn test_long_vowel_mark() {
-        // ー explicitly lengthens
-        assert_eq!(ipa("ラーメン"), "ɾaːmeɴ");
+        assert_eq!(ipa("コウエン"), "ko.ɯ.eɴ");
     }
 
     #[test]
     fn test_tokyo() {
-        // トウキョウ: ト=to, ウ→oː, キョ=kʲo, ウ→oː
-        assert_eq!(ipa("トウキョウ"), "toːkʲoː");
+        assert_eq!(ipa("トウキョウ"), "to.ɯkʲo.ɯ");
     }
 
     #[test]
@@ -629,18 +567,6 @@ mod tests {
         assert_eq!(ipa("シンヨコハマ"), "ɕiɲjokohama");
     }
 
-    #[test]
-    fn test_geminate_ji() {
-        // ッジ → dʤi (voiced affricate gemination emits 'd')
-        assert_eq!(ipa("カッジ"), "kadʤi");
-    }
-
-    #[test]
-    fn test_geminate_ju() {
-        // ッジュ → ddʑɯ (voiced affricate gemination with digraph)
-        assert_eq!(ipa("カッジュ"), "kaddʑɯ");
-    }
-
     #[test]
     fn test_empty() {
         assert_eq!(katakana_to_ipa(""), Some(String::new()));
@@ -652,18 +578,12 @@ mod tests {
         assert_eq!(katakana_to_ipa("シブヤX"), None);
     }
 
-    #[test]
-    fn test_geminate_palatalized() {
-        // ッキョ → kkʲo (only the base consonant 'k' is geminated, not 'kʲ')
-        assert_eq!(ipa("ニッキョウ"), "ɲikkʲoː");
-    }
-
     #[test]
     fn test_dokkyo_daigakumae_soka_matsubara() {
         // Full-width space between words should be preserved
         assert_eq!(
             ipa("ドッキョウダイガクマエ　ソウカマツバラ"),
-            "dokkʲoːdaigakɯmae soːkamat͡sɯbaɾa"
+            "dokkʲo.ɯda.igakɯma.e so.ɯkamat͡sɯbaɾa"
         );
     }
 
@@ -672,7 +592,7 @@ mod tests {
         // Half-width (ASCII) space between words should also be accepted
         assert_eq!(
             ipa("ドッキョウダイガクマエ ソウカマツバラ"),
-            "dokkʲoːdaigakɯmae soːkamat͡sɯbaɾa"
+            "dokkʲo.ɯda.igakɯma.e so.ɯkamat͡sɯbaɾa"
         );
     }
 

@@ -384,7 +384,10 @@ mod tests {
         line.line_name_k = "セイブイケブクロセン".to_string();
         let grpc_line: GrpcLine = line.into();
 
-        assert_eq!(grpc_line.name_ipa, Some("seːbɯikebɯkɯɾo laɪn".to_string()));
+        assert_eq!(
+            grpc_line.name_ipa,
+            Some("se.ibɯ.ikebɯkɯɾo laɪn".to_string())
+        );
     }
 
     #[test]
@@ -394,7 +397,10 @@ mod tests {
         line.line_name_k = "トウカイドウホンセン".to_string();
         let grpc_line: GrpcLine = line.into();
 
-        assert_eq!(grpc_line.name_ipa, Some("toːkaidoː meɪn laɪn".to_string()));
+        assert_eq!(
+            grpc_line.name_ipa,
+            Some("to.ɯka.ido.ɯ meɪn laɪn".to_string())
+        );
     }
 
     #[test]
@@ -404,6 +410,6 @@ mod tests {
         line.line_name_k = "トウホクシンカンセン".to_string();
         let grpc_line: GrpcLine = line.into();
 
-        assert_eq!(grpc_line.name_ipa, Some("toːhokɯɕiŋkanseɴ".to_string()));
+        assert_eq!(grpc_line.name_ipa, Some("to.ɯhokɯɕiŋkanseɴ".to_string()));
     }
 }