keymanapp · jahorton · Apr 9, 2026 · mcdurdin · Apr 10, 2026 · jahorton
diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/transition-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/transition-helpers.ts
@@ -9,7 +9,9 @@
 
 import { LexicalModelTypes } from '@keymanapp/common-types';
 
+import { ContextToken } from './context-token.js';
 import { ContextTokenization } from './context-tokenization.js';
+import { SearchQuotientCluster } from './search-quotient-cluster.js';
 import { legacySubsetKeyer, TokenizationSubset, TokenizationSubsetBuilder } from './tokenization-subsets.js';
 
 import Distribution = LexicalModelTypes.Distribution;
@@ -80,11 +82,9 @@ export function precomputeTransitions(
 /**
  * Given results from `precomputeTransitions`, this function generates the
  * context variations that should result from the current context variants and
- * input.  The one best matching the user's visible context will be set at index
- * 0.
+ * input.
  * @param precomputedTransitionSubsets
  * @param transformDistribution
- * @param keyMatchingUserContext
  * @returns
  */
 export function transitionTokenizations(
@@ -124,10 +124,61 @@ export function transitionTokenizations(
       return;
     }
 
-    // Of particular note - there may no longer be a specific, single set of edits
-    // for the transition; there will be different paths to reach a tokenization!
-    throw new Error("Multi-tokenization transitions not yet supported.");
+    finalTokenizations.set(key, mergeAlignedTokenizations(independentTransitionResults));
   });
 
   return finalTokenizations;
+}
+
+/**
+ * Given two or more instances of ContextTokenization, this function will
+ * attempt to merge each token's SearchQuotientNodes as necessary to result in a
+ * single instance.
+ *
+ * An error will be thrown if the instances do not sufficiently converge to the
+ * same tokenization pattern.
 if(path.inputCount != inputCount || path.codepointLength != codepointLength) { 
   throw new Error(`SearchQuotientNode does not share same properties as others in the cluster:  inputCount ${path.inputCount} vs ${inputCount}, codepointLength ${path.codepointLength} vs ${codepointLength}`); 
 } 
 // If there's a source-range key mismatch - via mismatch in count or in actual ID, we have an error. 
 if(path.sourceRangeKey != sourceRangeKey) { 
   throw new Error(`SearchQuotientNode does not share the same source identifiers as others in the cluster`); 
 } 
 if(path.inputCount != inputCount || path.codepointLength != codepointLength) { 
   throw new Error(`SearchQuotientNode does not share same properties as others in the cluster:  inputCount ${path.inputCount} vs ${inputCount}, codepointLength ${path.codepointLength} vs ${codepointLength}`); 
 } 
  
 // If there's a source-range key mismatch - via mismatch in count or in actual ID, we have an error. 
 if(path.sourceRangeKey != sourceRangeKey) { 
   throw new Error(`SearchQuotientNode does not share the same source identifiers as others in the cluster`); 
 } 
+ * @param tokenizations
+ * @returns
+ */
+export function mergeAlignedTokenizations(tokenizations: ContextTokenization[]): ContextTokenization {
+  const finalizedTokens: ContextToken[] = [];
+
+  // Iterate through the token indices as long as at least one tokenization
+  // remains with that index.
+  //
+  // Assumption:  all have at least one token.
+  for(
+    let i = 0;
+    tokenizations.length > 0;
+    // Two co-related iteration steps.  Could be expressed as just one with ++i in functor conditional.
+    i++, tokenizations = tokenizations.filter((tokenization) => tokenization.tokens.length > i)
+  ) {
+    const bucket: ContextToken[] = [];
+
+    tokenizations.forEach((tokenization) => {
+      // Check for matches already within the bucket.
+      const token = tokenization.tokens[i];
+
+      if(!bucket.find((t) => t.searchModule.isSameNode(token.searchModule))) {
+        bucket.push(token);
+      }
+    });
+
+    if(bucket.length == 1) {
+      finalizedTokens.push(bucket[0]);
+    } else {
+      const constituentSpurs = bucket.flatMap((token) => {
+        const quotientNode = token.searchModule;
+
+        if(quotientNode instanceof SearchQuotientCluster) {
+          return quotientNode.parents;
+        } else {
+          return quotientNode;
+        }
+      })
+      finalizedTokens.push(new ContextToken(new SearchQuotientCluster(constituentSpurs)));
+    }
+  }
+
+  return new ContextTokenization(finalizedTokens);
 }
diff --git a/...st/auto/headless/engine/predictive-text/worker-thread/context/transition-helpers.tests.ts b/...st/auto/headless/engine/predictive-text/worker-thread/context/transition-helpers.tests.ts
@@ -20,10 +20,12 @@ import {
   generateSubsetId,
   LegacyQuotientRoot,
   LegacyQuotientSpur,
+  mergeAlignedTokenizations,
   models,
   PathInputProperties,
   precomputationSubsetKeyer,
   precomputeTransitions,
+  SearchQuotientCluster,
   TokenizationSubset,
   TokenizationTransitionEdits,
   TransitionEdge,
@@ -276,21 +278,29 @@ function generateFixturesForTransitionSource() {
   );
 
   const distrib2Subsets: Map<string, TokenizationSubset> = new Map();
+  const distrib2Tokenizations: Map<string, ContextTokenization> = new Map();
   transitionSet2.flat().forEach((tuple) => {
-    const oldEntry = distrib2Subsets.get(tuple.key);
+    const key = tuple.key;
+    const oldEntry = distrib2Subsets.get(key);
     if(!oldEntry) {
-      distrib2Subsets.set(tuple.key, tuple.subset)
+      distrib2Subsets.set(key, tuple.subset)
+      distrib2Tokenizations.set(key, tuple.transitionedTokenization);
     } else {
+      // merge tokenizations
       const combinedSubset: TokenizationSubset = {
         key: oldEntry.key,
         transitionEdges: new Map(oldEntry.transitionEdges)
       };
+      const oldTokenization = distrib2Tokenizations.get(key);
 
       tuple.subset.transitionEdges.forEach((value, key) => {
         combinedSubset.transitionEdges.set(key, value);
       });
 
-      distrib2Subsets.set(tuple.key, combinedSubset);
+      const mergedTokenization = mergeAlignedTokenizations([oldTokenization, tuple.transitionedTokenization]);
+
+      distrib2Subsets.set(key, combinedSubset);
+      distrib2Tokenizations.set(key, mergedTokenization);
     }
   });
 
@@ -309,7 +319,7 @@ function generateFixturesForTransitionSource() {
         transitionSet: transitionSet2,
         subsets: distrib2Subsets,
         contextKey: transitionSet2[0][0].key,
-        tokenizations: null as Map<string, ContextTokenization>
+        tokenizations: distrib2Tokenizations
       }
     }
   }
@@ -461,12 +471,29 @@ describe('transitionTokenizations', () => {
     }
   });
 
-  // Issue:  we don't yet have converging tokenizations implemented.  That's kind of a prerequisite here.
-  it.skip('handles the "cafe" fixture\'s second input correctly', () => {
+  it('handles the "cafe" fixture\'s second input correctly', () => {
     const { cafe } = generateFixturesForTransitionSource();
 
     const resultTokenizationMap = transitionTokenizations(cafe.key2.subsets, cafe.key2.dist);
+    const resultTokenizations = [...resultTokenizationMap.values()];
+
+    // Both source tokenizations have one spur each that converges to the same target tokenization.
+    assert.equal(resultTokenizations.length, 3);
+
+    const simpleTransitionTokenizations = resultTokenizations.filter((t) => !(t.tail.searchModule instanceof SearchQuotientCluster));
+    const complexTransitionTokenizations = resultTokenizations.filter((t) => t.tail.searchModule instanceof SearchQuotientCluster);
 
-    assert.equal(resultTokenizationMap.size, 3);
+    assert.equal(simpleTransitionTokenizations.length, 2);
+    assert.equal(complexTransitionTokenizations.length, 1);
+
+
+    for(let key of resultTokenizationMap.keys()) {
+      const msg = `Invalid assumption for tokenization with key ${key}`;
+
+      const actual = resultTokenizationMap.get(key);
+      const expected = cafe.key2.tokenizations.get(key);
+
+      assertMatchingTokenization(actual, expected, msg);
+    }
   });
 });