diff --git a/scriptshifter/rest_api.py b/scriptshifter/rest_api.py index 197639f..fbba1e7 100644 --- a/scriptshifter/rest_api.py +++ b/scriptshifter/rest_api.py @@ -73,7 +73,10 @@ def handle_exception(e: ApiError): def index(): return render_template( "index.html", - languages=list_tables(), + languages=sorted( + list_tables().items(), + key=lambda k: k[1]["label"] + ), version_info=(GIT_TAG, GIT_COMMIT), feedback_form=SMTP_HOST is not None or FEEDBACK_PATH is not None) diff --git a/scriptshifter/tables/__init__.py b/scriptshifter/tables/__init__.py index d693c27..96d9818 100644 --- a/scriptshifter/tables/__init__.py +++ b/scriptshifter/tables/__init__.py @@ -65,6 +65,8 @@ logger = logging.getLogger(__name__) tbl_index = None # Module-level index of all scripts. +proc_aliases = set() # Set of alias tables already created. +aliases = {} # Map of language to alias. class Token(str): @@ -165,7 +167,9 @@ def init_db(): conn.executescript(fh.read()) # Populate tables. - global tbl_index + global tbl_index, proc_aliases, aliases + proc_aliases = set() + aliases = {} with open(path.join(path.dirname(TABLE_DIR), "index.yml")) as fh: tbl_index = load(fh, Loader=Loader) try: @@ -205,6 +209,10 @@ def populate_table(conn, tname, tdata): """ logger.info(f"Populating table: {tname}") + check_q = "SELECT id FROM tbl_language WHERE name = ?" + if conn.execute(check_q, (tname,)).fetchone(): + return + res = conn.execute( """INSERT INTO tbl_language ( name, label, marc_code, description @@ -217,6 +225,20 @@ def populate_table(conn, tname, tdata): tid = res.lastrowid data = load_table(tname) + if "alias_of" in data: + # If an alias, insert the alias ID. + ref_name = data["alias_of"] + logger.info(f"{tname} is an alias of {ref_name}.") + ref_data = conn.execute(check_q, (ref_name,)).fetchone() + # Check if the ref table has already been populated. + if not ref_data: + populate_table(conn, ref_name, tbl_index[ref_name]) + ref_data = conn.execute(check_q, (ref_name,)).fetchone() + ref_id = ref_data[0] + conn.execute( + "UPDATE tbl_language SET ref_id = ? WHERE id = ?", + (ref_id, tid)) + flags = 0 if "script_to_roman" in data: flags |= FEAT_S2R @@ -340,16 +362,20 @@ def load_table(tname): The table file is parsed into an in-memory configuration that contains the language & script metadata and parsing rules. """ + if "alias_of" in tbl_index.get(tname, {}): + conf_name = tbl_index[tname]["alias_of"] + aliases[tname] = conf_name - try: - fname = path.join(TABLE_DIR, tbl_index[tname]["conf"]) - except KeyError: - # If no `conf` key is provided, use the conventional table name + .yml. - fname = path.join(TABLE_DIR, tname + ".yml") + return {"alias_of": conf_name} + + else: + # If no `alias_of` key is provided, use the regular table name + .yml. + conf_name = tname + + fname = path.join(TABLE_DIR, conf_name + ".yml") if not access(fname, R_OK): raise ValueError( f"No transliteration table `{fname}` found for {tname}!") - with open(fname) as fh: tdata = load(fh, Loader=Loader) @@ -568,9 +594,13 @@ def get_language(lang): def get_lang_general(conn, lang): """ Language general attributes. """ + ref_q = "SELECT id, ref_id FROM tbl_language WHERE name = ?" + ref_data = conn.execute(ref_q, (lang,)).fetchone() + lang_id = ref_data[1] if ref_data[1] else ref_data[0] + lang_q = conn.execute( """SELECT id, name, label, features, marc_code, description - FROM tbl_language WHERE name = ?""", (lang,)) + FROM tbl_language WHERE id = ?""", (lang_id,)) lang_data = lang_q.fetchone() if not lang_data: @@ -579,7 +609,7 @@ def get_lang_general(conn, lang): return { "id": lang_data[0], "data": { - "name": lang_data[1], + "name": lang, "label": lang_data[2], "has_s2r": bool(lang_data[3] & FEAT_S2R), "has_r2s": bool(lang_data[3] & FEAT_R2S), @@ -591,6 +621,7 @@ def get_lang_general(conn, lang): def get_lang_normalize(conn, lang_id): + lang_id = _get_ref(conn, lang_id) qry = conn.execute( """SELECT src, dest FROM tbl_normalize WHERE lang_id = ?""", @@ -602,6 +633,7 @@ def get_lang_ignore(conn, lang_id): """ Ignore list as a tuple. """ + lang_id = _get_ref(conn, lang_id) qry = conn.execute( """SELECT rule, features FROM tbl_ignore WHERE lang_id = ?""", @@ -618,6 +650,7 @@ def get_lang_map(conn, lang_id, t_dir): Generator of tuples (source, destination). """ + lang_id = _get_ref(conn, lang_id) qry = conn.execute( """SELECT src, dest FROM tbl_trans_map WHERE lang_id = ? AND dir = ? @@ -630,6 +663,7 @@ def get_lang_map(conn, lang_id, t_dir): def get_lang_options(conn, lang_id): """ Language options as a tuple of dictionaries. """ + lang_id = _get_ref(conn, lang_id) qry = conn.execute( """SELECT name, label, description, dtype, options, default_v FROM tbl_option @@ -650,6 +684,7 @@ def get_lang_options(conn, lang_id): def get_lang_hooks(conn, lang_id, t_dir): + lang_id = _get_ref(conn, lang_id) """ Language hooks in sorting order. """ hooks = defaultdict(list) @@ -672,9 +707,18 @@ def get_lang_hooks(conn, lang_id, t_dir): def get_lang_dcap(conn, lang_id): + lang_id = _get_ref(conn, lang_id) qry = conn.execute( """SELECT rule FROM tbl_double_cap WHERE lang_id = ?""", (lang_id,)) return tuple(row[0] for row in qry) + + +def _get_ref(conn, lang_id): + ref_data = conn.execute( + """SELECT ref_id FROM tbl_language WHERE id = ?""", + (lang_id,)).fetchone() + + return ref_data[0] if ref_data and ref_data[0] else lang_id diff --git a/scriptshifter/tables/data/assamese.yml b/scriptshifter/tables/data/assamese.yml index 7f95279..60041ca 100644 --- a/scriptshifter/tables/data/assamese.yml +++ b/scriptshifter/tables/data/assamese.yml @@ -6,8 +6,7 @@ general: version: 1.0.0 date: 2025-11-30 parents: - - _ignore_base - - _bengali_base + - bengali roman_to_script: map: diff --git a/scriptshifter/tables/data/_bengali_base.yml b/scriptshifter/tables/data/bengali.yml similarity index 100% rename from scriptshifter/tables/data/_bengali_base.yml rename to scriptshifter/tables/data/bengali.yml diff --git a/scriptshifter/tables/data/bodo_bengali.yml b/scriptshifter/tables/data/bodo_bengali.yml index 7768d2b..8ba59f6 100644 --- a/scriptshifter/tables/data/bodo_bengali.yml +++ b/scriptshifter/tables/data/bodo_bengali.yml @@ -6,7 +6,7 @@ general: version: 1.0.0 date: 2026-01-08 parents: - - _bengali_base + - bengali roman_to_script: map: diff --git a/scriptshifter/tables/data/_gurmukhi_base.yml b/scriptshifter/tables/data/gurmukhi_generic.yml similarity index 99% rename from scriptshifter/tables/data/_gurmukhi_base.yml rename to scriptshifter/tables/data/gurmukhi_generic.yml index 6897ea2..a834d4c 100644 --- a/scriptshifter/tables/data/_gurmukhi_base.yml +++ b/scriptshifter/tables/data/gurmukhi_generic.yml @@ -1,6 +1,6 @@ --- general: - name: Gurmukhi base + name: Gurmukhi (generic) case_sensitive: false description: Bidirectional base mapping for the Gurmukhi script. version: 1.0.0 diff --git a/scriptshifter/tables/data/manipuri_bengali.yml b/scriptshifter/tables/data/manipuri_bengali.yml index 18a9d5e..59dd514 100644 --- a/scriptshifter/tables/data/manipuri_bengali.yml +++ b/scriptshifter/tables/data/manipuri_bengali.yml @@ -6,7 +6,7 @@ general: version: 0.0.0 date: 2025-12-23 parents: - - _bengali_base + - bengali roman_to_script: map: diff --git a/scriptshifter/tables/data/panjabi_gurmukhi.yml b/scriptshifter/tables/data/panjabi_gurmukhi.yml index e8a62ff..6c41854 100644 --- a/scriptshifter/tables/data/panjabi_gurmukhi.yml +++ b/scriptshifter/tables/data/panjabi_gurmukhi.yml @@ -6,7 +6,7 @@ general: version: 1.0.0 date: 2025-12-23 parents: - - _gurmukhi_base + - gurmukhi_generic roman_to_script: hooks: diff --git a/scriptshifter/tables/index.yml b/scriptshifter/tables/index.yml index 9c87dd2..df29230 100644 --- a/scriptshifter/tables/index.yml +++ b/scriptshifter/tables/index.yml @@ -23,7 +23,7 @@ altai_cyrillic: marc_code: alt name: Altai (Cyrillic) amharic: - conf: "ethiopic_generic.yml" + alias_of: ethiopic_generic marc_code: amh name: Amharic arabic: @@ -33,7 +33,7 @@ arabic: marc_code: ara name: Arabic argobba_ethiopic: - conf: "ethiopic_generic.yml" + alias_of: ethiopic_generic marc_code: amh name: Argobba (Ethiopic) armenian: @@ -46,14 +46,14 @@ avaric_cyrillic: marc_code: ava name: Avaric (Cyrillic) awadhi_devanagari: - conf: "devanagari_generic.yml" + alias_of: devanagari_generic marc_code: awa name: Awadhi (Devanagari) azerbaijani_cyrillic: marc_code: aze name: Azerbaijani (Cyrillic) balkar_cyrillic: - conf: "cyrillic_generic.yml" + alias_of: cyrillic_generic marc_code: krc name: Balkar (Cyrillic) bashkir_cyrillic: @@ -63,11 +63,10 @@ belarusian: marc_code: bel name: Belarusian bengali: - conf: "_bengali_base.yml" marc_code: ben name: Bengali bihari_devanagari: - conf: "devanagari_generic.yml" + alias_of: devanagari_generic marc_code: bih name: Bihari (Devanagari) bodo_bengali: @@ -77,7 +76,7 @@ bodo_devanagari: marc_code: sit name: Bodo (Devanagari) braj_devanagari: - conf: "devanagari_generic.yml" + alias_of: devanagari_generic marc_code: bra name: Braj (Devanagari) bulgarian: @@ -117,7 +116,7 @@ cyrillic_generic: marc_code: mul name: Cyrillic (Generic) dargwa_cyrillic: - conf: "cyrillic_generic.yml" + alias_of: cyrillic_generic name: Dargwa (Cyrillic) marc_code: dar devanagari_generic: @@ -133,7 +132,7 @@ dungan_cyrillic: marc_code: sit name: Dungan (Cyrillic) dzongkha_tibetan: - conf: "tibetan.yml" + alias_of: tibetan marc_code: dzo name: Dzongkha (Tibetan) ethiopic_generic: @@ -152,7 +151,7 @@ georgian: marc_code: geo name: Georgian gilyak_cyrillic: - conf: "cyrillic_generic.yml" + alias_of: cyrillic_generic marc_code: mis name: Gilyak (Cyrillic) glagolitic: @@ -168,22 +167,24 @@ gujarati: marc_code: guj name: Gujarati gurage_ethiopic: - conf: "ethiopic_generic.yml" + alias_of: ethiopic_generic marc_code: sem name: Gurage (Ethiopic) +gurmukhi_generic: + name: Gurmukhi (generic) hebrew: marc_code: heb name: Hebrew hindi: - conf: "devanagari_generic.yml" + alias_of: devanagari_generic marc_code: hin name: Hindi (Devanagari) ingush_cyrillic: - conf: "cyrillic_generic.yml" + alias_of: cyrillic_generic marc_code: inh name: Ingush (Cyrillic) inuit_cyrillic: - conf: "cyrillic_generic.yml" + alias_of: cyrillic_generic marc_code: ipk name: Inuit (Cyrillic) inuktitut: @@ -196,7 +197,7 @@ japanese_katakana: marc_code: jpn name: Japanese (Katakana) kabardian_cyrillic: - conf: "cyrillic_generic.yml" + alias_of: cyrillic_generic marc_code: kbd name: Kabardian (Cyrillic) kalmyk_cyrillic: @@ -209,7 +210,7 @@ kannada: marc_code: kan name: Kannada kara-kalpak_cyrillic: - conf: "cyrillic_generic.yml" + alias_of: cyrillic_generic marc_code: kaa name: Kara-Kalpak (Cyrillic) karachay-balkar_cyrillic: @@ -237,11 +238,11 @@ komi-permyak_cyrillic: marc_code: kom name: Komi-Permyak (Cyrillic) konkani_devanagari: - conf: "devanagari_generic.yml" + alias_of: devanagari_generic marc_code: kok name: Konkani (Devanagari) konkani_kannada: - conf: "kannada.yml" + alias_of: kannada marc_code: kok name: Konkani (Kannada) korean_names: @@ -258,7 +259,7 @@ koryak_cyrillic: marc_code: mis name: Koryak (Cyrillic) kumyk_cyrillic: - conf: "cyrillic_generic.yml" + alias_of: cyrillic_generic marc_code: kum name: Kumyk (Cyrillic) kurdish_arabic: @@ -271,18 +272,18 @@ kyrgyz_cyrillic: marc_code: kir name: Kyrgyz (Cyrillic) lak_cyrillic: - conf: "cyrillic_generic.yml" + alias_of: cyrillic_generic marc_code: cau name: Lak (Cyrillic) lahnda_gurmukhi: - conf: "_gurmukhi_base.yml" + alias_of: gurmukhi_generic marc_code: lah name: Lahnda (Gurmukhi) lepcha: marc_code: sit name: Lepcha lezghian_cyrillic: - conf: "cyrillic_generic.yml" + alias_of: cyrillic_generic marc_code: lez name: Lezghian (Cyrillic) limbu: @@ -295,7 +296,7 @@ macedonian: marc_code: mac name: Macedonian maithili_devanagari: - conf: "devanagari_generic.yml" + alias_of: devanagari_generic marc_code: mai name: Maithili (Devanagari) malayalam: @@ -317,7 +318,7 @@ marathi_devanagari: marc_code: mar name: Marathi (Devanagari) mari_cyrillic: - conf: "cyrillic_generic.yml" + alias_of: cyrillic_generic marc_code: chm name: Mari (Cyrillic) moldovan_cyrillic: @@ -336,26 +337,26 @@ mordvin_cyrillic: marc_code: fiu name: Mordvin (Cyrillic) nanai_cyrillic: - conf: "cyrillic_generic.yml" + alias_of: cyrillic_generic marc_code: tut name: Nanai (Cyrillic) nenets_cyrillic: marc_code: mis name: Nenets (Cyrillic) nepali_devanagari: - conf: "devanagari_generic.yml" + alias_of: devanagari_generic marc_code: nep name: Nepali (Devanagari) newari_devanagari: - conf: "devanagari_generic.yml" + alias_of: devanagari_generic marc_code: new name: Newari (Devanagari) nivkh_cyrillic: - conf: "cyrillic_generic.yml" + alias_of: cyrillic_generic marc_code: mis name: Nivkh (Cyrillic) nogai_cyrillic: - conf: "cyrillic_generic.yml" + alias_of: cyrillic_generic marc_code: nog name: Nogai (Cyrillic) odia: @@ -365,26 +366,26 @@ ossetic_cyrillic: marc_code: oss name: Ossetic (Cyrillic) pahari_devanagari: - conf: "devanagari_generic.yml" + alias_of: devanagari_generic marc_code: him name: Pahari (Devanagari) pali_bengali: - conf: "_bengali_base.yml" + alias_of: bengali marc_code: pli name: Pali (Bengali) pali_devanagari: - conf: "devanagari_generic.yml" + alias_of: devanagari_generic marc_code: pli name: Pali (Devanagari) pali_sinhala: - conf: "sinhala.yml" + alias_of: sinhala marc_code: pli name: Pali (Sinhala) panjabi_gurmukhi: marc_code: pan name: Panjabi (Gurmukhi) permyak_cyrillic: - conf: "komi-permyak_cyrillic.yml" + alias_of: komi-permyak_cyrillic marc_code: kom name: Permyak (Cyrillic) persian: @@ -397,11 +398,11 @@ pushto: marc_code: pus name: Pushto rajasthani_devanagari: - conf: "devanagari_generic.yml" + alias_of: devanagari_generic marc_code: raj name: Rajasthani (Devanagari) romani_cyrillic: - conf: "cyrillic_generic.yml" + alias_of: cyrillic_generic marc_code: rom name: Romani (Cyrillic) romanian_cyrillic: @@ -432,7 +433,7 @@ sindhi_arabic: marc_code: snd name: Sindhi (Arabic) sindhi_gurmukhi: - conf: "_gurmukhi_base.yml" + alias_of: gurmukhi_generic marc_code: snd name: Sindhi (Gurmukhi) sinhala: @@ -442,7 +443,7 @@ syriac_cyrillic: marc_code: syc name: Syriac (Cyrillic) tabasaran_cyrillic: - conf: "cyrillic_generic.yml" + alias_of: cyrillic_generic marc_code: cau name: Tabasaran (Cyrillic) tajik_cyrillic: @@ -458,7 +459,7 @@ tamil_extended: marc_code: tam name: Tamil (extended) tat_cyrillic: - conf: "cyrillic_generic.yml" + alias_of: cyrillic_generic marc_code: ira name: Tat (Cyrillic) tatar_cyrillic: @@ -480,22 +481,22 @@ tibetan_2015_r2r: marc_code: tib name: Tibetan (ñ,ṅ,ś,ź to ny,ng,sh,zh only) tigre_ethiopic: - conf: "ethiopic_generic.yml" + alias_of: ethiopic_generic marc_code: tig name: Tigre (Ethiopic) tigrinya_ethiopic: - conf: "ethiopic_generic.yml" + alias_of: ethiopic_generic marc_code: tir name: Tigrinya (Ethiopic) turkmen_cyrillic: marc_code: tuk name: Turkmen (Cyrillic) tuvinian_cyrillic: - conf: "cyrillic_generic.yml" + alias_of: cyrillic_generic marc_code: tyv name: Tuvinian (Cyrillic) udekhe_cyrillic: - conf: "cyrillic_generic.yml" + alias_of: cyrillic_generic marc_code: tut name: Udekhe (Cyrillic) udmurt_cyrillic: diff --git a/scriptshifter/tables/init.sql b/scriptshifter/tables/init.sql index a563d1c..1e72023 100644 --- a/scriptshifter/tables/init.sql +++ b/scriptshifter/tables/init.sql @@ -5,11 +5,14 @@ */ CREATE TABLE tbl_language ( id INTEGER PRIMARY KEY, + ref_id INTEGER NULL, name TEXT UNIQUE, label TEXT, marc_code TEXT, description TEXT, - features TINYINT DEFAULT 0 + features TINYINT DEFAULT 0, + + FOREIGN KEY (ref_id) REFERENCES tbl_language(id) ON DELETE RESTRICT ); /* diff --git a/scriptshifter/templates/index.html b/scriptshifter/templates/index.html index 042367b..4469d42 100644 --- a/scriptshifter/templates/index.html +++ b/scriptshifter/templates/index.html @@ -59,7 +59,7 @@