diff --git a/scriptshifter/rest_api.py b/scriptshifter/rest_api.py
index 197639f..fbba1e7 100644
--- a/scriptshifter/rest_api.py
+++ b/scriptshifter/rest_api.py
@@ -73,7 +73,10 @@ def handle_exception(e: ApiError):
def index():
return render_template(
"index.html",
- languages=list_tables(),
+ languages=sorted(
+ list_tables().items(),
+ key=lambda k: k[1]["label"]
+ ),
version_info=(GIT_TAG, GIT_COMMIT),
feedback_form=SMTP_HOST is not None or FEEDBACK_PATH is not None)
diff --git a/scriptshifter/tables/__init__.py b/scriptshifter/tables/__init__.py
index d693c27..96d9818 100644
--- a/scriptshifter/tables/__init__.py
+++ b/scriptshifter/tables/__init__.py
@@ -65,6 +65,8 @@
logger = logging.getLogger(__name__)
tbl_index = None # Module-level index of all scripts.
+proc_aliases = set() # Set of alias tables already created.
+aliases = {} # Map of language to alias.
class Token(str):
@@ -165,7 +167,9 @@ def init_db():
conn.executescript(fh.read())
# Populate tables.
- global tbl_index
+ global tbl_index, proc_aliases, aliases
+ proc_aliases = set()
+ aliases = {}
with open(path.join(path.dirname(TABLE_DIR), "index.yml")) as fh:
tbl_index = load(fh, Loader=Loader)
try:
@@ -205,6 +209,10 @@ def populate_table(conn, tname, tdata):
"""
logger.info(f"Populating table: {tname}")
+ check_q = "SELECT id FROM tbl_language WHERE name = ?"
+ if conn.execute(check_q, (tname,)).fetchone():
+ return
+
res = conn.execute(
"""INSERT INTO tbl_language (
name, label, marc_code, description
@@ -217,6 +225,20 @@ def populate_table(conn, tname, tdata):
tid = res.lastrowid
data = load_table(tname)
+ if "alias_of" in data:
+ # If an alias, insert the alias ID.
+ ref_name = data["alias_of"]
+ logger.info(f"{tname} is an alias of {ref_name}.")
+ ref_data = conn.execute(check_q, (ref_name,)).fetchone()
+ # Check if the ref table has already been populated.
+ if not ref_data:
+ populate_table(conn, ref_name, tbl_index[ref_name])
+ ref_data = conn.execute(check_q, (ref_name,)).fetchone()
+ ref_id = ref_data[0]
+ conn.execute(
+ "UPDATE tbl_language SET ref_id = ? WHERE id = ?",
+ (ref_id, tid))
+
flags = 0
if "script_to_roman" in data:
flags |= FEAT_S2R
@@ -340,16 +362,20 @@ def load_table(tname):
The table file is parsed into an in-memory configuration that contains
the language & script metadata and parsing rules.
"""
+ if "alias_of" in tbl_index.get(tname, {}):
+ conf_name = tbl_index[tname]["alias_of"]
+ aliases[tname] = conf_name
- try:
- fname = path.join(TABLE_DIR, tbl_index[tname]["conf"])
- except KeyError:
- # If no `conf` key is provided, use the conventional table name + .yml.
- fname = path.join(TABLE_DIR, tname + ".yml")
+ return {"alias_of": conf_name}
+
+ else:
+ # If no `alias_of` key is provided, use the regular table name + .yml.
+ conf_name = tname
+
+ fname = path.join(TABLE_DIR, conf_name + ".yml")
if not access(fname, R_OK):
raise ValueError(
f"No transliteration table `{fname}` found for {tname}!")
-
with open(fname) as fh:
tdata = load(fh, Loader=Loader)
@@ -568,9 +594,13 @@ def get_language(lang):
def get_lang_general(conn, lang):
""" Language general attributes. """
+    ref_q = "SELECT id, ref_id FROM tbl_language WHERE name = ?"
+    ref_data = conn.execute(ref_q, (lang,)).fetchone()
+    # Unknown name -> None, so the lookup below finds nothing.
+    lang_id = (ref_data[1] or ref_data[0]) if ref_data else None
lang_q = conn.execute(
"""SELECT id, name, label, features, marc_code, description
- FROM tbl_language WHERE name = ?""", (lang,))
+ FROM tbl_language WHERE id = ?""", (lang_id,))
lang_data = lang_q.fetchone()
if not lang_data:
@@ -579,7 +609,7 @@ def get_lang_general(conn, lang):
return {
"id": lang_data[0],
"data": {
- "name": lang_data[1],
+ "name": lang,
"label": lang_data[2],
"has_s2r": bool(lang_data[3] & FEAT_S2R),
"has_r2s": bool(lang_data[3] & FEAT_R2S),
@@ -591,6 +621,7 @@ def get_lang_general(conn, lang):
def get_lang_normalize(conn, lang_id):
+ lang_id = _get_ref(conn, lang_id)
qry = conn.execute(
"""SELECT src, dest FROM tbl_normalize
WHERE lang_id = ?""",
@@ -602,6 +633,7 @@ def get_lang_ignore(conn, lang_id):
"""
Ignore list as a tuple.
"""
+ lang_id = _get_ref(conn, lang_id)
qry = conn.execute(
"""SELECT rule, features FROM tbl_ignore
WHERE lang_id = ?""",
@@ -618,6 +650,7 @@ def get_lang_map(conn, lang_id, t_dir):
Generator of tuples (source, destination).
"""
+ lang_id = _get_ref(conn, lang_id)
qry = conn.execute(
"""SELECT src, dest FROM tbl_trans_map
WHERE lang_id = ? AND dir = ?
@@ -630,6 +663,7 @@ def get_lang_map(conn, lang_id, t_dir):
def get_lang_options(conn, lang_id):
""" Language options as a tuple of dictionaries. """
+ lang_id = _get_ref(conn, lang_id)
qry = conn.execute(
"""SELECT name, label, description, dtype, options, default_v
FROM tbl_option
@@ -650,6 +684,7 @@ def get_lang_options(conn, lang_id):
def get_lang_hooks(conn, lang_id, t_dir):
+ lang_id = _get_ref(conn, lang_id)
""" Language hooks in sorting order. """
hooks = defaultdict(list)
@@ -672,9 +707,18 @@ def get_lang_hooks(conn, lang_id, t_dir):
def get_lang_dcap(conn, lang_id):
+ lang_id = _get_ref(conn, lang_id)
qry = conn.execute(
"""SELECT rule
FROM tbl_double_cap WHERE lang_id = ?""",
(lang_id,))
return tuple(row[0] for row in qry)
+
+
+def _get_ref(conn, lang_id):
+    """ Resolve `lang_id` to its `ref_id`; fall back to `lang_id` itself. """
+    row = conn.execute(
+        """SELECT ref_id FROM tbl_language WHERE id = ?""",
+        (lang_id,)).fetchone()
+    return (row[0] if row else None) or lang_id
diff --git a/scriptshifter/tables/data/assamese.yml b/scriptshifter/tables/data/assamese.yml
index 7f95279..60041ca 100644
--- a/scriptshifter/tables/data/assamese.yml
+++ b/scriptshifter/tables/data/assamese.yml
@@ -6,8 +6,7 @@ general:
version: 1.0.0
date: 2025-11-30
parents:
- - _ignore_base
- - _bengali_base
+ - bengali
roman_to_script:
map:
diff --git a/scriptshifter/tables/data/_bengali_base.yml b/scriptshifter/tables/data/bengali.yml
similarity index 100%
rename from scriptshifter/tables/data/_bengali_base.yml
rename to scriptshifter/tables/data/bengali.yml
diff --git a/scriptshifter/tables/data/bodo_bengali.yml b/scriptshifter/tables/data/bodo_bengali.yml
index 7768d2b..8ba59f6 100644
--- a/scriptshifter/tables/data/bodo_bengali.yml
+++ b/scriptshifter/tables/data/bodo_bengali.yml
@@ -6,7 +6,7 @@ general:
version: 1.0.0
date: 2026-01-08
parents:
- - _bengali_base
+ - bengali
roman_to_script:
map:
diff --git a/scriptshifter/tables/data/_gurmukhi_base.yml b/scriptshifter/tables/data/gurmukhi_generic.yml
similarity index 99%
rename from scriptshifter/tables/data/_gurmukhi_base.yml
rename to scriptshifter/tables/data/gurmukhi_generic.yml
index 6897ea2..a834d4c 100644
--- a/scriptshifter/tables/data/_gurmukhi_base.yml
+++ b/scriptshifter/tables/data/gurmukhi_generic.yml
@@ -1,6 +1,6 @@
---
general:
- name: Gurmukhi base
+ name: Gurmukhi (generic)
case_sensitive: false
description: Bidirectional base mapping for the Gurmukhi script.
version: 1.0.0
diff --git a/scriptshifter/tables/data/manipuri_bengali.yml b/scriptshifter/tables/data/manipuri_bengali.yml
index 18a9d5e..59dd514 100644
--- a/scriptshifter/tables/data/manipuri_bengali.yml
+++ b/scriptshifter/tables/data/manipuri_bengali.yml
@@ -6,7 +6,7 @@ general:
version: 0.0.0
date: 2025-12-23
parents:
- - _bengali_base
+ - bengali
roman_to_script:
map:
diff --git a/scriptshifter/tables/data/panjabi_gurmukhi.yml b/scriptshifter/tables/data/panjabi_gurmukhi.yml
index e8a62ff..6c41854 100644
--- a/scriptshifter/tables/data/panjabi_gurmukhi.yml
+++ b/scriptshifter/tables/data/panjabi_gurmukhi.yml
@@ -6,7 +6,7 @@ general:
version: 1.0.0
date: 2025-12-23
parents:
- - _gurmukhi_base
+ - gurmukhi_generic
roman_to_script:
hooks:
diff --git a/scriptshifter/tables/index.yml b/scriptshifter/tables/index.yml
index 9c87dd2..df29230 100644
--- a/scriptshifter/tables/index.yml
+++ b/scriptshifter/tables/index.yml
@@ -23,7 +23,7 @@ altai_cyrillic:
marc_code: alt
name: Altai (Cyrillic)
amharic:
- conf: "ethiopic_generic.yml"
+ alias_of: ethiopic_generic
marc_code: amh
name: Amharic
arabic:
@@ -33,7 +33,7 @@ arabic:
marc_code: ara
name: Arabic
argobba_ethiopic:
- conf: "ethiopic_generic.yml"
+ alias_of: ethiopic_generic
marc_code: amh
name: Argobba (Ethiopic)
armenian:
@@ -46,14 +46,14 @@ avaric_cyrillic:
marc_code: ava
name: Avaric (Cyrillic)
awadhi_devanagari:
- conf: "devanagari_generic.yml"
+ alias_of: devanagari_generic
marc_code: awa
name: Awadhi (Devanagari)
azerbaijani_cyrillic:
marc_code: aze
name: Azerbaijani (Cyrillic)
balkar_cyrillic:
- conf: "cyrillic_generic.yml"
+ alias_of: cyrillic_generic
marc_code: krc
name: Balkar (Cyrillic)
bashkir_cyrillic:
@@ -63,11 +63,10 @@ belarusian:
marc_code: bel
name: Belarusian
bengali:
- conf: "_bengali_base.yml"
marc_code: ben
name: Bengali
bihari_devanagari:
- conf: "devanagari_generic.yml"
+ alias_of: devanagari_generic
marc_code: bih
name: Bihari (Devanagari)
bodo_bengali:
@@ -77,7 +76,7 @@ bodo_devanagari:
marc_code: sit
name: Bodo (Devanagari)
braj_devanagari:
- conf: "devanagari_generic.yml"
+ alias_of: devanagari_generic
marc_code: bra
name: Braj (Devanagari)
bulgarian:
@@ -117,7 +116,7 @@ cyrillic_generic:
marc_code: mul
name: Cyrillic (Generic)
dargwa_cyrillic:
- conf: "cyrillic_generic.yml"
+ alias_of: cyrillic_generic
name: Dargwa (Cyrillic)
marc_code: dar
devanagari_generic:
@@ -133,7 +132,7 @@ dungan_cyrillic:
marc_code: sit
name: Dungan (Cyrillic)
dzongkha_tibetan:
- conf: "tibetan.yml"
+ alias_of: tibetan
marc_code: dzo
name: Dzongkha (Tibetan)
ethiopic_generic:
@@ -152,7 +151,7 @@ georgian:
marc_code: geo
name: Georgian
gilyak_cyrillic:
- conf: "cyrillic_generic.yml"
+ alias_of: cyrillic_generic
marc_code: mis
name: Gilyak (Cyrillic)
glagolitic:
@@ -168,22 +167,24 @@ gujarati:
marc_code: guj
name: Gujarati
gurage_ethiopic:
- conf: "ethiopic_generic.yml"
+ alias_of: ethiopic_generic
marc_code: sem
name: Gurage (Ethiopic)
+gurmukhi_generic:
+ name: Gurmukhi (generic)
hebrew:
marc_code: heb
name: Hebrew
hindi:
- conf: "devanagari_generic.yml"
+ alias_of: devanagari_generic
marc_code: hin
name: Hindi (Devanagari)
ingush_cyrillic:
- conf: "cyrillic_generic.yml"
+ alias_of: cyrillic_generic
marc_code: inh
name: Ingush (Cyrillic)
inuit_cyrillic:
- conf: "cyrillic_generic.yml"
+ alias_of: cyrillic_generic
marc_code: ipk
name: Inuit (Cyrillic)
inuktitut:
@@ -196,7 +197,7 @@ japanese_katakana:
marc_code: jpn
name: Japanese (Katakana)
kabardian_cyrillic:
- conf: "cyrillic_generic.yml"
+ alias_of: cyrillic_generic
marc_code: kbd
name: Kabardian (Cyrillic)
kalmyk_cyrillic:
@@ -209,7 +210,7 @@ kannada:
marc_code: kan
name: Kannada
kara-kalpak_cyrillic:
- conf: "cyrillic_generic.yml"
+ alias_of: cyrillic_generic
marc_code: kaa
name: Kara-Kalpak (Cyrillic)
karachay-balkar_cyrillic:
@@ -237,11 +238,11 @@ komi-permyak_cyrillic:
marc_code: kom
name: Komi-Permyak (Cyrillic)
konkani_devanagari:
- conf: "devanagari_generic.yml"
+ alias_of: devanagari_generic
marc_code: kok
name: Konkani (Devanagari)
konkani_kannada:
- conf: "kannada.yml"
+ alias_of: kannada
marc_code: kok
name: Konkani (Kannada)
korean_names:
@@ -258,7 +259,7 @@ koryak_cyrillic:
marc_code: mis
name: Koryak (Cyrillic)
kumyk_cyrillic:
- conf: "cyrillic_generic.yml"
+ alias_of: cyrillic_generic
marc_code: kum
name: Kumyk (Cyrillic)
kurdish_arabic:
@@ -271,18 +272,18 @@ kyrgyz_cyrillic:
marc_code: kir
name: Kyrgyz (Cyrillic)
lak_cyrillic:
- conf: "cyrillic_generic.yml"
+ alias_of: cyrillic_generic
marc_code: cau
name: Lak (Cyrillic)
lahnda_gurmukhi:
- conf: "_gurmukhi_base.yml"
+ alias_of: gurmukhi_generic
marc_code: lah
name: Lahnda (Gurmukhi)
lepcha:
marc_code: sit
name: Lepcha
lezghian_cyrillic:
- conf: "cyrillic_generic.yml"
+ alias_of: cyrillic_generic
marc_code: lez
name: Lezghian (Cyrillic)
limbu:
@@ -295,7 +296,7 @@ macedonian:
marc_code: mac
name: Macedonian
maithili_devanagari:
- conf: "devanagari_generic.yml"
+ alias_of: devanagari_generic
marc_code: mai
name: Maithili (Devanagari)
malayalam:
@@ -317,7 +318,7 @@ marathi_devanagari:
marc_code: mar
name: Marathi (Devanagari)
mari_cyrillic:
- conf: "cyrillic_generic.yml"
+ alias_of: cyrillic_generic
marc_code: chm
name: Mari (Cyrillic)
moldovan_cyrillic:
@@ -336,26 +337,26 @@ mordvin_cyrillic:
marc_code: fiu
name: Mordvin (Cyrillic)
nanai_cyrillic:
- conf: "cyrillic_generic.yml"
+ alias_of: cyrillic_generic
marc_code: tut
name: Nanai (Cyrillic)
nenets_cyrillic:
marc_code: mis
name: Nenets (Cyrillic)
nepali_devanagari:
- conf: "devanagari_generic.yml"
+ alias_of: devanagari_generic
marc_code: nep
name: Nepali (Devanagari)
newari_devanagari:
- conf: "devanagari_generic.yml"
+ alias_of: devanagari_generic
marc_code: new
name: Newari (Devanagari)
nivkh_cyrillic:
- conf: "cyrillic_generic.yml"
+ alias_of: cyrillic_generic
marc_code: mis
name: Nivkh (Cyrillic)
nogai_cyrillic:
- conf: "cyrillic_generic.yml"
+ alias_of: cyrillic_generic
marc_code: nog
name: Nogai (Cyrillic)
odia:
@@ -365,26 +366,26 @@ ossetic_cyrillic:
marc_code: oss
name: Ossetic (Cyrillic)
pahari_devanagari:
- conf: "devanagari_generic.yml"
+ alias_of: devanagari_generic
marc_code: him
name: Pahari (Devanagari)
pali_bengali:
- conf: "_bengali_base.yml"
+ alias_of: bengali
marc_code: pli
name: Pali (Bengali)
pali_devanagari:
- conf: "devanagari_generic.yml"
+ alias_of: devanagari_generic
marc_code: pli
name: Pali (Devanagari)
pali_sinhala:
- conf: "sinhala.yml"
+ alias_of: sinhala
marc_code: pli
name: Pali (Sinhala)
panjabi_gurmukhi:
marc_code: pan
name: Panjabi (Gurmukhi)
permyak_cyrillic:
- conf: "komi-permyak_cyrillic.yml"
+ alias_of: komi-permyak_cyrillic
marc_code: kom
name: Permyak (Cyrillic)
persian:
@@ -397,11 +398,11 @@ pushto:
marc_code: pus
name: Pushto
rajasthani_devanagari:
- conf: "devanagari_generic.yml"
+ alias_of: devanagari_generic
marc_code: raj
name: Rajasthani (Devanagari)
romani_cyrillic:
- conf: "cyrillic_generic.yml"
+ alias_of: cyrillic_generic
marc_code: rom
name: Romani (Cyrillic)
romanian_cyrillic:
@@ -432,7 +433,7 @@ sindhi_arabic:
marc_code: snd
name: Sindhi (Arabic)
sindhi_gurmukhi:
- conf: "_gurmukhi_base.yml"
+ alias_of: gurmukhi_generic
marc_code: snd
name: Sindhi (Gurmukhi)
sinhala:
@@ -442,7 +443,7 @@ syriac_cyrillic:
marc_code: syc
name: Syriac (Cyrillic)
tabasaran_cyrillic:
- conf: "cyrillic_generic.yml"
+ alias_of: cyrillic_generic
marc_code: cau
name: Tabasaran (Cyrillic)
tajik_cyrillic:
@@ -458,7 +459,7 @@ tamil_extended:
marc_code: tam
name: Tamil (extended)
tat_cyrillic:
- conf: "cyrillic_generic.yml"
+ alias_of: cyrillic_generic
marc_code: ira
name: Tat (Cyrillic)
tatar_cyrillic:
@@ -480,22 +481,22 @@ tibetan_2015_r2r:
marc_code: tib
name: Tibetan (ñ,ṅ,ś,ź to ny,ng,sh,zh only)
tigre_ethiopic:
- conf: "ethiopic_generic.yml"
+ alias_of: ethiopic_generic
marc_code: tig
name: Tigre (Ethiopic)
tigrinya_ethiopic:
- conf: "ethiopic_generic.yml"
+ alias_of: ethiopic_generic
marc_code: tir
name: Tigrinya (Ethiopic)
turkmen_cyrillic:
marc_code: tuk
name: Turkmen (Cyrillic)
tuvinian_cyrillic:
- conf: "cyrillic_generic.yml"
+ alias_of: cyrillic_generic
marc_code: tyv
name: Tuvinian (Cyrillic)
udekhe_cyrillic:
- conf: "cyrillic_generic.yml"
+ alias_of: cyrillic_generic
marc_code: tut
name: Udekhe (Cyrillic)
udmurt_cyrillic:
diff --git a/scriptshifter/tables/init.sql b/scriptshifter/tables/init.sql
index a563d1c..1e72023 100644
--- a/scriptshifter/tables/init.sql
+++ b/scriptshifter/tables/init.sql
@@ -5,11 +5,14 @@
*/
CREATE TABLE tbl_language (
id INTEGER PRIMARY KEY,
+ ref_id INTEGER NULL,
name TEXT UNIQUE,
label TEXT,
marc_code TEXT,
description TEXT,
- features TINYINT DEFAULT 0
+ features TINYINT DEFAULT 0,
+
+ FOREIGN KEY (ref_id) REFERENCES tbl_language(id) ON DELETE RESTRICT
);
/*
diff --git a/scriptshifter/templates/index.html b/scriptshifter/templates/index.html
index 042367b..4469d42 100644
--- a/scriptshifter/templates/index.html
+++ b/scriptshifter/templates/index.html
@@ -59,7 +59,7 @@