From f8767f8544aa87e11d74c988bc1e8e3fd01edabe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Schoentgen?= Date: Wed, 16 Oct 2024 16:03:24 +0200 Subject: [PATCH] [RU] Decode pronunciations --- wikidict/lang/ru/transcriptions.py | 1504 ++++++++++++++++++++++++++++ 1 file changed, 1504 insertions(+) create mode 100644 wikidict/lang/ru/transcriptions.py diff --git a/wikidict/lang/ru/transcriptions.py b/wikidict/lang/ru/transcriptions.py new file mode 100644 index 00000000..58b7bf87 --- /dev/null +++ b/wikidict/lang/ru/transcriptions.py @@ -0,0 +1,1504 @@ +""" +Python conversion of the "ru-pron" module. + +Links: + - https://ru.wiktionary.org/wiki/Модуль:ru-pron + +Current version from 2020-07-21T09:24:00 + - https://ru.wiktionary.org/w/index.php?title=Модуль:ru-pron&oldid=11489162 +""" + +import re +import unicodedata +from collections.abc import Callable + + +# apply rsub() repeatedly until no change +def sub_repeatedly(pattern: str, repl: str, text: str) -> str: + while True: + if (new_text := re.sub(pattern, repl, text)) == text: + return text + text = new_text + + +def translate(text: str, translation: dict[str, str]) -> str: + return re.compile("|".join(map(re.escape, translation))).sub(lambda match: translation[match[0]], text) + + +AC = "\u0301" # acute = ́ +GR = "\u0300" # grave = ̀ +CFLEX = "\u0302" # circumflex = ̂ +DUBGR = "\u030f" # double grave = ̏ +DOTABOVE = "\u0307" # dot above = ̇ +DOTBELOW = "\u0323" # dot below = ̣ +BREVE = "\u0306" # breve ̆ +DIA = "\u0308" # diaeresis = ̈ +CARON = "\u030c" # caron ̌ +TEMP_G = "\ufff1" # substitute to preserve g from changing to v + + +# any accent +accent = f"{AC}{GR}{DIA}{BREVE}{CARON}" +# regex for any optional accent(s) +opt_accent = f"[{accent}]*" +# any composed Cyrillic vowel with grave accent +composed_grave_vowel = "ѐЀѝЍ" +# any Cyrillic vowel except ёЁ +vowel_no_jo = f"аеиоуяэыюіѣѵАЕИОУЯЭЫЮІѢѴ{composed_grave_vowel}" +# any Cyrillic vowel, including ёЁ +vowel = f"{vowel_no_jo}ёЁ" + +vow = "aeiouyɛəäạëöü" +ipa_vow = f"{vow}ɐɪʊɨæɵʉ" +vowels = f"[{vow}]" +vowels_c = f"({vowels})" +# No need to include DUBGR here because we rewrite it to CFLEX very early +acc = f"{AC}{GR}{CFLEX}{DOTABOVE}{DOTBELOW}" +accents = f"[{acc}]" +consonants = r"\A[^аеиоуяэыюіѣѵүАЕИОУЯЭЫЮІѢѴҮѐЀѝЍёЁAEIOUYĚƐaeiouyěɛЪЬъьʹʺ]" + +perm_syl_onset = { + "bd", + "bj", + "bz", + "bl", + "br", + "vd", + "vz", + "vzv", + "vzd", + "vzr", + "vl", + "vm", + "vn", + "vr", + "gl", + "gn", + "gr", + "dž", + "dn", + "dv", + "dl", + "dr", + "dj", + "žg", + "žd", + "žm", + "žn", + "žr", + "zb", + "zd", + "zl", + "zm", + "zn", + "zv", + "zr", + "kv", + "kl", + "kn", + "kr", + "ks", + "kt", + "ml", + "mn", + "nr", + "pl", + "pn", + "pr", + "ps", + "pt", + "pš", + "stv", + "str", + "sp", + "st", + "stl", + "sk", + "skv", + "skl", + "skr", + "sl", + "sf", + "sx", + "sc", + "sm", + "sn", + "sv", + "sj", + "spl", + "spr", + "sr", + "tv", + "tk", + "tkn", + "tl", + "tr", + "fk", + "fl", + "fr", + "fs", + "fsx", + "fsp", + "fspl", + "ft", + "fš", + "xv", + "xl", + "xm", + "xn", + "xr", + "cv", + "čv", + "čl", + "čm", + "čr", + "čt", + "šv", + "šk", + "škv", + "šl", + "šm", + "šn", + "šp", + "šr", + "št", + "šč", +} + +# FIXME: Consider changing ӂ internally to ʑ to match ɕ (it is used externally +# in e.g. 
дроӂӂи (pronunciation spelling of дрожжи) +translit_conv = { + "c": "t͡s", + "č": "t͡ɕ", + "ĉ": "t͡ʂ", + "g": "ɡ", + "ĝ": "d͡ʐ", + "ĵ": "d͡z", + "ǰ": "d͡ʑ", + "ӂ": "ʑ", + "š": "ʂ", + "ž": "ʐ", +} + +translit_conv_j = {"cʲ": "tʲ͡sʲ", "ĵʲ": "dʲ͡zʲ"} + +allophones = { + "a": "aɐə", + "e": "eɪɪ", + "i": "iɪɪ", + "o": "oɐə", + "u": "uʊʊ", + "y": "ɨɨɨ", + "ɛ": "ɛɛɛ", + "ä": "aɪɪ", + "ạ": "aɐə", + "ë": "eɪɪ", + "ö": "ɵɪɪ", + "ü": "uʊʊ", + "ə": "əəə", +} + +devoicing = { + "b": "p", + "d": "t", + "g": "k", + "z": "s", + "v": "f", + "ž": "š", + "ɣ": "x", + "ĵ": "c", + "ǰ": "č", + "ĝ": "ĉ", + "ӂ": "ɕ", +} + +voicing = { + "p": "b", + "t": "d", + "k": "g", + "s": "z", + "f": "v", + "š": "ž", + "c": "ĵ", + "č": "ǰ", + "ĉ": "ĝ", + "x": "ɣ", + "ɕ": "ӂ", +} + +iotating = { + "a": "ä", + "e": "ë", + "o": "ö", + "u": "ü", +} + +retracting = { + "e": "ɛ", + "i": "y", +} + +fronting = { + "a": "æ", + "u": "ʉ", + "ʊ": "ʉ", +} + +# Prefixes that we recognize specially when they end in a geminated +# consonant. The first element is the result after applying voicing/devoicing, +# gemination and other changes. The second element is the original spelling, +# so that we don't overmatch and get cases like Поттер. We check for these +# prefixes at the beginning of words and also preceded by ne-, po- and nepo-. +geminate_pref = { + #'abː', #'adː', + r"be[szšž]ː": r"be[sz]", + #'braomː', + r"[vf]ː": "v", + r"vo[szšž]ː": r"vo[sz]", + r"i[szšž]ː": r"i[sz]", + #'^inː', + "kontrː": "kontr", + "superː": "super", + r"tran[szšž]ː": "trans", + r"na[tdcč]ː": "nad", + r"ni[szšž]ː": r"ni[sz]", + r"o[tdcč]ː": "ot", #'^omː', + r"o[bp]ː": "ob", + r"obe[szšž]ː": r"obe[sz]", + r"po[tdcč]ː": "pod", + r"pre[tdcč]ː": "pred", #'^paszː', '^pozː', + r"ra[szšž]ː": r"ra[sz]", + r"[szšž]ː": r"[szšž]", # ž on right for жжёт etc., ш on left for США + r"su[bp]ː": "sub", + r"me[žš]ː": "mež", + r"če?re[szšž]ː": r"če?re[sz]", + # certain double prefixes involving ra[zs]- + r"predra[szšž]ː": r"predra[sz]", + r"bezra[szšž]ː": r"bezra[sz]", + r"nara[szšž]ː": r"nara[sz]", + r"vra[szšž]ː": r"vra[sz]", + r"dora[szšž]ː": r"dora[sz]", + # '^sverxː', '^subː', '^tröxː', '^četyröxː', +} + + +phon_respellings: dict[str, str | Callable[[re.Match[str]], str]] = { + # Безударный неконечный Е после Ж, Ш, Ц + rf"([žšc])e([^{acc}⁀])": r"\1y\2", + # окончания прил./прич. мн. ч. + r"y(́?)je⁀": r"y\1i⁀", + r"([gkx])i(́?)je⁀": r"\1i\2i⁀", + r"([vdntžš])nije⁀": r"\1nii⁀", + r"ščije(sja?)⁀": r"ščii\1⁀", + r"všije(sja?)⁀": r"všii\1⁀", + "h": "ɣ", + "šč": "ɕː", # conversion of šč to geminate + "čš": "tš", + # the following group is ordered before changes that affect ts + # FIXME!!! Should these also pay attention to grave accents? + r"́tʹ?sja⁀": "́cca⁀", + r"([^́])tʹ?sja⁀": r"\1ca⁀", + r"n[dt]sk": r"n(t)sk", + r"s[dt]sk": "sck", + # Add / before цз, чж sequences (Chinese words) and assimilate чж + "cz": "/cz", + "čž": "/ĝž", + # main changes for affricate assimilation of [dt] + sibilant, including ts; + # we either convert to "short" variants t͡s, d͡z, etc. or to "long" variants t͡ss, d͡zz, etc. + # 1. т с, д з across word boundary, also т/с, д/з with explicitly written slash, use long variants. + r"[dt](ʹ?[ ‿⁀/]+)s": r"c\1s", + r"[dt](ʹ?[ ‿⁀/]+)z": r"ĵ\1z", + # 2. тс, дз + vowel use long variants. + rf"[dt](ʹ?)s(j?{vowels})": r"c\1s\2", + rf"[dt](ʹ?)z(j?{vowels})": r"ĵ\1z\2", + # 3. тьс, дьз use long variants. + r"[dt]ʹs": "cʹs", + r"[dt]ʹz": "ĵʹz", + # 4. word-initial от[сз]-, под[сз]- use long variants because there is a morpheme boundary. 
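+    #    (e.g. отсы́пать, подска́зка, where word-initial от-/под- meet a root in с-)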
+ rf"(⁀o{accents}?)t([sz])": lambda match: match[1] + {"s": "cs", "z": "ĵz"}[match[2]], + rf"(⁀po{accents}?)d([sz])": lambda match: match[1] + {"s": "cs", "z": "ĵz"}[match[2]], + # 5. other тс, дз use short variants. + r"[dt]s": "c", + r"[dt]z": "ĵ", + # 6. тш, дж always use long variants (FIXME, may change) + r"[dt](ʹ?[ \-‿⁀/]*)š": r"ĉ\1š", + r"[dt](ʹ?[ \-‿⁀/]*)ž": r"ĝ\1ž", + # 7. soften palatalized hard hushing affricates resulting from the previous + "ĉʹ": "č", + "ĝʹ": "ǰ", + # changes that generate ɕː and ɕč through assimilation: + # зч and жч become ɕː, as does сч at the beginning of a word and in the + # sequence счёт when not following [цдт] (подсчёт); else сч becomes ɕč + # (отсчи́тываться), as щч always does (рассчитáть written ращчита́ть) + r"[cdt]sč": "čɕː", + "ɕːč": "ɕč", + r"[zž]č": "ɕː", + r"[szšž]ɕː?": "ɕː", + rf"sčjo({accents}?)t": r"ɕːjo\1t", + rf"sče({accents}?)t": r"ɕːe\1t", + rf"sčja({accents}?)s": r"ɕːja\1s", + "sč": "ɕč", + # misc. changes for assimilation of [dtsz] + sibilants and affricates + r"[sz][dt]c": "sc", + r"([rn])[dt]([cč])": r"\1\2", + # -дцат- (in numerals) has optionally-geminated дц + rf"dca({accents}?)t": r"c(c)a\1t", + # дц, тц, дч, тч + vowel always remain geminated, so mark this with ˑ; + # if not followed by a vowel, as in e.g. путч, use normal gemination + # (it will normally be degeminated) + rf"[dt]([cč])({vowels})": r"\1ˑ\2", + r"[dt]([cč])": r"\1\1", + # the following is ordered before the next one, which applies assimilation + # of [тд] to щ (including across word boundaries) + r"n[dt]ɕ": "nɕ", + # [сз] and [сз]ь before soft affricates [щч], including across word + # boundaries; note that the common sequence сч has already been handled + r"[zs]ʹ?([ ‿⁀/]*[ɕč])": r"ɕ\1", + # reduction of too many ɕ's, which can happen from the previous + "ɕɕː": "ɕː", + # assimilation before [тдц] and [тдц]ь before щ + r"[cdt]ʹ?([ ‿⁀/]*)ɕ": r"č\1ɕ", + # assimilation of [сз] and [сз]ь before [шж] + r"[zs]([ ‿⁀/]*)š": r"š\1š", + r"[zs]([ ‿⁀/]*)ž": r"ž\1ž", + r"[zs]ʹ([ ‿⁀/]*)š": r"ɕ\1š", + r"[zs]ʹ([ ‿⁀/]*)ž": r"ӂ\1ž", + "sverxi": "sverxy", + "stʹd": "zd", + "tʹd": "dd", + # loss of consonants in certain clusters + r"([ns])[dt]g": r"\1g", + "zdn": "zn", + "lnc": "nc", + r"[sz]tn": "sn", + rf"[sz]tli({accents}?)v([^š])": r"sli\1v\2", + r"čju(́?)vstv": r"ču\1stv", + r"zdra(́?)vstv": r"zdra\1stv", + "lvstv": "lstv", + # backing of /i/ after hard consonants in close juncture + r"([mnpbtdkgfvszxɣrlšžcĵĉĝ])⁀‿⁀i": r"\1⁀‿⁀y", +} + +cons_assim_palatal = { + # assimilation of tn, dn, sn, zn, st, zd, nč, nɕ is handled specially + "compulsory": {"ntʲ", "ndʲ", "xkʲ", "csʲ", "ĵzʲ", "ncʲ", "nĵʲ"}, + "optional": {"nsʲ", "nzʲ", "mpʲ", "mbʲ", "mfʲ", "fmʲ"}, +} + +# words which will be treated as accentless (i.e. their vowels will be +# reduced), and which will liaise with a preceding or following word; +# this will not happen if the words have an accent mark, cf. +# по́ небу vs. по не́бу, etc. +accentless = { + # class 'pre': particles that join with a following word + "pre": { + "bez", + "bliz", + "v", + "vedʹ", + "vo", + "da", + "do", + "za", + "iz", + "iz-pod", + "iz-za", + "izo", + "k", + "ko", + "mež", + "na", + "nad", + "nado", + "ne", + "ni", + "ob", + "obo", + "ot", + "oto", + "pered", + "peredo", + "po", + "pod", + "podo", + "pred", + "predo", + "pri", + "pro", + "s", + "so", + "u", + "čerez", + }, + # class 'prespace': particles that join with a following word, but only + # if a space (not a hyphen) separates them; hyphens are used here + # to spell out letters, e.g. 
а-эн-бэ́ for АНБ (NSA = National Security + # Agency) or о-а-э́ for ОАЭ (UAE = United Arab Emirates) + "prespace": {"a", "o"}, + # class 'post': particles that join with a preceding word + "post": {"by", "b", "ž", "že", "li", "libo", "lʹ", "ka", "nibudʹ", "tka"}, + # class 'posthyphen': particles that join with a preceding word, but only + # if a hyphen (not a space) separates them + "posthyphen": {"to"}, +} + +# Pronunciation of final unstressed -е, depending on the part of speech and +# exact ending. +# +# Endings: +# oe = -ое +# ve = any other vowel plus -е (FIXME, may have to split out -ее) +# je = -ье +# softpaired = soft paired consonant + -е +# hardsib = hard sibilant (ц, ш, ж) + -е +# softsib = soft sibilant (ч, щ) + -е +# +# Parts of speech: +# def = default used in absence of pos +# n/noun = neuter noun in the nominative/accusative singular (but not ending +# in adjectival -ое or -ее; those should be considered as adjectives) +# pre = prepositional case singular +# dat = dative case singular (treated same as prepositional case singular) +# voc = vocative case (currently treated as 'mid') +# nnp = noun nominative plural in -е (гра́ждане, боя́ре, армя́не); not +# adjectival plurals in -ие or -ые, including adjectival nouns +# (да́нные, а́вторские) +# inv = invariable noun or other word (currently treated as 'mid') +# a/adj = adjective or adjectival noun (typically either neuter in -ое or +# -ее, or plural in -ие, -ые, or -ье, or short neuter in unpaired +# sibilant + -е) +# c/com = comparative (typically either in -ее or sibilant + -е) +# adv = adverb +# p = preposition (treated same as adverb) +# v/vb/verb = finite verbal form (usually 2nd-plural in -те); not +# participle forms, which should be treated as adjectives +# pro = pronoun (кое-, какие-, ваше, сколькие) +# num = number (двое, трое, обе, четыре; currently treated as 'mid') +# pref = prefix (treated as 'high' because integral part of word) +# hi/high = force high values ([ɪ] or [ɨ]) +# mid = force mid values ([e] or [ɨ]) +# lo/low/schwa = force low, really schwa, values ([ə]) +# +# Possible values: +# 1. ə [ə], e [e], i [ɪ] after a vowel or soft consonant +# 2. ə [ə] or y [ɨ] after a hard sibilant +# +# If a part of speech doesn't have an entry for a given type of ending, +# it receives the default value. If a part of speech's entry is a string, +# it's an alias for another way of specifying the same part of speech +# (e.g. n=noun). +final_e: dict[str, dict[str, str] | str] = { + "def": {"oe": "ə", "ve": "ə", "je": "ə", "softpaired": "ɪ", "hardsib": "ə", "softsib": "ɪ"}, + "noun": {"oe": "ə", "ve": "e", "je": "e", "softpaired": "e", "hardsib": "ə", "softsib": "e"}, + "n": "noun", + "pre": {"oe": "e", "ve": "e", "softpaired": "e", "hardsib": "y", "softsib": "e"}, + "dat": "pre", + "voc": "mid", + # FIXME, not sure about this + "nnp": {"softpaired": "e"}, + # FIXME, not sure about this (e.g. вице-, кофе) + "inv": "mid", + # FIXME: Not sure about -ее, e.g. neut adj си́нее; FIXME, not sure about short neuter adj, e.g. похо́же from похо́жий, дорогосто́яще from дорогосто́ящий, should this be treated as neuter noun? 
+ "adj": {"oe": "ə", "ve": "e", "je": "ə"}, + "a": "adj", + "com": {"ve": "e", "hardsib": "y", "softsib": "e"}, + "c": "com", + "adv": {"softpaired": "e", "hardsib": "y", "softsib": "e"}, + # FIXME, not sure about prepositions + "p": "adv", + "verb": {"softpaired": "e"}, + "v": "verb", + "vb": "verb", + # FIXME, not sure about ваше, сколькие, какие-, кое- + "pro": {"oe": "i", "ve": "i"}, + # FIXME, not sure about обе + "num": "mid", + "pref": "high", + # forced values + "high": {"oe": "i", "ve": "i", "je": "i", "softpaired": "i", "hardsib": "y", "softsib": "i"}, + "hi": "high", + "mid": {"oe": "e", "ve": "e", "je": "e", "softpaired": "e", "hardsib": "y", "softsib": "e"}, + "low": {"oe": "ə", "ve": "ə", "je": "ə", "softpaired": "ə", "hardsib": "ə", "softsib": "ə"}, + "lo": "low", + "schwa": "low", +} + +recomposer = { + f"и{BREVE}": "й", + f"И{BREVE}": "Й", + f"е{DIA}": "ё", # WARNING: Cyrillic е and Е + f"Е{DIA}": "Ё", + f"e{CARON}": "ě", # WARNING: Latin e and E + f"E{CARON}": "Ě", + f"c{CARON}": "č", + f"C{CARON}": "Č", + f"s{CARON}": "š", + f"S{CARON}": "Š", + f"z{CARON}": "ž", + f"Z{CARON}": "Ž", + # used in ru-pron: + f"ж{BREVE}": "ӂ", # used in ru-pron + f"Ж{BREVE}": "Ӂ", + f"j{CFLEX}": "ĵ", + f"J{CFLEX}": "Ĵ", + f"j{CARON}": "ǰ", + # no composed uppercase equivalent of J-caron + f"ʒ{CARON}": "ǯ", + f"Ʒ{CARON}": "Ǯ", +} + +# In this table, we now map Cyrillic е and э to je and e, and handle the +# post-consonant version (plain e and ɛ) specially. +tab = { + "А": "A", + "Б": "B", + "В": "V", + "Г": "G", + "Д": "D", + "Е": "Je", + "Ё": "Jó", + "Ж": "Ž", + "З": "Z", + "И": "I", + "Й": "J", + "К": "K", + "Л": "L", + "М": "M", + "Н": "N", + "О": "O", + "П": "P", + "Р": "R", + "С": "S", + "Т": "T", + "У": "U", + "Ф": "F", + "Х": "X", + "Ц": "C", + "Ч": "Č", + "Ш": "Š", + "Щ": "Šč", + "Ъ": "ʺ", + "Ы": "Y", + "Ь": "ʹ", + "Э": "E", + "Ю": "Ju", + "Я": "Ja", + "а": "a", + "б": "b", + "в": "v", + "г": "g", + "д": "d", + "е": "je", + "ё": "jó", + "ж": "ž", + "з": "z", + "и": "i", + "й": "j", + "к": "k", + "л": "l", + "м": "m", + "н": "n", + "о": "o", + "п": "p", + "р": "r", + "с": "s", + "т": "t", + "у": "u", + "ф": "f", + "х": "x", + "ц": "c", + "ч": "č", + "ш": "š", + "щ": "šč", + "ъ": "ʺ", + "ы": "y", + "ь": "ʹ", + "э": "e", + "ю": "ju", + "я": "ja", + # Russian style quotes + "«": "“", + "»": "”", + # archaic, pre-1918 letters + "І": "I", + "і": "i", + "Ѳ": "F", + "ѳ": "f", + "Ѣ": "Jě", + "ѣ": "jě", + "Ѵ": "I", + "ѵ": "i", +} + +decompose_grave_map = { + "ѐ": f"е{GR}", + "Ѐ": f"Е{GR}", + "ѝ": f"и{GR}", + "Ѝ": f"И{GR}", +} + + +# For use with {{ru-IPA|phon=+.}}; remove accents that we don't want +# to appear in the phonetic respelling +def phon_respelling(text: str) -> str: + text = re.sub(rf"[{CFLEX}{DUBGR}{DOTABOVE}{DOTBELOW}]", "", text) + return text.replace("‿", " ") + + +# For use with {{ru-IPA|adj=+.}}; rewrite adjectival endings to the form +# used for phonetic respelling +def adj_respelling(text: str) -> str: + # ого, его, аго (pre-reform spelling), with optional accent on either + # vowel, optionally with reflexive -ся suffix, at end of phrase or end + # of word followed by space or hyphen + text = re.sub(rf"(.[аое]́?)го({AC}?)$", r"\1во\2", text) + text = re.sub(rf"(.[аое]́?)го({AC}?ся)$", r"\1во\2", text) + text = re.sub(rf"(.[аое]́?)го{AC}?[ \-])", r"\1во\2", text) + return re.sub(rf"(.[аое]́?)го({AC}?ся[ \-])", r"\1во\2", text) + + +def decompose(text: str) -> str: + text = unicodedata.normalize("NFD", text) + return re.compile(rf"(.[{BREVE}{DIA}{CARON}])").sub(lambda match: 
recomposer[match[1]], text) + + +map_to_plain_e_map = {"Е": "E", "е": "e", "Ѣ": "Ě", "ѣ": "ě", "Э": "Ɛ", "э": "ɛ"} + + +def is_monosyllabic(word: str) -> bool: + return not re.search(rf"[{vowels}].*[{vowels}]", word) + + +# Transliterate after the pronunciation-related transformations of +# export.apply_tr_fixes() have been applied. Called from {{ru-IPA}}. +# INCLUDE_MONOSYLLABIC_JO_ACCENT is as in export.tr(). +def tr_after_fixes(text: str, include_monosyllabic_jo_accent: bool) -> str: + # Remove word-final hard sign, either utterance-finally or followed by + # a non-letter character such as space, comma, period, hyphen, etc. + text = re.sub(r"[Ъъ]$", "", text) + text = re.sub(r"\A[Ъъ](.+)", r"\1", text) + + # the if-statement below isn't necessary but may speed things up, + # particularly when include_monosyllabic_jo_accent isn't set, in that + # in the majority of cases where ё doesn't occur, we avoid a pattern find + # (in is_monosyllabic()) and three pattern subs. The translit module needs + # to be as fast as possible since it may be called hundreds or + # thousands of times on some pages. + if re.search(r"[Ёё]", text): + # We need to special-case ё after a "hushing" consonant, which becomes + # ó (or o), without j. We also need special cases for monosyllabic ё + # when INCLUDE_MONOSYLLABIC_JO_ACCENT isn't set, so we don't add the + # accent mark that we would otherwise include. + if not include_monosyllabic_jo_accent and is_monosyllabic(text): + text = re.sub(r"([жшчщЖШЧЩ])ё", r"\1o", text) + text = text.replace("ё", "jo") + text = text.replace("Ё", "Jo") + else: + text = re.sub(r"([жшчщЖШЧЩ])ё", r"\1ó", text) + # conversion of remaining ё will occur as a result of 'tab'. + + # ю after ж and ш becomes u (e.g. брошюра, жюри) + text = re.sub(r"([жшЖШ])ю", r"\1u", text) + + # the if-statement below isn't necessary but may speed things up in that + # in the majority of cases where the letters below don't occur, we avoid + # six pattern subs. + if re.search(r"[ЕеѢѣЭэ]", text): + # е after a dash at the beginning of a word becomes e, and э becomes ɛ + # (like after a consonant) + text = re.compile(r"^(\-)([ЕеѢѣЭэ])").sub(lambda match: match[1] + map_to_plain_e_map[match[2]], text) + text = re.compile(r"(\s-)([ЕеѢѣЭэ])").sub(lambda match: match[1] + map_to_plain_e_map[match[2]], text) + # don't get confused by single quote or parens between consonant and е; + # e.g. Б'''ез''', американ(ец) + text = re.compile(rf"({consonants}['\(\)]*)([ЕеѢѣЭэ])").sub( + lambda match: match[1] + map_to_plain_e_map[match[2]], text + ) + + return translate(text, tab) + + +# Apply transformations to the Cyrillic to more closely match pronunciation. +# Return two arguments: the "original" text (after decomposing composed +# grave characters), and the transformed text. If the two are different, +# {{ru-IPA}} should display a "phonetic respelling" notation. +# NOADJ disables special-casing for adjectives in -го, while FORCEADJ forces +# special-casing for adjectives, including those in -аго (pre-reform spelling) +# and disables checking for exceptions (e.g. много, ого). NOSHTO disables +# special-casing for что and related words. +def apply_tr_fixes(text: str) -> tuple[str, str]: + # decompose composed grave characters before we convert Cyrillic е to Latin e or je + text = translate(text, decompose_grave_map) + + origtext = text + # the second half of the if-statement below is an optimization; see above. 
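+    # NOTE: the "\f[\a\204\129\204\128]" sequences below carry over the Lua
+    # module's %f frontier patterns; \204\129 and \204\128 are the UTF-8 byte
+    # pairs of the combining acute (U+0301) and grave (U+0300) accents in the
+    # Lua source (Python's re module has no direct %f equivalent).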
+ if "го" in text: + # handle много + text = re.sub( + r"\f[\a\204\129\204\128]([Мм]но[\204\129\204\128]?)го\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о", + text, + ) + # handle немного, намного + text = re.sub( + r"\f[\a\204\129\204\128]([Нн][еа]мно[\204\129\204\128]?)го\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о", + text, + ) + # handle ненамного + text = re.sub( + r"\f[\a\204\129\204\128]([Нн]енамно[\204\129\204\128]?)го\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о", + text, + ) + # handle до́рого [short form of дорогой, adverb] + text = re.sub( + r"\f[\a\204\129\204\128]([Дд]о[\204\129\204\128]?ро)го\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о", + text, + ) + # handle недо́рого [short form of недорогой, adverb] + text = re.sub( + r"\f[\a\204\129\204\128]([Нн]едо[\204\129\204\128]?ро)го\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о", + text, + ) + # handle задо́рого [short form of недорогой, adverb] + text = re.sub( + r"\f[\a\204\129\204\128]([Зз]адо[\204\129\204\128]?ро)го\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о", + text, + ) + # handle незадо́рого [short form of недорогой, adverb] + text = re.sub( + r"\f[\a\204\129\204\128]([Зз]анедо[\204\129\204\128]?ро)го\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о", + text, + ) + # handle стро́го + text = re.sub( + r"\f[\a\204\129\204\128]([Сс]тро[\204\129\204\128]?)го\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о", + text, + ) + # handle на́строго + text = re.sub( + r"\f[\a\204\129\204\128]([Нн]а[\204\129\204\128]?стро)го\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о", + text, + ) + # handle нестро́го + text = re.sub( + r"\f[\a\204\129\204\128]([Нн]естро[\204\129\204\128]?)го\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о", + text, + ) + # handle убо́го + text = re.sub( + r"\f[\a\204\129\204\128]([Уу]бо[\204\129\204\128]?)го\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о", + text, + ) + # handle поло́го + text = re.sub( + r"\f[\a\204\129\204\128]([Пп]оло[\204\129\204\128]?)го\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о", + text, + ) + # handle длинноно́го + text = re.sub( + r"\f[\a\204\129\204\128]([Дд]линноно[\204\129\204\128]?)го\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о", + text, + ) + # handle коротконо́го + text = re.sub( + r"\f[\a\204\129\204\128]([Кк]оротконо[\204\129\204\128]?)го\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о", + text, + ) + # handle кривоно́го + text = re.sub( + r"\f[\a\204\129\204\128]([Кк]ривоно[\204\129\204\128]?)го\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о", + text, + ) + # handle колчено́го + text = re.sub( + r"\f[\a\204\129\204\128]([Кк]олчено[\204\129\204\128]?)го\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о", + text, + ) + # handle отло́го + text = re.sub( + r"\f[\a\204\129\204\128]([Оо]тло[\204\129\204\128]?)го\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о", + text, + ) + # handle пе́го [short form of пе́гий "piebald"] + text = re.sub( + r"\f[\a\204\129\204\128]([Пп]е[\204\129\204\128]?)го\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о", + text, + ) + # handle лого, сого, ого + text = re.sub( + r"\f[\a\204\129\204\128]([лсЛС]?[Оо][\204\129\204\128]?)г(о[\204\129\204\128]?)\f[^\a\204\129\204\128]", + r"\1" + TEMP_G + "\2", + text, + ) + # handle Того, То́го (but not того or Того́, which have /v/) + text = re.sub( + r"\f[\a\204\129\204\128]([Тт]о[\204\129\204\128]?)го\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о", + text, + ) + # handle лего + text = re.sub( + r"\f[\a\204\129\204\128]([Лл]е[\204\129\204\128]?)го\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о", + text, + ) + # handle игого, огого; note, we substitute TEMP_G for both г's + # because otherwise the ого- at 
the beginning gets converted to ово + text = re.sub( + r"\f[\a\204\129\204\128]([ИиОо])гог(о[\204\129\204\128]?)\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о{TEMP_G}\2", + text, + ) + # handle Диего + text = re.sub( + r"\f[\a\204\129\204\128]([Дд]ие[\204\129\204\128]?)го\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о", + text, + ) + # handle слого- + text = re.sub( + r"\f[\a\204\129\204\128]([Сс]ло[\204\129\204\128]?)го\f[^\a\204\129\204\128]", + rf"\1{TEMP_G}о", + text, + ) + + # handle genitive/accusative endings, which are spelled -ого/-его/-аго + # (-ogo/-ego/-ago) but transliterated -ovo/-evo/-avo; only for adjectives + # and pronouns, excluding words like много, ого (-аго occurs in + # pre-reform spelling); \204\129 is an acute accent, \204\128 is a grave accent + pattern = r"([оеОЕ][\204\129\204\128]?)([гГ])([оО][\204\129\204\128]?)" + reflexive = r"([сС][яЯ][\204\129\204\128]?)" + v = {"г": "в", "Г": "В"} + text = re.compile(rf"{pattern}\f[^\a\204\129\204\128]").sub( + lambda match: match[1] + v[match[2]] + match[3] + match[4], + text, + ) + text = re.compile(rf"{pattern}{reflexive}\f[^\a\204\129\204\128]").sub( + lambda match: match[1] + v[match[2]] + match[3] + match[4], + text, + ) + + # handle сегодня + text = re.sub(r"\f[\a\204\129\204\128]([Сс]е)г(о[\204\129\204\128]?дня)\f[^\a\204\129\204\128]", r"\1в\2", text) + + # handle сегодняшн- + text = re.sub(r"\f[\a\204\129\204\128]([Сс]е)г(о[\204\129\204\128]?дняшн)", r"\1в\2", text) + + # replace TEMP_G with g; must be done after the -go -> -vo changes + text = re.sub(TEMP_G, "г", text) + + # the second half of the if-statement below is an optimization; see above. + if "то" in text: + ch2sh = {"ч": "ш", "Ч": "Ш"} + # Handle что + text = re.compile(r"\f[\a\204\129\204\128]([Чч])(то[\204\129\204\128]?)\f[^\a\204\129\204\128]").sub( + lambda match: ch2sh[match[1]] + match[2], text + ) + # Handle чтобы, чтоб + text = re.compile(r"\f[\a\204\129\204\128]([Чч])(то[\204\129\204\128]?бы?)\f[^\a\204\129\204\128]").sub( + lambda match: ch2sh[match[1]] + match[2], text + ) + # Handle ничто + text = re.sub(r"\f[\a\204\129\204\128]([Нн]и)ч(то[\204\129\204\128]?)\f[^\a\204\129\204\128]", r"\1ш\2", text) + + text = re.sub(r"([МмЛл][яеё][\204\129\204\128]?)г([кч])", r"\1х\2", text) + + return origtext, text + + +# Transliterates text, which should be a single word or phrase. It should +# include stress marks, which are: preserved in the transliteration. +# ё is a special case: it is rendered (j)ó in multisyllabic words and +# monosyllabic words in multi-word phrases, but rendered (j)o without an +# accent in isolated monosyllabic words, unless INCLUDE_MONOSYLLABIC_JO_ACCENT +# is specified. (This is used in conjugation and declension tables.) +# NOADJ disables special-casing for adjectives in -го, while FORCEADJ forces +# special-casing for adjectives and disables checking for exceptions +# (e.g. много). NOSHTO disables special-casing for что and related words. +def tr(text: str, lang: None, sc: None, include_monosyllabic_jo_accent: bool) -> str: + origtext, subbed_text = apply_tr_fixes(text) + return tr_after_fixes(subbed_text, include_monosyllabic_jo_accent) + + +def translit(text: str, no_include_monosyllabic_jo_accent: bool) -> str: + return decompose(tr(text, None, None, not no_include_monosyllabic_jo_accent)) + + +def ipa(text: str, adj: str, gem: str, pos: str) -> str: + gem = gem[0] if gem else "" + pos = pos or "def" + + # If a multipart part of speech, split into components, and convert + # each blank component to the default. 
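+    # NOTE: with the split below left commented out, ``pos`` stays a plain
+    # string here, so the ``isinstance(pos, list)`` branches further down
+    # never trigger.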
+ # if "/" in pos: + # pos = pos.split("/") + # for i, p in enumerate(pos): + # if p == "": + # pos[i] = "def" + + # Verify that pos (or each part of multipart pos) is recognized + if not all(final_e.get(p) for p in ([pos] if isinstance(pos, str) else pos)): + raise ValueError( + f"Unrecognized part of speech {pos!r}: Should be n/noun/neut, a/adj, c/com, pre, dat, adv, inv, voc, v/verb, pro, hi/high, mid, lo/low/schwa or omitted" + ) + + text = text.lower() + text = text.replace("``", DUBGR) + text = text.replace("`", GR) + text = text.replace("@", DOTABOVE) + text = text.replace("^", CFLEX) + text = text.replace(DUBGR, CFLEX) + + # translit doesn't always convert э to ɛ (depends on whether a consonant precedes), so: it ourselves before translit + text = text.replace("э", "ɛ") + # vowel + йе should have double jj, but the translit module will translit + # it the same as vowel + е, so: it ourselves before translit + text = re.sub(rf"([{vowel}]{opt_accent})й([еѐ])", r"\1йй\2", text) + # transliterate and decompose Latin vowels with accents, recomposing + # certain key combinations; don't include accent on monosyllabic ё, so + # that we end up without an accent on such words + text = translit(text, True) + + # handle old ě (e.g. сѣдло́), and ě̈ from сѣ̈дла + text = text.replace("ě̈", f"jo{AC}") + text = text.replace("ě", "e") + # handle sequences of accents (esp from ё with secondary/tertiary stress) + text = re.sub(f"{accents}+({accents})", r"\1", text) + + # canonicalize multiple spaces + text = re.sub(r"\s+", " ", text) + + # Add primary stress to single-syllable words preceded or followed by + # unstressed particle or preposition. Add "tertiary" stress to remaining + # single-syllable words that aren't a particle, preposition, prefix or + # suffix and don't already bear an accent (including force-reduction + # accents, i.e. dot-above/dot-below); "tertiary stress" means a vowel is + # treated as stressed for the purposes of vowel reduction but isn't + # marked with a primary or secondary stress marker; we repurpose a + # circumflex for this purpose. We need to preserve the distinction + # between spaces and hyphens because (1) we only recognize certain + # post-accentless particles following a hyphen (to distinguish e.g. + # 'то' from '-то'); (2) we only recognize certain pre-accentless + # particles preceding a space (to distinguish particles 'о' and 'а' from + # spelled letters о and а, which should not be reduced); and (3) we + # recognize hyphens for the purpose of marking unstressed prefixes and + # suffixes. + word = re.split(r"([ \-]+)", text) + for i in range(len(word)): + # check for single-syllable words that need a stress; they must meet + # the following conditions: + if not ( + ( + # 1. must not be an accentless word, which is any of the following: + # 1a. in the "pre" class, or + word[i] in accentless["pre"] + or + # 1b. in the "prespace" class if followed by space and another word, or + i < len(word) - 1 + and word[i] in accentless["prespace"] + and word[i + 1] == " " + or + # 1c. in the "post" class if preceded by another word, or + i > 2 + and word[i] in accentless["post"] + or + # 1d. in the "posthyphen" class preceded by a hyphen and another word; + i > 2 + and word[i] in accentless["posthyphen"] + and word[i - 1] == "-" + ) + and + # 2. must be one syllable; + len(re.sub(rf"[^{vow}]", "", word[i])) == 1 + and + # 3. must not have any accents (including dot-above, forcing reduction); + not re.search(accents, word[i]) + and + # 4. 
must not be a prefix or suffix, identified by a preceding or trailing hyphen, i.e. one of the following: + # 4a. utterance-initial preceded by a hyphen, or + not ( + i == 3 + and word[2] == "-" + and word[1] == "" + or + # 4b. non-utterance-initial preceded by a hyphen, or + i >= 3 + and word[i - 1] == " -" + or + # 4c. utterance-final followed by a hyphen, or + i == len(word) - 2 + and word[i + 1] == "-" + and word[i + 2] == "" + or + # 4d. non-utterance-final followed by a hyphen; + i <= len(word) - 2 + and word[i + 1] == "- " + ) + ): + # OK, we have a stressable single-syllable word; either add primary + # or tertiary stress: + if ( + i > 2 + and word[i - 2] in accentless["pre"] + or i > 2 + and word[i - 1] == " " + and word[i - 2] in accentless["prespace"] + or i < len(word) - 1 + and word[i + 2] in accentless["post"] + or i < len(word) - 1 + and word[i + 1] == "-" + and word[i + 2] in accentless["posthyphen"] + ): + # 1. add primary stress if preceded or followed by an accentless word, + word[i] = re.sub(vowels_c, rf"\1{AC}", word[i]) + else: + # 2. else add tertiary stress + word[i] = re.sub(vowels_c, rf"\1{CFLEX}", word[i]) + + # make unaccented prepositions and particles liaise with the following or + # preceding word + for i in range(len(word)): + if i < len(word) - 1 and ( + word[i] in accentless["pre"] or word[i] in accentless["prespace"] and word[i + 1] == " " + ): + word[i + 1] = "‿" + elif i > 2 and (word[i] in accentless["post"] or word[i] in accentless["posthyphen"] and word[i - 1] == "-"): + word[i - 1] = "‿" + + # rejoin words, convert hyphens to spaces and eliminate stray spaces resulting from this + text = re.sub(r"[\-\s]+", " ", "".join(word)) + text = text.strip() + + # convert commas and en/en dashes to IPA foot boundaries + text = re.sub(r"\s*[,–—]\s*", " | ", text) + + # add a ⁀ at the beginning and end of every word and at close juncture + # boundaries; we will remove this later but it makes it easier to: + # word-beginning and word-end re.subs + text = text.replace(" ", "⁀ ⁀") + text = "⁀" + text + "⁀" + text = text.replace("‿", "⁀‿⁀") + + # save original word spelling before respellings, (de)voicing changes, + # geminate changes, etc. for implementation of geminate_pref + orig_word = text.split(" ") + + # insert or remove /j/ before [aou] so that palatal versions of these + # vowels are always preceded by /j/ and non-palatal versions never are + # (do this before the change below adding tertiary stress to final + # palatal о): + # (1) Non-palatal [ou] after always-hard шж (e.g. in брошю́ра, жю́ри) despite the spelling (FIXME, should this also affect [a]?) + text = re.sub(r"([šž])j([ou])", r"\1\2", text) + # (2) Palatal [aou] after always-soft щчӂ and voiced variant ǰ (NOTE: this happens before the change šč -> ɕː in phon_respellings) + text = re.sub(r"([čǰӂ])([aou])", r"\1j\2", text) + # (3) ьо is pronounced as ьйо, i.e. like (possibly unstressed) ьё, e.g. in Асунсьо́н + text = text.replace("ʹo", "ʹjo") + + # add tertiary stress to some final -о (this needs to be done before + # eliminating dot-above, after adding ⁀, after adding /j/ before palatal о): + # (1) after vowels, e.g. То́кио + text = re.sub(rf"({vowels}{accents}?o)⁀", rf"\1{CFLEX}⁀", text) + # (2) when palatal, e.g. 
ра́нчо, га́учо, ма́чо, Ога́йо + text = text.replace("jo⁀", f"jo{CFLEX}⁀") + + # eliminate dot-above, which has served its purpose of preventing any + # sort of stress (needs to be done after adding tertiary stress to final -о) + text = text.replace(DOTABOVE, "") + # eliminate dot-below (needs to be done after changes above that insert j before [aou] after always-soft щчӂ) + text = text.replace(f"ja{DOTBELOW}", "jạ") + if DOTBELOW in text: + raise ValueError("Dot-below accent can only be placed on я or palatal а") + + text = re.sub(rf"(.[aoe]́?)go({AC}?)⁀", r"\1vo\2⁀", text) if adj else text + text = re.sub(rf"(.[aoe]́?)go({AC}?)sja⁀", r"\1vo\2sja⁀", text) if adj else text + + # phonetic respellings + for pattern, repl in phon_respellings.items(): + if isinstance(repl, str): + text = re.sub(pattern, repl, text) + else: + text = re.compile(pattern).sub(repl, text) + + # voicing, devoicing + # NOTE: v before an obstruent assimilates in voicing and triggers voicing + # assimilation of a preceding consonant; neither happens before a sonorant + + # 1. absolutely final devoicing + text = re.compile(r"([bdgvɣzžĝĵǰӂ])(ʹ?⁀)$").sub(lambda match: devoicing[match[1]] + match[2], text) + + # 2. word-final devoicing before another word + text = re.compile(r"([bdgvɣzžĝĵǰӂ])(ʹ?⁀ ⁀[^bdgɣzžĝĵǰӂ])").sub(lambda match: devoicing[match[1]] + match[2], text) + + # 3. voicing/devoicing assimilation; repeat to handle recursive assimilation + while True: + new_text = re.compile(r"([bdgvɣzžĝĵǰӂ])([ ‿⁀ʹːˑ()/]*[ptkfxsščɕcĉ])").sub( + lambda match: devoicing[match[1]] + match[2], text + ) + new_text = re.compile(r"([ptkfxsščɕcĉ])([ ‿⁀ʹːˑ()/]*v?[ ‿⁀ʹːˑ()/]*[bdgɣzžĝĵǰӂ])").sub( + lambda match: voicing[match[1]] + match[2], new_text + ) + if new_text == text: + break + text = new_text + + # re-notate orthographic geminate consonants + text = re.sub(rf"([^{vow}.\-_])\1", r"\1ː", text) + text = re.sub(rf"([^{vow}.\-_])\(\1\)", r"\1(ː)", text) + + # rewrite iotated vowels + text = re.compile(r"(j[\(ːˑ\)]*)([aeou])").sub(lambda match: match[1] + iotating[match[2]], text) + + # eliminate j after consonant and before iotated vowel (including semi-reduced ạ) + text = re.sub(rf"([^{vow}{acc}ʹʺ‿⁀ ]/?)j([äạëöü])", r"\1\2", text) + + # split by word and process each word + word = text.split(" ") + + if isinstance(pos, list) and len(pos) != len(word): + raise ValueError(f"Number of parts of speech ({len(pos)}) should match number of combined words ({len(word)})") + + for pron in word: + # Check for gemination at prefix boundaries; if so, convert the + # regular gemination symbol ː to a special symbol ˑ that indicates + # we always preserve the gemination unless gem=n. We look for + # certain sequences at the beginning of a word, but make sure that + # the original spelling is appropriate as well (see comment above + # for geminate_pref). + if "ː" in pron: + orig_pron = orig_word[i] + deac = re.sub(accents, "", pron) + orig_deac = re.sub(accents, "", orig_pron) + for newspell, oldspell in geminate_pref.items(): + # FIXME! 
The re.sub below will be incorrect if there is + # gemination in a joined preposition or particle + if ( + re.search(f"⁀{oldspell}", orig_deac) + and re.search(f"⁀{newspell}", deac) + or re.search(f"⁀ne{oldspell}", orig_deac) + and re.search(f"⁀ne{newspell}", deac) + ): + pron = re.sub(r"(⁀[^‿⁀ː]*)ː", r"\1ˑ", pron) + + # degemination, optional gemination + if gem == "y": + # leave geminates alone, convert ˑ to regular gemination; ˑ is a + # special gemination symbol used at prefix boundaries that we + # remove only when gem=n, else we convert it to regular gemination + pron = pron.replace("ˑ", "ː") + elif gem == "o": + # make geminates optional, except for ɕӂ, also ignore left paren in (ː) sequence + pron = re.sub(r"([^ɕӂ\(\)])[ːˑ]", r"\1(ː)", pron) + elif gem == "n": + # remove gemination, except for ɕӂ + pron = re.sub(r"([^ɕӂ\(\)])[ːˑ]", r"\1", pron) + else: + # degeminate l's + pron = re.sub(r"(l)ː", r"\1", pron) + # preserve gemination between vowels immediately after the stress, + # special gemination symbol ˑ also remains, ɕӂ remain geminated, + # žn remain geminated between vowels even not immediately after + # the stress, n becomes optionally geminated when after but not + # immediately after the stress, ssk and zsk remain geminated + # immediately after the stress, else degeminate; we signal that + # gemination should remain by converting to special symbol ˑ, + #: removing remaining ː not after ɕӂ and left paren;: + # various subs repeatedly in case of multiple geminations in a word + # 1. immediately after the stress + pron = sub_repeatedly(rf"({vowels}{accents}[^ɕӂ\(\)])ː({vowels})", r"\1ˑ\2", pron) + + # 2. remaining geminate n after the stress between vowels + pron = sub_repeatedly(rf"({AC}.-{vowels}{accents}?n)ː({vowels})", r"\1(ː)\2", pron) + + # 3. remaining ž and n between vowels + pron = sub_repeatedly(rf"({vowels}{accents}?[žn])ː({vowels})", r"\1ˑ\2", pron) + + # 4. ssk (and zsk, already normalized) immediately after the stress + pron = re.sub(rf"({vowels}{accents}[^{vow}]*s)ː(k)", r"\1ˑ\2", pron) + + # 5. eliminate remaining gemination, except for ɕː and ӂː + pron = re.sub(r"([^ɕӂ\(\)])ː", r"\1", pron) + + # 6. convert special gemination symbol ˑ to regular gemination + pron = pron.replace("ˑ", "ː") + + # handle soft and hard signs, assimilative palatalization + # 1. insert j before i when required + pron = pron.replace("ʹi", "ʹji") + + # 2. insert glottal stop after hard sign if required + pron = re.sub(r"ʺ([aɛiouy])", r"ʔ\1", pron) + + # 3. (ь) indicating optional palatalization + pron = pron.replace(r"\(ʹ\)", "⁽ʲ⁾") + + # 4. assimilative palatalization of consonants when followed by + # front vowels or soft sign + pron = re.sub(r"([mnpbtdkgfvszxɣrl])([ː()]*[eiäạëöüʹ])", r"\1ʲ\2", pron) + pron = re.sub(r"([cĵ])([ː()]*[äạöüʹ])", r"\1ʲ\2", pron) + + # 5. remove hard and soft signs + pron = re.sub(r"[ʹʺ]", "", pron) + + # reduction of unstressed word-final -я, -е; but special-case + # unstressed не, же. Final -я always becomes [ə]; final -е may + # become [ə],e],ɪ] or [ɨ] depending on the part of speech and + # the preceding consonants/vowels. 
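+        # e.g. with pos="noun" a soft paired consonant + -е keeps [e], while
+        # the default part of speech maps it to [ɪ] (see the final_e table above).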
+ pron = re.sub(r"[äạ]⁀", "ə⁀", pron) + pron = pron.replace("⁀nʲe⁀", "⁀nʲi⁀") + pron = pron.replace("⁀že⁀", "⁀žy⁀") + + # function to fetch the appropriate value for ending and part of + # speech, handling aliases and defaults and converting 'e' to 'ê' + # so that the unstressed [e] sound is preserved + def fetch_e_sub(ending: str) -> str: + thispos = pos[i] if isinstance(pos, list) else pos + chart = final_e[thispos] + while isinstance(chart, str): # handle aliases + chart = final_e[chart] + assert isinstance(final_e["def"], dict) # For Mypy + sub = chart[ending] or final_e["def"][ending] + assert sub + if sub == "e": + # add CFLEX to preserve the unstressed [e] sound, which + # will otherwise be converted to [ɪ]; NOTE: DO NOT use ê + # here directly because it's a single composed char, when + # we need the e and accent to be separate + return f"e{CFLEX}" + return sub + + # handle substitutions in two parts, one for vowel+j+e sequences + # and the other for cons+e sequences + pron = re.compile(rf"{vowels_c}({accents}?j)ë⁀").sub( + lambda match: (ch := match[1]) + match[2] + fetch_e_sub("oe" if ch == "o" else "ve"), pron + ) + # consonant may palatalized, geminated or optional-geminated + pron = re.compile(r"(.)(ʲ?[ː()]*)[eë]⁀").sub( + lambda match: ( + (ch := match[1]) + + match[2] + + fetch_e_sub( + "je" + if ch == "j" + else "hardsib" + if re.search(r"[cĵšžĉĝ]", ch) + else "softsib" + if re.search(r"[čǰɕӂ]", ch) + else "softpaired" + ) + ), + pron, + ) + + # Do the old way, which mostly converts final -е to schwa, but + # has highly broken retraction code for vowel + [шжц] + е (but + # not with accent on vowel!) before it that causes final -е in + # this circumstance to become [ɨ], and a special hack for кое-. + # pron = re.sub(rf"{vowels_c}([cĵšžĉĝ][ː()]*)[eë]", r'\1\2ɛ', pron) + # pron = re.sub(rf"⁀ko({accents})jë⁀", r'⁀ko\1ji⁀', pron) + # pron = re.sub(r'[eë]⁀', 'ə⁀', pron) + + # retraction of е and и after цшж + pron = re.compile(r"([cĵšžĉĝ][ː()]*)([ei])").sub(lambda match: match[1] + retracting[match[2]], pron) + + # syllabify, inserting @ at syllable boundaries + + # 1. insert @ after each vowel + pron = re.sub(rf"({vowels}{accents}?)", r"\1@", pron) + + # 2. eliminate word-final @ + pron = re.sub(r"@+⁀$", "⁀", pron) + + # 3. move @ forward directly before any ‿⁀, as long as at least one consonant follows that; we will move it across ‿⁀ later + pron = re.sub(rf"@([^@{vow}{acc}]*)([‿⁀]+[^‿⁀@{vow}{acc}])", r"\1@\2", pron) + + # 4. in a consonant cluster, move @ forward so it's before the last consonant + pron = re.sub(rf"@([^‿⁀@{vow}{acc}]*)([^‿⁀@{vow}{acc}ːˑ()ʲ]ʲ?[ːˑ()]*‿?[{vow}{acc}])", r"\1@\2", pron) + + # 5. move @ backward if in the middle of a "permanent onset" cluster, + # e.g. sk, str, that comes before a vowel, putting the @ before + # the permanent onset cluster + def matcher1(match: re.Match[str]) -> str: + a, aund, b, bund, c, d = match.groups() + if f"{a}{b}{c}" in perm_syl_onset or c == "j" and re.search(r"[čǰɕӂʲ]", b): + return f"@{a}{aund}{b}{bund}{c}{d}" + elif f"{b}{c}" in perm_syl_onset: + return f"{a}{aund}@{b}{bund}{c}{d}" + return "" + + pron = re.compile( + rf"([^‿⁀@_{vow}{acc}]?)(_*)([^‿⁀@_{vow}{acc}])(_*)@([^‿⁀@{vow}{acc}ːˑ()ʲ])(ʲ?[ːˑ()]*[‿⁀]*[{vow}{acc}])" + ).sub(matcher1, pron) + + # 6. if / is present (explicit syllable boundary), remove any @ (automatic boundary) and convert / to @ + if "/" in pron: + pron = re.compile(rf"[^{vow}{acc}]+").sub( + lambda match: x.replace("@", "").replace("/", "@") if "/" in (x := match[1]) else x, pron + ) + + # 7. 
remove @ followed by a final consonant cluster + pron = re.sub(rf"@([^‿⁀@{vow}]+⁀)$", r"\1", pron) + + # 8. remove @ preceded by an initial consonant cluster (should only happen when / is inserted by user or in цз, чж sequences) + pron = re.sub(rf"^(⁀[^‿⁀@{vow}]+)@", r"\1", pron) + + # 9. make sure @ isn't directly before linking ‿⁀ + pron = re.sub(r"@([‿⁀]+)", r"\1@", pron) + + # handle word-initial unstressed o and a; note, vowels always + # followed by at least one char because of word-final ⁀ + #: after syllabification because syllabification doesn't know + # about ɐ as a vowel + pron = re.sub(rf"^⁀[ao]([^{acc}])", r"⁀ɐ\1", pron) + + # split by syllable + syllable = pron.split("@") + + # create set of 1-based syllable indexes of stressed syllables (acute, grave, circumflex) + stress = [bool(re.search(accents, syl)) for syl in syllable] + + # iterate syllable by syllable to handle stress marks, vowel allophony + syl_conv: list[str] = [] + for j, syl in enumerate(syllable): + # vowel allophony + if stress[j]: + # convert acute/grave/circumflex accent to appropriate + # IPA marker of primary/secondary/unmarked stress + alnum = 0 + syl = re.sub(r"(.*)́", r"ˈ\1", syl) + syl = re.sub(r"(.*)̀", r"ˌ\1", syl) + syl = syl.replace(CFLEX, "") + elif stress[j + 1]: + alnum = 1 + else: + alnum = 2 + syl_conv.append( + re.compile(vowels_c).sub(lambda match: allophones[a][alnum] if (a := match[1]) else "", syl) + ) + + pron = "".join(syl_conv) + + # Optional (j) before ɪ, which is always unstressed + pron = re.sub(rf"([{ipa_vow}])jɪ", r"\1(j)ɪ", pron) + + # consonant assimilative palatalization of tn/dn/sn/zn, depending on whether [rl] precedes + def matcher2(match: re.Match[str]) -> str: + a, b, c = match.groups() + return f"{a}{b}ʲ{c}" if not a else f"{a}{b}⁽ʲ⁾{c}" + + pron = re.compile(r"([rl]?)([ː()ˈˌ]*[dtsz])([ː()ˈˌ]*nʲ)").sub(matcher2, pron) + + # consonant assimilative palatalization of st/zd, depending on whether [rl] precedes + pron = re.compile(r"([rl]?)([ˈˌ]?[sz])([ː()ˈˌ]*[td]ʲ)").sub(matcher2, pron) + + # general consonant assimilative palatalization + def matcher3(match: re.Match[str]) -> str: + a, b, c = match.groups() + if f"{a}{c}" in cons_assim_palatal["compulsory"]: + return f"{a}ʲ{b}{c}" + elif f"{a}{c}" in cons_assim_palatal["optional"]: + return f"{a}⁽ʲ⁾{b}{c}" + return f"{a}{b}{c}" + + while True: + new_pron = re.compile(r"([szntdpbmfcĵx])([ː()ˈˌ]*)([szntdpbmfcĵlk]ʲ)").sub(matcher3, pron) + if new_pron == pron: + break + pron = new_pron + + # further assimilation before alveolopalatals + pron = re.sub(r"n([ː()ˈˌ]*)([čǰɕӂ])", r"nʲ\1\2", pron) + + # optional palatal assimilation of вп, вб only word-initially + pron = re.sub(r"⁀([ː()ˈˌ]*[fv])([ː()ˈˌ]*[pb]ʲ)", r"⁀\1⁽ʲ⁾\2", pron) + + # optional palatal assimilation of бв but not in обв- + pron = re.sub(r"b([ː()ˈˌ]*vʲ)", r"b⁽ʲ⁾\1", pron) + if re.search(rf"⁀o{accents}?bv", word[i]): + # ə in case of a word with a preceding preposition + pron = re.sub(r"⁀([ː()ˈˌ]*[ɐəo][ː()ˈˌ]*)b⁽ʲ⁾([ː()ˈˌ]*vʲ)", r"⁀\1b\2", pron) + + if re.search(r"ls[äạ]⁀", word[i]): + pron = pron.replace("lsʲə⁀", "ls⁽ʲ⁾ə⁀") + + word[i] = pron + + text = " ".join(word) + text = "[" + text + "]" + + # Front a and u between soft consonants. If between a soft and + # optionally soft consonant (should only occur in that order, shouldn't + # ever have a or u preceded by optionally soft consonant), + # split the result into two. 
We only split into two even if there + # happen to be multiple optionally fronted a's and u's to avoid + # excessive numbers of possibilities (and it simplifies the code). + # 1. First, temporarily add soft symbol to inherently soft consonants. + text = re.sub(r"([čǰɕӂj])", r"\1ʲ", text) + + # 2. Handle case of [au] between two soft consonants + text = re.compile(r"(ʲ[ː()]*)([auʊ])([ˈˌ]?.ʲ)").sub(lambda match: match[1] + fronting[match[2]] + match[3], text) + + # 3. Handle [au] between soft consonant and optional j, which is still fronted + text = re.compile(r"(ʲ[ː()]*)([auʊ])([ˈˌ]?\(jʲ\))").sub( + lambda match: match[1] + fronting[match[2]] + match[3], text + ) + + # 4. Handle case of [au] between soft and optionally soft consonant + if re.search(r"ʲ[ː()]*[auʊ][ˈˌ]?.⁽ʲ⁾", text): + opt_hard = re.sub(r"(ʲ[ː()]*)([auʊ])([ˈˌ]?.)⁽ʲ⁾", r"\1\2\3", text) + opt_soft = re.compile(r"(ʲ[ː()]*)([auʊ])([ˈˌ]?.)⁽ʲ⁾").sub( + lambda match: match[1] + fronting[match[2]] + match[3] + "ʲ", text + ) + text = f"{opt_hard}, {opt_soft}" + + # 5. Undo addition of soft symbol to inherently soft consonants. + text = re.sub(r"([čǰɕӂj])ʲ", r"\1", text) + + # convert special symbols to IPA + text = translate(text, translit_conv_j) + # text = re.sub(r"[cĵ]ʲ", translit_conv_j, text) + # text = re.sub(r"[cčgĉĝĵǰšžɕӂ]", translit_conv, text) + text = translate(text, translit_conv) + + # Assimilation involving hiatus of ɐ and ə + text = re.sub(r"ə([‿⁀]*)[ɐə]", r"ɐ\1ɐ", text) + + # eliminate ⁀ symbol at word boundaries + # eliminate _ symbol that prevents assimilations + text = re.sub(r"[⁀_]", "", text) + text = re.sub(r"j([^aæeiɵuʉ])", r"ɪ̯\1", text) + text = re.sub(r"j$", "ɪ̯", text) + text = re.sub(r"l([^ʲ])", r"ɫ\1", text) + text = re.sub(r"l$", "ɫ", text) + text = re.sub(r"([aæə])[()]ɪ̯[()]ɪsʲ$", r"\1ɪ̯əsʲ", text) + + return text + + +def transcript(text: str) -> str: + """ + >>> transcript("вот") + '[vot]' + >>> transcript("прон") + '[pron]' + >>> transcript("молоко́") + '[məɫɐˈko]' + >>> transcript("нево́лящий") + '[nʲɪˈvolʲɪɕːɪɪ̯]' + """ + return ipa(text, "", "", "")