မော်ဂျူး:uk-pronunciation
Documentation for this module may be created at မော်ဂျူး:uk-pronunciation/doc
-- Table of functions exposed by this module; returned at the end of the file.
local export = {}
-- IPA formatting module; used to render the transcription when output == "template".
local m_IPA = require("Module:IPA")
-- Language object for the code "uk", passed to the IPA formatter.
local uk = require("Module:languages").getByCode("uk")
-- Short alias for the mw.ustring version of gsub, used for every substitution below.
local gsub = mw.ustring.gsub
-- Short alias for building a string from code points.
local U = mw.ustring.char
-- U+0301 and U+0300: either one in the input is turned into an IPA stress mark.
local acute = U(0x301)
local grave = U(0x300)
--- Builds a phonetic IPA transcription of a Ukrainian word.
--
-- The word is lower-cased, rewritten by a series of orthographic and
-- phonetic substitution passes, and finally has its stress mark moved to the
-- syllable boundary.
--
-- @param word   The word to transcribe, or a frame object. When a frame is
--               given, `word`, `accent` and `output` are read from the frame's
--               arguments, falling back to the parent frame's arguments. When
--               the word is missing or empty, the current page title is used.
-- @param accent Pass "off" to suppress both the automatic stress on
--               monosyllables and the missing-accent warning.
-- @param output Pass "template" to get the formatted IPA markup (plus a
--               preview-only warning and tracking category when the word has
--               several vowels but no accent mark); any other value returns
--               the bare transcription string.
-- @return       A string: formatted markup or the raw transcription.
function export.pronunciation(word, accent, output)
	if type(word) == "table" then
		-- Called from a template: `word` is the frame. All three right-hand
		-- expressions are evaluated before any assignment happens, so `word`
		-- still refers to the frame in each of them.
		word, accent, output =
			word.args[1] or word:getParent().args[1],
			word.args.accent or word:getParent().args.accent,
			word.args.output or word:getParent().args.output
	end

	if not word or (word == "") then
		word = mw.title.getCurrentTitle().text
	end

	-- Returns an error if the word contains alphabetic characters that are not Cyrillic.
	require("Module:script utilities").checkScript(word, "Cyrl")

	word = mw.ustring.lower(word)

	-- A word with more than one vowel letter and no accent mark cannot be
	-- transcribed reliably; remember that so the template output can warn.
	local needsAccent = false
	if accent ~= "off" and not mw.ustring.find(word, "[" .. acute .. grave .. "]") then
		if require("Module:string").count(word, "[аеєиіїоуюя]") > 1 then
			needsAccent = true
		end
	end

	-- The levels are applied in index order (1, 2, 3), i.e. longest sequences first.
	local phonetic_chars_map = {
		-- single characters that map to IPA sounds; these are processed last
		[3] = {
			["а"] = "ɑ", ["б"] = "b", ["в"] = "ʋ", ["г"] = "ɦ", ["ґ"] = "ɡ",
			["д"] = "d", ["е"] = "ɛ", ["є"] = "jɛ", ["ж"] = "ʒᵊ", ["з"] = "z",
			["и"] = "ɪ", ["і"] = "i", ["ї"] = "ji", ["й"] = "j", ["к"] = "k",
			["л"] = "l", ["м"] = "m", ["н"] = "n", ["о"] = "ɔ", ["п"] = "p",
			["р"] = "r", ["с"] = "s", ["т"] = "t", ["у"] = "u", ["ф"] = "f",
			["х"] = "x", ["ц"] = "t͡s", ["ч"] = "t͡ʃᵊ", ["ш"] = "ʃᵊ", ["щ"] = "ʃᵊt͡ʃᵊ",
			["ь"] = "ʲ", ["ю"] = "ju", ["я"] = "jɑ", ["’"] = "j",
			-- accented vowels
			[acute] = "ˈ", [grave] = "ˈ",
		},
		-- character sequences of two that map to IPA sounds
		[2] = {
			["дж"] = "d͡ʒᵊ", ["дз"] = "d͡z",
			-- Dental plosives assimilate to following hissing/hushing consonants, which is not noted in the spelling.
			["дс"] = "d͡zs", ["дш"] = "d͡ʒᵊʃᵊ", ["дч"] = "d͡ʒᵊt͡ʃᵊ", ["дц"] = "d͡zt͡s",
			["тс"] = "t͡s", ["тш"] = "t͡ʃᵊʃᵊ", ["тч"] = "t͡ʃᵊː", ["тц"] = "t͡sː",
		},
		-- character sequences of three that map to IPA sounds
		[1] = {
			["дзь"] = "d͡zʲ",
			-- Dental plosives assimilate to following hissing/hushing consonants, which is not noted in the spelling.
			["тьс"] = "t͡sʲː"
		},
	}

	local phonetic = word

	-- Ordered list of { pattern, replacement } pairs. The order is significant
	-- ("стськ" must be handled before "стс", the anchored "^зш"/"^зч" before
	-- their unanchored forms), so this is a sequence walked with ipairs rather
	-- than a hash walked with pairs, whose traversal order is unspecified.
	local orthographic_replacements = {
		-- first apply consonant cluster simplifications that always occur orthographically
		{ "нтськ", "ньськ" },
		{ "стськ", "ськ" },
		{ "нтст", "нст" },
		{ "стч", "шч" },
		{ "стд", "зд" },
		{ "стс", "сː" },
		{ "^зш", "шː" },
		{ "зш", "жш" },
		{ "^зч", "шч" },
		{ "зч", "жч" },
		-- then long consonants that are orthographically geminated.
		{ "([бвгґд])%1", "%1ː" },
		{ "([^д]+)жж", "%1жː" }, -- джж sequence encode diphonemic дж
		{ "([^д]+)зз", "%1зː" }, -- дзз sequence encode diphonemic дз
		{ "([йклмнпрстфхцчшщ])%1", "%1ː" },
		{ "дждж", "джː" },
		{ "дздз", "дзː" },
	}

	for _, rule in ipairs(orthographic_replacements) do
		phonetic = gsub(phonetic, rule[1], rule[2])
	end

	-- remap apostrophe to '!' so that it doesn't conflict with IPA stress mark
	phonetic = gsub(phonetic, "'", "!")

	-- replace multiple letter sequences
	for _, replacements in ipairs(phonetic_chars_map) do
		for key, replacement in pairs(replacements) do
			phonetic = gsub(phonetic, key, replacement)
		end
	end

	-- move stress mark, added by phonetic_chars_map, before vowel
	phonetic = gsub(phonetic, "([ɑɛiɪuɔ])ˈ", "ˈ%1")

	-- add accent if the word is monosyllabic and not "|accent=off";
	-- skip it when the input already carried an accent mark, otherwise the
	-- vowel would end up with two stress marks
	local _, numberOfVowels = gsub(phonetic, "[ɑɛiɪuɔ]", "")
	if numberOfVowels == 1 and accent ~= "off"
		and not mw.ustring.find(phonetic, "ˈ", 1, true) then
		phonetic = gsub(phonetic, "([ɑɛiɪuɔ])", "ˈ%1")
	end

	-- palatalizable consonants before /i/ or /j/ become palatalized
	local palatalizable = "[tdsznlrbpʋfɡmkɦxʃᵊʒᵊ]"
	phonetic = gsub(phonetic, "(" .. palatalizable .. ")([ː]?)([ˈ]?)i", "%1ʲ%2%3i")
	phonetic = gsub(phonetic, "(" .. palatalizable .. ")([ː]?)j", "%1ʲ%2")

	-- eliminate garbage sequences of [ʲːj] resulting from -тьс- cluster followed by [j]
	phonetic = gsub(phonetic, "ʲːj", "ʲː")

	-- consonant simplification: ст + ц' → [с'ц']. We do it here because of palatalization.
	-- Due to the т +ц → [ц:] rule length is present. According to Орфоепскі словник p. 13,
	-- both forms are proper, without length in normal (colloquial) speech and with length
	-- in slow speech, so we parenthesize the length as optional.
	phonetic = gsub(phonetic, "st͡sʲ([ː]?)", "sʲt͡sʲ(%1)")

	-- assimilation: voiceless + voiced = voiced + voiced
	-- should /ʋ/ be included as voiced? Орфоепічний словник doesn't voice initial cluster of шв (p. 116)
	local voiced_obstruent = "[bdzʒᵊɡɦ]"
	-- Ordered { voiceless, voiced } pairs, longest segments first: an affricate
	-- has to be voiced as a unit before the rule for its fricative component
	-- runs, otherwise e.g. "t͡s" + voiced would come out as "t͡z".
	local voicing = {
		{ "ʃᵊt͡ʃᵊ", "ʒᵊd͡ʒᵊ" },
		{ "t͡sʲ", "d͡zʲ" },
		{ "t͡ʃᵊ", "d͡ʒᵊ" },
		{ "t͡s", "d͡z" },
		{ "tʲ", "dʲ" },
		{ "sʲ", "zʲ" },
		{ "ʃᵊ", "ʒᵊ" },
		{ "p", "b" },
		{ "f", "v" },
		{ "t", "d" },
		{ "s", "z" },
		{ "k", "ɡ" },
		{ "x", "ɣ" },
	}
	for _, pair in ipairs(voicing) do
		phonetic = gsub(phonetic, pair[1] .. "(" .. voiced_obstruent .. "+)", pair[2] .. "%1")
	end

	-- In the sequence of two consonants, of which the second is soft, the first is pronounced soft too
	-- unless the first consonant is a labial, namely б, п, в, ф, м.
	phonetic = gsub(phonetic, "([tdsznl])(.)ʲ", "%1ʲ%2ʲ")
	phonetic = gsub(phonetic, "([tdsznl])t͡sʲ", "%1ʲt͡sʲ")
	phonetic = gsub(phonetic, "([tdsznl])d͡zʲ", "%1ʲd͡zʲ")
	phonetic = gsub(phonetic, "t͡s(.)ʲ", "t͡sʲ%1ʲ")
	phonetic = gsub(phonetic, "d͡z(.)ʲ", "d͡zʲ%1ʲ")
	phonetic = gsub(phonetic, "d͡zt͡sʲ", "d͡zʲt͡sʲ")
	phonetic = gsub(phonetic, "t͡sd͡zʲ", "t͡sʲd͡zʲ")

	-- Hushing consonants ж, ч, ш assimilate to the following hissing consonants, giving a long hissing consonant:
	-- [ʒᵊ] + [t͡sʲ] → [zʲt͡sʲ], [t͡ʃᵊ] + [t͡sʲ] → [t͡sʲː], [ʃᵊ] + [t͡sʲ] → [sʲt͡sʲ], [ʃᵊ] + [sʲ] → [sʲː]
	phonetic = gsub(phonetic, "ʒᵊt͡sʲ", "zʲt͡sʲ")
	phonetic = gsub(phonetic, "t͡ʃᵊt͡sʲ", "t͡sʲː")
	phonetic = gsub(phonetic, "ʃᵊt͡sʲ", "sʲt͡sʲ")
	phonetic = gsub(phonetic, "ʃᵊsʲ", "sʲː")

	-- Hissing consonants before hushing consonants within a word assimilate - on зш and зч word-initially and
	-- word-medially see above.
	-- [s] + [ʃᵊ] → [ʃᵊː], [z] + [ʃᵊ] → [ʒʃᵊ], [z] + [t͡s] → [ʒt͡s]
	-- [z] + [d͡ʒᵊ] → [ʒd͡ʒᵊ]
	phonetic = gsub(phonetic, "zʒᵊ", "ʒᵊː")
	phonetic = gsub(phonetic, "sʃᵊ", "ʃᵊː")
	phonetic = gsub(phonetic, "zt͡s", "ʒt͡s")
	phonetic = gsub(phonetic, "zd͡ʒᵊ", "ʒd͡ʒᵊ")

	-- cleanup: excessive palatalization: CʲCʲCʲ → CCʲCʲ
	phonetic = gsub(phonetic, "([^ɑɛiɪuɔ]+)ʲ([^ɑɛiɪuɔ]+)ʲ([^ɑɛiɪuɔ]+)ʲ", "%1%2ʲ%3ʲ")

	-- unstressed /ɑ/ has an allophone [ɐ]
	phonetic = gsub(phonetic, "([^ˈ])ɑ", "%1ɐ")
	phonetic = gsub(phonetic, "^ɑ", "ɐ")
	-- unstressed /u/ has an allophone [ʊ]
	phonetic = gsub(phonetic, "([^ˈ])u", "%1ʊ")
	phonetic = gsub(phonetic, "^u", "ʊ")
	-- unstressed /ɔ/ has by assimilation an allophone [o] before a stressed syllable with /u/ or /i/
	phonetic = gsub(phonetic, "ɔ([bdzʒᵊɡɦmnlrpftskxʲʃ͡ᵊ]+)ˈ([uiʊ]+)", "o%1ˈ%2")
	-- one allophone [e] covers unstressed /ɛ/ and /ɪ/
	phonetic = gsub(phonetic, "([^ˈ])ɛ", "%1e")
	phonetic = gsub(phonetic, "^ɛ", "e")
	phonetic = gsub(phonetic, "([^ˈ])ɪ", "%1e")
	phonetic = gsub(phonetic, "^ɪ", "e")

	-- /ʋ/ has an allophone [u̯] in a syllable coda
	local vowel = "[ɑɛiɪuɔɐoʊe]"
	phonetic = gsub(phonetic, "(" .. vowel .. "+)ʋ", "%1u̯")
	-- /ʋ/ has an allophone [w] before /ɔ, u/ and voiced consonants (not after a vowel)
	phonetic = gsub(phonetic, "ʋ([ˈ]?)([ɔuoʊbdzʒᵊɡɦmnlr]+)", "w%1%2")
	-- /ʋ/ has an allophone [ʍ] before voiceless consonants (not after a vowel)
	phonetic = gsub(phonetic, "ʋ([pftskxʃᵊ]+)", "ʍ%1")

	-- in a syllable-final position (i.e. the first position of a syllable coda) /j/ has an allophone [i̯]:
	local consonant = "[bdzʒᵊɡɦmnlrpftskxʃᵊʋ]"
	phonetic = gsub(phonetic, "(" .. vowel .. "+)j([ˈ]?)(" .. gsub(consonant, "ʋ", "") .. "+)", "%1i̯%2%3")
	phonetic = gsub(phonetic, "(" .. vowel .. "+)j$", "%1i̯")
	-- also at the beginning of a word before a consonant
	phonetic = gsub(phonetic, "^j(" .. gsub(consonant, "ʋ", "") .. "+)", "i̯%1")

	-- remove old orthographic apostrophe
	phonetic = gsub(phonetic, "!", "")

	-- stress mark in correct place: first pull it in front of the whole
	-- preceding consonant run, then push leading sonorants (and glides) back
	-- to the other side of the mark in the listed contexts
	phonetic = gsub(phonetic, "([bdzʒᵊɡɦjʲmnlrpftskxʃᵊʋwʍː͡]+)ˈ", "ˈ%1")
	phonetic = gsub(phonetic, "([ui]̯)ˈ([ʲ]?" .. vowel .. ")", "ˈ%1%2")
	phonetic = gsub(phonetic, "ˈ(l[ʲ]?[ː]?)(" .. gsub(consonant, "l", "") .. ")", "%1ˈ%2")
	phonetic = gsub(phonetic, "ˈ(r[ʲ]?[ː]?)(" .. gsub(consonant, "r", "") .. ")", "%1ˈ%2")
	phonetic = gsub(phonetic, "ˈ(m[ʲ]?[ː]?)([bpfɦszʃᵊʋʒᵊ])", "%1ˈ%2")
	phonetic = gsub(phonetic, "ˈ(n[ʲ]?[ː]?)([dtfkɡɦlxszʃᵊʋʒᵊ])", "%1ˈ%2")

	if output == "template" then
		-- The tracking category is formatted with the `uk` language object
		-- defined at module level.
		return m_IPA.format_IPA_full(uk, { { pron = "[" .. phonetic .. "]" } } ) ..
			( needsAccent and
				'<span class="error previewonly"><br>The word ' .. word .. ' contains multiple vowels, but has no accent mark, so some of the vowels may be transcribed incorrectly. Please add acute accents to mark stressed syllables.</span>' ..
				require("Module:utilities").format_categories( { "Ukrainian terms with incomplete pronunciation" }, uk)
			or "" )
	else
		return phonetic
	end
end
-- Hand the table of public functions back to the caller of require / #invoke.
return export