Module:el-translit

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module will transliterate Greek language text per WT:EL TR. It is also used to transliterate Cappadocian Greek, Pontic Greek, Tsakonian, and Thracian. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:el-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc and in the language specified by the code lang.
When the transliteration fails, returns nil.

local m_str_utils = require("Module:string utilities")

local gsub = m_str_utils.gsub
local sub = m_str_utils.sub
local U = m_str_utils.char

-- Combining acute accent (U+0301); appended after a Latin vowel to mark stress.
local acute = U(0x301)
-- Combining diaeresis (U+0308); appended after "i" when a vowel + eta pair is split.
local diaeresis = U(0x308)
-- Greek question mark (U+037E), which is handled together with ";" in export.tr.
local erotimatiko = U(0x37E)

-- Table returned by this module; the public functions are attached to it.
local export = {}

-- Single-character transliteration table: each key is exactly one Greek
-- character (or punctuation mark) and each value is its Latin replacement.
-- export.tr applies it one character at a time with gsub(text, ".", tt) and
-- also indexes it directly with a captured vowel, so every key must be a
-- single character. Extraneous kana that had been appended to the unaccented
-- keys made those entries unreachable and have been removed.
local tt = {
	["α"] = "a",  ["ά"] = "á",  ["β"] = "v",  ["γ"] = "g",  ["δ"] = "d",
	["ε"] = "e",  ["έ"] = "é",  ["ζ"] = "z",  ["η"] = "i",  ["ή"] = "í",
	["θ"] = "th", ["ι"] = "i",  ["ί"] = "í",  ["ϊ"] = "ï",  ["ΐ"] = "ḯ",
	["κ"] = "k",  ["λ"] = "l",  ["μ"] = "m",  ["ν"] = "n",  ["ξ"] = "x",
	["ο"] = "o",  ["ό"] = "ó",  ["π"] = "p",  ["ρ"] = "r",  ["σ"] = "s",
	["ς"] = "s",  ["τ"] = "t",  ["υ"] = "y",  ["ύ"] = "ý",  ["ϋ"] = "ÿ",
	["ΰ"] = "ÿ́",  ["φ"] = "f",  ["χ"] = "ch", ["ψ"] = "ps", ["ω"] = "o",
	["ώ"] = "ó",
	["Α"] = "A",  ["Ά"] = "Á",  ["Β"] = "V",  ["Γ"] = "G",  ["Δ"] = "D",
	["Ε"] = "E",  ["Έ"] = "É",  ["Ζ"] = "Z",  ["Η"] = "I",  ["Ή"] = "Í",
	["Θ"] = "Th", ["Ι"] = "I",  ["Ί"] = "Í",  ["Κ"] = "K",  ["Λ"] = "L",
	["Μ"] = "M",  ["Ν"] = "N",  ["Ξ"] = "X",  ["Ο"] = "O",  ["Ό"] = "Ó",
	["Π"] = "P",  ["Ρ"] = "R",  ["Σ"] = "S",  ["Τ"] = "T",  ["Υ"] = "Y",
	["Ύ"] = "Ý",  ["Φ"] = "F",  ["Χ"] = "Ch", ["Ψ"] = "Ps", ["Ω"] = "O",
	["Ώ"] = "Ó",
-- punctuation
	["·"] = ";",
}

-- transliterates any words or phrases
--- Transliterates Greek text into Latin characters.
-- Multi-character sequences are rewritten first, in a fixed order, and the
-- remaining characters are then mapped one by one through the `tt` table.
-- Extraneous kana that had been embedded in the patterns and lookup keys
-- (which stopped the rules from matching plain Greek letters) are removed.
-- @param text the string to transliterate
-- @param lang language code; not read by this implementation
-- @param sc script code; not read by this implementation
-- @return the transliterated string
function export.tr(text, lang, sc)

	-- Dialectal letters written with a combining mark after the base letter.
	text = gsub(gsub(text, "χ̌", "š"), "Χ̌", "Š") -- dialectal
	text = gsub(gsub(text, "ά̤", "ä́"), "Ά̤", "Ä́") -- dialectal
	text = gsub(gsub(text, "α̤", "ä"), "Α̤", "Ä") -- dialectal
	text = gsub(gsub(text, "ό̤", "ö́"), "Ό̤", "Ö́") -- dialectal
	text = gsub(gsub(text, "ο̤", "ö"), "Ο̤", "Ö") -- dialectal

	-- A ";" or Greek question mark that follows anything other than an ASCII
	-- letter or digit becomes "?".
	text = gsub(text, "([^A-Za-z0-9])[;" .. erotimatiko .. "]", "%1?")

	-- αυ / ευ / ηυ: the upsilon becomes "f" or "v" depending on what follows.
	-- The empty capture yields the position just after the upsilon; inside the
	-- callback `text` is still the string being scanned, so that position can
	-- be used to look ahead in it.
	text = gsub(text, "([αεηΑΕΗ])([υύ])()",
				function (vowel, upsilon, position)
					-- Find next character that is not whitespace or punctuation.
					local following = ""
					local pos = position
					while true do
						local ch = sub(text, pos, pos)
						if ch == "" then -- reached end of string
							break
						elseif ch:find("[%s%p]") then
							pos = pos + 1
						else
							following = ch
							break
						end
					end
					-- "f" at the end of the text or before one of the listed
					-- consonants, "v" otherwise.
					local use_f = following == ""
						or ("θκξπσςτφχψ"):find(following, 1, true)
					return tt[vowel]
						.. (upsilon == "ύ" and acute or "")
						.. (use_f and "f" or "v")
				end)

	-- Vowel + eta: the eta is written as "i" with a diaeresis (plus an acute
	-- when the eta is accented).
	text = gsub(text, "([αεοωΑΕΟΩ])([ηή])",
				function (vowel, ita)
					if ita == "ή" then
						return tt[vowel] .. "i" .. diaeresis .. acute
					else
						return tt[vowel] .. "i" .. diaeresis
					end
				end)

	-- Omega + iota; the whole match is looked up in the table.
	text = gsub(text, "[ωΩ][ιί]",
				{["ωι"] = "oï", ["ωί"] = "oḯ",
				 ["Ωι"] = "Oï", ["Ωί"] = "Oḯ"})

	-- Omicron + upsilon is the digraph "ou".
	text = gsub(text, "[οΟ][υύ]",
				{["ου"] = "ou", ["ού"] = "oú",
				 ["Ου"] = "Ou", ["Ού"] = "Oú"})

	-- μπ becomes "b" only at the start of the text or after a space or hyphen.
	-- Returning nothing from the callback leaves the match unchanged.
	text = gsub(text, "(.?)([μΜ])π",
				function (before, mi)
					if before == "" or before == " " or before == "-" then
						if mi == "Μ" then
							return before .. "B"
						else
							return before .. "b"
						end
					end
				end)

	-- Gamma before γ, ξ or χ is written "n".
	text = gsub(text, "γ([γξχ])", "n%1")

	-- Everything else is mapped character by character; characters without
	-- an entry in tt are left as they are.
	text = gsub(text, ".", tt)

	return text
end

return export