-- モジュール:ain-kana-conv
-- Documentation for this module can be created at モジュール:ain-kana-conv/doc
-- Module table; the public entry points are attached to it below and it is
-- returned at the end of the file.
local export = {}
-- Letters accepted as a syllable onset: do_convert checks the character in
-- front of each vowel against this list (also after lowercasing it).
local CONSONANTS = {
"p", "t", "c", "k",
"m", "n", "s", "h",
"w", "r", "y", "'"
}
-- Characters that do_convert treats as a syllable nucleus, in plain and
-- acute-accented form.
local VOWELS = {
"a", "i", "u", "e", "o",
"á", "í", "ú", "é", "ó",
}
-- Maps each acute-accented vowel to its plain form; convert_syllable uses it
-- to strip the accent before the katakana lookup.
local ACCENT_CONVERSION_TABLE = {
["á"] = "a", ["í"] = "i", ["ú"] = "u", ["é"] = "e", ["ó"] = "o",
}
-- Romanized onset + vowel (or a bare vowel) -> katakana.
-- convert_syllable looks up the spelling exactly as typed first and the
-- lowercased spelling second, so the upper-case keys ("CA", "CU", ...) are
-- only selected by upper-case input.
-- "nn" is the kana convert_syllable emits for a coda "n" that is followed by
-- another character; "tt" is not referenced by the code in this file.
local CONVERSION_TABLE = {
[ "a"] = "ア", [ "i"] = "イ", [ "u"] = "ウ", [ "e"] = "エ", [ "o"] = "オ",
["'a"] = "ア", ["'i"] = "イ", ["'u"] = "ウ", ["'e"] = "エ", ["'o"] = "オ",
["ka"] = "カ", ["ki"] = "キ", ["ku"] = "ク", ["ke"] = "ケ", ["ko"] = "コ",
["sa"] = "サ", ["si"] = "シ", ["su"] = "ス", ["se"] = "セ", ["so"] = "ソ",
["ta"] = "タ", ["tu"] = "ト゚", ["te"] = "テ", ["to"] = "ト",
["ca"] = "チャ", ["ci"] = "チ", ["cu"] = "チュ", ["ce"] = "チェ", ["co"] = "チョ",
["CA"] = "サ゚", ["CU"] = "ス゚", ["CE"] = "セ゚", ["CO"] = "ソ゚",
["na"] = "ナ", ["ni"] = "ニ", ["nu"] = "ヌ", ["ne"] = "ネ", ["no"] = "ノ",
["ha"] = "ハ", ["hi"] = "ヒ", ["hu"] = "フ", ["he"] = "ヘ", ["ho"] = "ホ",
["pa"] = "パ", ["pi"] = "ピ", ["pu"] = "プ", ["pe"] = "ペ", ["po"] = "ポ",
["ma"] = "マ", ["mi"] = "ミ", ["mu"] = "ム", ["me"] = "メ", ["mo"] = "モ",
["ya"] = "ヤ", ["yi"] = "イ", ["yu"] = "ユ", ["ye"] = "イェ", ["yo"] = "ヨ",
["ra"] = "ラ", ["ri"] = "リ", ["ru"] = "ル", ["re"] = "レ", ["ro"] = "ロ",
["wa"] = "ワ", ["wi"] = "ヰ", ["we"] = "ヱ", ["wo"] = "ヲ",
["nn"] = "ン", ["tt"] = "ッ"
}
-- Syllable-final consonant -> kana. convert_syllable indexes this table with
-- the last character exactly as typed, so the upper-case keys (N, S, T)
-- select a different glyph from their lower-case counterparts.
local CODA_CONS = {
["w"] = "ゥ", ["y"] = "ィ",
["m"] = "ㇺ", ["n"] = "ㇴ", ["N"] = "𛅧",
["s"] = "ㇱ",["S"] = "ㇲ",
["p"] = "ㇷ゚", ["t"] = "ッ", ["T"] = "ㇳ", ["k"] = "ㇰ"
}
-- Syllable-final consonants whose kana depends on the vowel in front of them:
-- the outer key is the coda letter, the inner key is the preceding vowel.
-- "h" and "x" map to the same kana.
local CODA_VARA = {
["r"] = {
["a"] = "ㇻ", ["i"] = "ㇼ", ["u"] = "ㇽ", ["e"] = "ㇾ", ["o"] = "ㇿ"
},
["h"] = {
["a"] = "ㇵ", ["i"] = "ㇶ", ["u"] = "ㇷ", ["e"] = "ㇸ", ["o"] = "ㇹ"
},
["x"] = {
["a"] = "ㇵ", ["i"] = "ㇶ", ["u"] = "ㇷ", ["e"] = "ㇸ", ["o"] = "ㇹ"
}
}
-- Kana sequence -> list of alternative spellings for that sequence.
-- generate_variants collects the keys that occur in a converted string and
-- apply_variants substitutes every listed alternative for each of them.
local VARIANT_TABLE = {
-- tu
["ト゚"] = { "ツ゚", "トゥ" },
-- coda -n
["ㇴ"] = { "ン" },
-- wV -> wlV (single kana written as ウ + small vowel)
["ヱ"] = { "ウェ" },
["ヰ"] = { "ウィ" },
["ヲ"] = { "ウォ" },
-- pp -> tp (small ㇷ゚ before a p-row kana written as ッ)
["ㇷ゚パ"] = { "ッパ" },
["ㇷ゚ピ"] = { "ッピ" },
["ㇷ゚ペ"] = { "ッペ" },
["ㇷ゚プ"] = { "ップ" },
["ㇷ゚ポ"] = { "ッポ" },
-- kk -> tk (small ㇰ before a k-row kana written as ッ)
["ㇰカ"] = { "ッカ" },
["ㇰキ"] = { "ッキ" },
["ㇰケ"] = { "ッケ" },
["ㇰク"] = { "ック" },
["ㇰコ"] = { "ッコ" },
-- coda -y -> full-size i
["ィ"] = { "イ" },
-- coda -w -> full-size u
["ゥ"] = { "ウ" },
-- -r(_a/_i/_e/_o) -> -r(_u): vowel-dependent coda r written with ㇽ
["ㇻ"] = { "ㇽ" },
["ㇼ"] = { "ㇽ" },
["ㇾ"] = { "ㇽ" },
["ㇿ"] = { "ㇽ" },
}
--- Report whether `item` is one of the values stored in `items`.
-- Declared `local` so the helper no longer leaks into the global environment.
---@param item any value to look for (compared with ==)
---@param items table table whose values are scanned; key order is irrelevant
---@return boolean true when at least one value equals `item`
local function in_values(item, items)
    for _, value in pairs(items) do
        if value == item then
            return true
        end
    end
    return false
end
--- Report whether `item` is a key of `items`.
-- Declared `local` so the helper no longer leaks into the global environment,
-- and implemented as a direct index instead of a scan over every key.
---@param item any candidate key
---@param items table table whose keys are consulted
---@return boolean true when `items` has an entry stored under `item`
local function in_keys(item, items)
    -- A stored `false` still counts as "key present", hence the nil comparison.
    return items[item] ~= nil
end
--- Convert one romanized syllable (optional onset, vowel, optional coda) to katakana.
--
-- The last character is treated as a coda when it is a key of CODA_CONS or
-- CODA_VARA; the rest is looked up in CONVERSION_TABLE, first as typed and
-- then lowercased. An acute-accented nucleus is converted like its plain
-- vowel and the whole result is wrapped in an overline <u> element.
---@param syllable string the romanized syllable
---@param next_char string|nil first character of the following syllable, if any
---@return string katakana for the syllable
local function convert_syllable(syllable, next_char)
    local length = mw.ustring.len(syllable)
    local last_char = mw.ustring.sub(syllable, length, length)
    local remains = syllable
    local coda = ""

    if CODA_CONS[last_char] ~= nil then
        remains = mw.ustring.sub(syllable, 1, length - 1)
        coda = CODA_CONS[last_char]
        -- ruunpe: ルウㇴペ or ルウンペ? Provisionally the latter is used, i.e. a
        -- coda "n" that is followed by anything is written with the full-size ン.
        if last_char == "n" and next_char ~= nil and next_char ~= "" then
            coda = CONVERSION_TABLE["nn"]
        end
    elseif CODA_VARA[last_char] ~= nil then
        remains = mw.ustring.sub(syllable, 1, length - 1)
        -- The kana of this coda depends on the preceding vowel. CODA_VARA is
        -- keyed by plain lower-case vowels only, so normalise case and accent
        -- first; otherwise "kár" or "Ar" would yield a nil coda and crash on
        -- the concatenation below.
        local preceding = mw.ustring.lower(mw.ustring.sub(syllable, length - 1, length - 1))
        preceding = ACCENT_CONVERSION_TABLE[preceding] or preceding
        coda = CODA_VARA[last_char][preceding]
        if coda == nil then
            error("cannot find katakana for coda ‘" .. last_char .. "’ after ‘" .. preceding .. "’")
        end
    end

    -- Strip the accent from the nucleus but remember it for the overline.
    local accented_flag = false
    local remains_length = mw.ustring.len(remains)
    local nucleus = mw.ustring.sub(remains, remains_length, remains_length)
    local plain_nucleus = ACCENT_CONVERSION_TABLE[nucleus]
    if plain_nucleus ~= nil then
        accented_flag = true
        remains = mw.ustring.sub(remains, 1, remains_length - 1) .. plain_nucleus
    end

    -- Exact spelling first so that upper-case keys such as "CA" win over "ca".
    local kana = CONVERSION_TABLE[remains]
    if kana == nil then
        kana = CONVERSION_TABLE[mw.ustring.lower(remains)]
    end
    if kana == nil then
        error("cannot find katakana for CV pair: ‘" .. remains .. "’")
    end

    local converted = kana .. coda
    if accented_flag then
        converted = "<u style='text-decoration:overline;'>" .. converted .. "</u>"
    end
    return converted
end
--- Expand `result` into every combination of alternative spellings.
--
-- Keys from `variant_keys[index]` onwards are processed in order. For each key
-- every string produced so far is kept as-is and additionally re-emitted once
-- per alternative listed in VARIANT_TABLE, with all occurrences of the key
-- replaced. The untouched input is therefore always the first element.
---@param result string
---@param variant_keys string[]
---@param index number
local function apply_variants(result, variant_keys, index)
    local expanded = { result }
    for key_position = index, #variant_keys do
        local key = variant_keys[key_position]
        local alternatives = VARIANT_TABLE[key]
        local next_round = {}
        for _, candidate in ipairs(expanded) do
            next_round[#next_round + 1] = candidate
            for _, alternative in ipairs(alternatives) do
                -- gsub also returns a match count; keep only the string.
                local replaced = candidate:gsub(key, alternative)
                next_round[#next_round + 1] = replaced
            end
        end
        expanded = next_round
    end
    return expanded
end
--- List `target` together with all of its alternative kana spellings.
-- Only VARIANT_TABLE keys that actually occur in `target` are handed to
-- apply_variants, which performs the expansion.
---@param target string
local function generate_variants(target)
    local present_keys = {}
    for key in pairs(VARIANT_TABLE) do
        if string.find(target, key) ~= nil then
            present_keys[#present_keys + 1] = key
        end
    end
    return apply_variants(target, present_keys, 1)
end
--- Convert one romanized word to katakana.
--
-- Steps: (1) return the word untouched when it contains nothing convertible,
-- (2) strip the separator characters "-", "=" and ".", (3) group the remaining
-- characters into syllables around each vowel, (4) convert each syllable with
-- convert_syllable and join the results.
---@param temp string a single word (no spaces)
---@return string katakana, or `temp` itself when there is nothing to convert
local function do_convert(temp)
    -- Extensibility has to be kept in mind:
    --   * N for ン
    --   * proper nouns such as Pawci-Kamuy
    --   * accent marks and other special notation: %u and %l include those,
    --     whereas %a would also match hiragana/katakana and cannot be used.
    local ignore_chars = "%-=."
    local valid_pattern = "[%u%l'" .. ignore_chars .. "]+"
    -- TODO: hotne = ホッネ? ホㇳネ?
    -- TODO: wan e-tu = ワㇴ エト゚? ワネト゚?

    -- Input that is already kana, e.g. {{ain-kana|hoy'oy|ヒオイオイ}}, is returned
    -- as-is. (Deciding this only by membership in CONVERSION_TABLE turned out
    -- to be too hasty, hence the pattern.) The match is deliberately not
    -- anchored: a word with any convertible character goes through conversion
    -- so that stray characters raise a visible error instead of passing through.
    if not mw.ustring.match(temp, valid_pattern) then
        return temp
    end

    -- Normalize: drop the separators. Case is NOT folded here because
    -- upper-case letters such as N select different kana later on.
    local stripped = mw.ustring.gsub(temp, "[" .. ignore_chars .. "]", "")
    if stripped == "" then
        -- Only separators (e.g. "-"): nothing to convert, keep the word.
        return temp
    end

    -- Split once into code points so the loops below index in O(1).
    local chars = {}
    for char in mw.ustring.gmatch(stripped, ".") do
        chars[#chars + 1] = char
    end
    local char_count = #chars

    -- TODO: iyayiraykere = イヤィイラィケレ? イヤイラィケレ? Provisionally the latter.
    -- Every vowel opens a new syllable; a consonant directly in front of it
    -- becomes that syllable's onset.
    local group_ids = {}
    local syllable_count = 1
    for i = 1, char_count do
        if in_values(chars[i], VOWELS) then
            local char_before = chars[i - 1] or ""
            if in_values(char_before, CONSONANTS)
                or in_values(mw.ustring.lower(char_before), CONSONANTS) then
                group_ids[i - 1] = syllable_count
            end
            group_ids[i] = syllable_count
            syllable_count = syllable_count + 1
        end
    end

    -- Leading characters that belong to no syllable cannot be converted.
    if group_ids[1] == nil then
        error("cannot find a syllable at the start of ‘" .. stripped .. "’")
    end

    -- Fill codas: every unassigned character joins the syllable before it.
    for i = 2, char_count do
        if group_ids[i] == nil then
            group_ids[i] = group_ids[i - 1]
        end
    end

    -- Convert syllable by syllable; each one also sees the first character of
    -- its successor (the final syllable sees none).
    local pieces = {}
    local head = 1
    for i = 2, char_count do
        if group_ids[i] ~= group_ids[head] then
            local content = table.concat(chars, "", head, i - 1)
            pieces[#pieces + 1] = convert_syllable(content, chars[i])
            head = i
        end
    end
    local last_content = table.concat(chars, "", head, char_count)
    pieces[#pieces + 1] = convert_syllable(last_content)
    return table.concat(pieces)
end
-- local function valid_ainu_word(word)
-- -- TODO:
-- end
-- Debugging entry point: takes a plain string instead of a frame and returns
-- the result of do_convert for it, without variant expansion.
function export.debug(word)
return do_convert(word)
end
--- Shared implementation of export.convert and export.no_variants.
--
-- Reads the positional arguments of the calling template, converts each one
-- word by word (words are separated by a single space) and joins the distinct
-- results with "/". An empty or missing argument falls back to the title of
-- the current page, and at least one argument slot is always processed.
---@param frame table frame object passed in by #invoke
---@param with_variants boolean when true, every result is expanded with generate_variants
---@return string the distinct kana spellings joined by "/"
local function convert_arguments(frame, with_variants)
    -- Get args: parameter 1 is a list in which holes are allowed.
    local params = {
        [1] = { list = true, allow_holes = true }
    }
    local args = require("Module:parameters").process(frame:getParent().args, params)

    local items = {}
    local seen = {}
    -- Append `item` unless an identical spelling was already collected.
    local function add_unique(item)
        if not seen[item] then
            seen[item] = true
            items[#items + 1] = item
        end
    end

    -- NOTE(review): `maxindex` is presumably the highest supplied index as
    -- reported by Module:parameters for lists with holes — confirm there.
    for i = 1, math.max(args[1].maxindex, 1) do
        local original_str = args[1][i]
        if not original_str or original_str == "" then
            original_str = mw.title.getCurrentTitle().text
        end

        local converted_words = {}
        for word in mw.text.gsplit(original_str, " ") do
            converted_words[#converted_words + 1] = do_convert(word)
        end
        local converted_str = table.concat(converted_words, " ")

        if with_variants then
            for _, variant in ipairs(generate_variants(converted_str)) do
                add_unique(variant)
            end
        else
            add_unique(converted_str)
        end
    end
    return table.concat(items, "/")
end

--- Template entry point: kana for every argument, including all variant spellings.
---@param frame table frame object passed in by #invoke
---@return string
function export.convert(frame)
    return convert_arguments(frame, true)
end

--- Template entry point: kana for every argument, without variant spellings.
---@param frame table frame object passed in by #invoke
---@return string
function export.no_variants(frame)
    return convert_arguments(frame, false)
end
return export