-- モジュール:ain-kana-conv
--
-- このモジュールについての説明文ページを モジュール:ain-kana-conv/doc に作成できます

-- Table of public functions; returned at the end of the module.
local export = {}

-- Letters treated as onset consonants when a word is split into syllables
-- (a consonant directly before a vowel joins that vowel's syllable).
-- The apostrophe is included as an explicit onset marker.
local CONSONANTS = {
    "p", "t", "c", "k",
    "m", "n", "s", "h",
    "w", "r", "y", "'"
}
-- Letters treated as syllable nuclei: the five plain vowels and their
-- acute-accented forms. Only lowercase forms are listed.
local VOWELS = {
    "a", "i", "u", "e", "o",
    "á", "í", "ú", "é", "ó",
}
-- Maps an accented vowel to its plain form; used to strip the accent
-- before the syllable is looked up in CONVERSION_TABLE.
local ACCENT_CONVERSION_TABLE = {
    ["á"] = "a", ["í"] = "i", ["ú"] = "u", ["é"] = "e", ["ó"] = "o",
}

-- Maps a romanized syllable body (optional onset + plain vowel) to katakana.
-- Lookup is tried with the exact spelling first and then with the lowercased
-- spelling, so keys containing capital letters (e.g. "Tu", "tU", "CA") select
-- an alternative kana for the same letters.
-- "nn" is the kana used for a coda "n" that is followed by another syllable.
-- "tt" is not referenced by the code visible in this file.
local CONVERSION_TABLE = {
    [ "a"] = "ア",  [ "i"] = "イ", [ "u"] = "ウ", [ "e"] = "エ", [ "o"] = "オ",
    ["'a"] = "ア",  ["'i"] = "イ", ["'u"] = "ウ", ["'e"] = "エ", ["'o"] = "オ",
    ["ka"] = "カ",  ["ki"] = "キ", ["ku"] = "ク", ["ke"] = "ケ", ["ko"] = "コ",
    ["sa"] = "サ",  ["si"] = "シ", ["su"] = "ス", ["se"] = "セ", ["so"] = "ソ",
    ["ta"] = "タ",                 ["tu"] = "ツ゚", ["te"] = "テ", ["to"] = "ト",
    ["Tu"] = "ツ",  ["tU"] = "ト゚",
    ["ca"] = "チャ", ["ci"] = "チ", ["cu"] = "チュ", ["ce"] = "チェ", ["co"] = "チョ",
    ["CA"] = "サ゚",               ["CU"] = "ス゚", ["CE"] = "セ゚", ["CO"] = "ソ゚",
    ["na"] = "ナ",  ["ni"] = "ニ", ["nu"] = "ヌ", ["ne"] = "ネ", ["no"] = "ノ",
    ["ha"] = "ハ",  ["hi"] = "ヒ", ["hu"] = "フ", ["he"] = "ヘ", ["ho"] = "ホ",
    ["pa"] = "パ",  ["pi"] = "ピ", ["pu"] = "プ", ["pe"] = "ペ", ["po"] = "ポ",
    ["ma"] = "マ",  ["mi"] = "ミ", ["mu"] = "ム", ["me"] = "メ", ["mo"] = "モ",
    ["ya"] = "ヤ",  ["yi"] = "イ", ["yu"] = "ユ", ["ye"] = "イェ", ["yo"] = "ヨ",
    ["ra"] = "ラ",  ["ri"] = "リ", ["ru"] = "ル", ["re"] = "レ", ["ro"] = "ロ",
    ["wa"] = "ワ",  ["wi"] = "ヰ",                ["we"] = "ヱ", ["wo"] = "ヲ",
    ["nn"] = "ン",  ["tt"] = "ッ"
}

-- Kana for a syllable-final consonant whose spelling does not depend on the
-- preceding vowel. Capitalised keys ("N", "S", "T") give an alternative kana
-- for the same consonant.
local CODA_CONS = {
    ["w"] = "ゥ", ["y"] = "ィ",
    ["m"] = "ㇺ", ["n"] = "ㇴ", ["N"] = "𛅧",
    ["s"] = "ㇱ",["S"] = "ㇲ",
    ["p"] = "ㇷ゚", ["t"] = "ッ", ["T"] = "ㇳ", ["k"] = "ㇰ"
}

-- Kana for a syllable-final consonant whose spelling varies with the
-- preceding vowel: outer key = coda letter, inner key = plain vowel before it.
-- "h" and "x" map to the same kana.
local CODA_VARA = {
    ["r"] = {
        ["a"] = "ㇻ", ["i"] = "ㇼ", ["u"] = "ㇽ", ["e"] = "ㇾ", ["o"] = "ㇿ"
    },
    ["h"] = {
        ["a"] = "ㇵ", ["i"] = "ㇶ", ["u"] = "ㇷ", ["e"] = "ㇸ", ["o"] = "ㇹ"
    },
    ["x"] = {
        ["a"] = "ㇵ", ["i"] = "ㇶ", ["u"] = "ㇷ", ["e"] = "ㇸ", ["o"] = "ㇹ"
    }
}

--- Report whether `item` occurs among the values of the table `items`.
-- Every entry is visited with `pairs`, so both array-style and keyed
-- entries are considered.
-- NOTE(review): this function is declared without `local` and is therefore
-- a global; it is kept global here so existing references keep working.
-- @param item  value to search for (compared with ==)
-- @param items table whose values are searched
-- @return boolean true when at least one value equals `item`
function in_values(item, items)
    local found = false
    for _, candidate in pairs(items) do
        if candidate == item then
            found = true
            break
        end
    end
    return found
end

--- Report whether `item` is a key of the table `items`.
-- Uses a direct table lookup instead of scanning every key: a key is present
-- exactly when its value is not nil (a stored `false` still counts).
-- NOTE(review): this function is declared without `local` and is therefore
-- a global; it is kept global here so existing references keep working.
-- @param item  candidate key (nil is never a key, so nil yields false)
-- @param items table whose keys are tested
-- @return boolean true when `items` has an entry under `item`
function in_keys(item, items)
    return items[item] ~= nil
end


--- Convert one romanized syllable to katakana.
--
-- The syllable is expected to consist of an optional onset consonant, a
-- vowel (plain or acute-accented) and an optional one-letter coda.
--
-- @param syllable  string: the romanized syllable
-- @param next_char string|nil: first character of the following syllable;
--                  nil or "" when this is the last syllable of the word
-- @return string: the katakana; wrapped in an overline-styled <u> element
--                 when the vowel carried an accent
-- Raises an error when no katakana is known for the onset+vowel pair, or
-- when a vowel-dependent coda has no matching kana.
local function convert_syllable(syllable, next_char)
    local length = mw.ustring.len(syllable)
    local last_char = mw.ustring.sub(syllable, length, length)

    local remains = syllable
    local coda = ""

    if CODA_CONS[last_char] ~= nil then
        -- Coda whose kana does not depend on the vowel.
        remains = mw.ustring.sub(syllable, 1, length - 1)
        coda = CODA_CONS[last_char]
        -- ruunpe: ルウㇴペ or ルウンペ? Tentatively the latter is used, i.e. a
        -- coda "n" followed by another syllable is written with the full ン.
        if last_char == "n" and next_char ~= nil and next_char ~= "" then
            coda = CONVERSION_TABLE["nn"]
        end
    elseif CODA_VARA[last_char] ~= nil then
        -- Coda whose kana depends on the preceding vowel.
        remains = mw.ustring.sub(syllable, 1, length - 1)
        local vowel = mw.ustring.lower(mw.ustring.sub(syllable, length - 1, length - 1))
        -- CODA_VARA is keyed by plain vowels only; strip the accent first so
        -- that an accented syllable such as "kár" does not yield a nil coda.
        vowel = ACCENT_CONVERSION_TABLE[vowel] or vowel
        coda = CODA_VARA[last_char][vowel]
        if coda == nil then
            error("cannot find katakana for coda of syllable: ‘" .. syllable .. "’")
        end
    end

    -- The nucleus is the last character once the coda has been removed.
    local body_length = mw.ustring.len(remains)
    local nucleus = mw.ustring.sub(remains, body_length, body_length)
    local plain_nucleus = ACCENT_CONVERSION_TABLE[nucleus]
    local accented = plain_nucleus ~= nil
    if accented then
        remains = mw.ustring.sub(remains, 1, body_length - 1) .. plain_nucleus
    end

    -- Exact spelling first (capitalised keys select alternative kana),
    -- then the lowercased spelling.
    local kana = CONVERSION_TABLE[remains]
        or CONVERSION_TABLE[mw.ustring.lower(remains)]
    if kana == nil then
        error("cannot find katakana for CV pair: ‘" .. remains .. "’")
    end

    local converted = kana .. coda
    if accented then
        -- Accented syllables are rendered with an overline.
        converted = "<u style='text-decoration:overline;'>" .. converted .. "</u>"
    end
    return converted
end

--- Convert one romanized word to katakana.
--
-- Input that contains anything other than upper/lowercase letters, the
-- apostrophe and the ignorable separators "-", "=", "." (for example a word
-- already written in kana) is returned unchanged.
-- Otherwise the separators are removed, the word is split into syllables
-- (each vowel starts a syllable, taking one directly preceding consonant as
-- its onset; all other characters attach to the preceding syllable as coda)
-- and every syllable is passed to convert_syllable.
--
-- @param temp string: the word to convert
-- @return string: the katakana, or `temp` itself when it is not convertible
-- Errors raised by convert_syllable propagate, e.g. for a word that starts
-- with a consonant cluster or that contains no vowel at all.
local function do_convert(temp)
    -- Extensibility has to be kept in mind:
    -- # "N" marks ン
    -- # proper nouns such as Pawci-Kamuy
    -- # %u and %l cover accented letters and other special notation;
    --   %a cannot be used because it also matches hiragana and katakana.
    local ignore_chars = "%-=."
    -- Anchored so that the WHOLE word must be romanized text; an unanchored
    -- pattern would accept any string containing a single letter.
    local valid_pattern = "^[%u%l'" .. ignore_chars .. "]+$"

    -- TODO: hotne = ホッネ or ホㇳネ?
    -- TODO: wan e-tu = ワㇴ エト゚ or ワネト゚?

    -- Anything beyond letters and the listed symbols (e.g. the kana in
    -- {{ain-kana|hoy'oy|ヒオイオイ}}) is returned as given. Checking only
    -- against CONVERSION_TABLE turned out to be too hasty, hence the pattern.
    if not mw.ustring.match(temp, valid_pattern) then
        return temp
    end

    -- Normalize: drop "-", "." and "=". The word is deliberately NOT
    -- lowercased here because capital letters such as "N" carry meaning.
    temp = mw.ustring.gsub(temp, "[" .. ignore_chars .. "]", "")

    -- Split into characters once so later passes can index directly.
    local chars = {}
    for char in mw.ustring.gmatch(temp, ".") do
        chars[#chars + 1] = char
    end
    local length = #chars

    -- TODO: iyayiraykere = イヤィイラィケレ or イヤイラィケレ?
    -- Tentatively the latter is used.

    -- Pass 1: number the syllables. Each vowel opens a new syllable and a
    -- consonant directly before it becomes that syllable's onset. Vowels are
    -- matched case-insensitively so that spellings such as "tU" or "CA"
    -- (which have their own CONVERSION_TABLE entries) form syllables.
    local group_ids = {}
    local syllable_count = 1
    for i = 1, length do
        if in_values(mw.ustring.lower(chars[i]), VOWELS) then
            local char_before = chars[i - 1] or ""
            if in_values(mw.ustring.lower(char_before), CONSONANTS) then
                group_ids[i - 1] = syllable_count
            end
            group_ids[i] = syllable_count
            syllable_count = syllable_count + 1
        end
    end

    -- Pass 2: every still-unassigned character is a coda of the syllable
    -- before it. Leading characters with nothing before them stay nil.
    for i = 1, length do
        if group_ids[i] == nil then
            group_ids[i] = group_ids[i - 1]
        end
    end

    -- Pass 3: cut the word wherever the syllable number changes and convert
    -- each piece; the first character of the next syllable is passed along
    -- because it influences how a coda "n" is written.
    local pieces = {}
    local current_group_id = 1
    local head = 1
    for i = 1, length do
        if group_ids[i] ~= current_group_id then
            current_group_id = group_ids[i]
            local content = mw.ustring.sub(temp, head, i - 1)
            pieces[#pieces + 1] = convert_syllable(content, chars[i])
            head = i
        end
    end
    -- The last syllable has no following character.
    pieces[#pieces + 1] = convert_syllable(mw.ustring.sub(temp, head, length))

    return table.concat(pieces)
end

-- local function valid_ainu_word(word)
--     -- TODO:
-- end


--- Debugging entry point: convert a single word directly, without going
-- through a template frame.
-- @param word string: the word handed to do_convert
-- @return string: whatever do_convert returns for `word`
function export.debug(word)
    local converted = do_convert(word)
    return converted
end

--- Template entry point: convert the positional arguments of the calling
-- template to katakana.
--
-- Each positional argument is split on spaces and converted word by word;
-- the converted words are re-joined with spaces and the per-argument results
-- are joined with "/". A missing or empty argument is replaced by the text
-- of the current page title. At least one item is always produced.
--
-- @param frame frame object; the arguments are read from its parent frame
-- @return string: the converted arguments joined with "/"
function export.convert(frame)
    -- Positional parameter 1 is a list that may contain holes.
    local params = {
        [1] = {list = true, allow_holes = true}
    }
    local args = require("Module:parameters").process(frame:getParent().args, params)
    local inputs = args[1]

    local results = {}
    local last_index = math.max(inputs.maxindex, 1)
    for index = 1, last_index do
        local text = inputs[index]
        if not text or text == "" then
            -- Fall back to the page title when nothing was supplied.
            text = mw.title.getCurrentTitle().text
        end

        local words = {}
        for word in mw.text.gsplit(text, " ") do
            words[#words + 1] = do_convert(word)
        end
        results[#results + 1] = table.concat(words, " ")
    end

    return table.concat(results, "/")
end



return export