-- モジュール:ja-headword
--
-- このモジュールについての説明文ページをモジュール:ja-headword/doc に作成できます。
local m_ja = require("モジュール:ja")
local m_links = require("モジュール:links")
local u = require("Module:string utilities").char

-- Unicode-aware find, used for every pattern test in this module.
local find = mw.ustring.find

-- export: table returned to Scribunto; pos_functions: per-part-of-speech hooks keyed by category name.
local export = {}
local pos_functions = {}

-- Language and script objects passed along in the headword data.
-- Latn is currently referenced only from commented-out romaji code in find_inflections.
local lang = require("モジュール:languages").getByCode("ja")
local sc = require("モジュール:scripts").getByCode("Jpan")
local Latn = require("モジュール:scripts").getByCode("Latn")

-- Fragments meant to be spliced inside a [...] pattern set (they are not complete patterns).
-- Japanese_symbols: punctuation accepted alongside kana; the leading % escapes the character after it.
local Japanese_symbols = '%ｰ・＝？！。、'
local katakana_range = 'ァ-ヺーヽヾ'
local hiragana_range = 'ぁ-ゖーゞゝ'
local kana_range = katakana_range .. hiragana_range .. Japanese_symbols
local kanji_range =
	u(0x2E80) .. "-" .. u(0x2FDF) .. -- CJK Radicals Supplement & Kangxi Radicals
	u(0x4E00) .. "-" .. u(0x9FFF) .. -- CJK Unified Ideographs
	u(0x3400) .. "-" .. u(0x4DBF) .. -- CJK Unified Ideographs Extension A
	u(0xF900) .. "-" .. u(0xFAFF) .. -- CJK Compatibility Ideographs
	u(0x20000) .. "-" .. u(0x2A6DF) .. -- CJK Unified Ideographs Extension B
	u(0x2A700) .. "-" .. u(0x2EE5F) .. -- CJK Unified Ideographs Extension C-F & I
	u(0x2F800) .. "-" .. u(0x2FA1F) .. -- CJK Compatibility Ideographs Supplement
	u(0x30000) .. "-" .. u(0x323AF) -- CJK Unified Ideographs Extension G & H
local Japanese_scripts_range = kana_range .. kanji_range

-- Whole-string patterns. All use '*', so the empty string matches each of them.
local katakana_pattern = '^[' .. katakana_range .. Japanese_symbols .. ']*$'
local hiragana_pattern = '^[' .. hiragana_range .. Japanese_symbols .. ']*$'
local kana_pattern = '^[' .. kana_range .. ']*$'
-- The "full" variants additionally accept whitespace, '.', '-' and '^', the markup characters
-- that find_inflections strips from a reading after romanizing it.
local kana_pattern_full = '^[、' .. kana_range .. '%s%.%-%^]*$'
-- Unanchored single-character version of the set above.
local kana_pattern_char = '[、' .. kana_range .. '%s%.%-%^]'

-- Classify a string by the kana pattern it satisfies: 'kata', 'hira', 'both',
-- or nil when it contains anything outside the kana/symbol sets.
-- The tests run in that order, so a string made only of the shared symbols
-- (or an empty string) is reported as 'kata'.
-- The classifier is wrapped in モジュール:fun's memoize (presumably caching results per argument).
local detect_kana_script = require("モジュール:fun").memoize(function(text)
	if find(text, katakana_pattern) then
		return 'kata'
	end
	if find(text, hiragana_pattern) then
		return 'hira'
	end
	if find(text, kana_pattern) then
		return 'both'
	end
	return nil
end)

-- Romanize a kana reading via m_ja.kana_to_romaji, adjusted for the part of speech.
--   kana: reading to romanize
--   data: headword data; only data.pos_category is read
--   args: template arguments; only args["infl"] is read (for adjectives)
-- Returns the romanization, with a hyphen attached for affixes and counters.
local function kana_to_romaji(kana, data, args)
	local pos = data.pos_category

	-- Verbs and i-adjectives: put a period before a final う/い so the ending
	-- is not merged into a long vowel (macron) by the romanizer.
	local split_ending = pos == "動詞"
		or (pos == "形容詞" and (args["infl"] == "i" or args["infl"] == "い"))
	if split_ending then
		kana = mw.ustring.gsub(kana,'([うい])$','.%1')
	end

	local romaji = m_ja.kana_to_romaji(kana)

	-- Proper nouns: capitalize at the start, after a space, and after a hyphen.
	if pos == "固有名詞" then
		for _, boundary in ipairs({"^%l", " %l", "-%l"}) do
			romaji = mw.ustring.gsub(romaji, boundary, mw.ustring.upper)
		end
	end

	-- Prefixes get a trailing hyphen; suffixes and counters (classifiers) a leading one.
	if pos == "接頭辞" then
		romaji = romaji .. "-"
	elseif pos == "接尾辞" or pos == "助数詞" or pos == "類別詞" then
		romaji = "-" .. romaji
	end
	return romaji
end

-- Report whether a kana string ends in る preceded by one of the listed
-- i-row / e-row kana (hiragana or katakana).
-- Returns false when the last character is not る; otherwise 1 when the
-- second-to-last character is in the list and nil when it is not.
local function ends_in_iru_eru(kana)
	if mw.ustring.sub(kana, -1) == "る" then
		local before_ru = {}
		for _, ch in ipairs({
			-- hiragana, i-row then e-row
			"い", "き", "し", "ち", "に", "ひ", "み", "り", "ゐ",
			"ぃ", "ぎ", "じ", "ぢ", "び", "ぴ",
			"え", "け", "せ", "て", "ね", "へ", "め", "れ", "ゑ",
			"ぇ", "げ", "ぜ", "で", "べ", "ぺ",
			-- katakana, i-row then e-row
			"イ", "キ", "シ", "チ", "ニ", "ヒ", "ミ", "リ", "ヰ",
			"ィ", "ギ", "ジ", "ヂ", "ビ", "ピ",
			"エ", "ケ", "セ", "テ", "ネ", "ヘ", "メ", "レ", "ヱ",
			"ェ", "ゲ", "ゼ", "デ", "ベ", "ペ",
		}) do
			before_ru[ch] = 1
		end
		return before_ru[mw.ustring.sub(kana, -2, -2)]
	end
	return false
end

-- Decimal strings "1" .. "15", each stored at the index it spells.
-- categorize_by_kanji uses them to name "contains N kanji" categories.
local ja_numerals = {}
for n = 1, 15 do
	ja_numerals[n] = tostring(n)
end

-- Category-name prefixes, indexed by the value m_ja.kanji_grade returns in categorize_by_kanji.
local ja_grades = {
	"第1学年で習う",
	"第2学年で習う",
	"第3学年で習う",
	"第4学年で習う",
	"第5学年で習う",
	"第6学年で習う",
	"中学校で習う",
	"人名用",
	"表外",
}

-- Append kanji-based categories for PAGENAME to data.categories:
-- one grade category per kanji character (repeats included) and one
-- category for the total kanji count when that count is between 1 and 15.
-- Does nothing when the page name has no character in the 一-鿌 range.
local function categorize_by_kanji(data, PAGENAME)
	-- strip everything that is not a kanji
	local kanji_only = mw.ustring.gsub(PAGENAME, '[^一-鿌]', '')
	local kanji_count = mw.ustring.len(kanji_only)
	if kanji_count == 0 then
		return
	end

	-- one grade category per kanji
	for pos = 1, kanji_count do
		local grade = m_ja.kanji_grade(mw.ustring.sub(kanji_only, pos, pos))
		table.insert(data.categories, ("%s漢字が含まれる日本語"):format(ja_grades[grade]))
	end

	-- category for the number of kanji
	if kanji_count == 1 then
		table.insert(data.categories, "漢字が1個含まれる日本語")
	else
		local numeral = ja_numerals[kanji_count]
		if numeral then
			table.insert(data.categories, ("漢字が%s個含まれる日本語"):format(numeral))
		end
	end
end

-- Categorize a term whose page name is exactly one kanji (a character in 一-鿌).
-- Such a term has no kanjitab / kanji reading tab to generate its
-- "spelled with ..." categories, so they are added here instead
-- (the single-kanji category is for maintenance because many of these need attention).
--   data: headword data; categories are appended to data.categories
--   PAGENAME: page title being categorized
local function singlekanji_term(data, PAGENAME)
	if mw.ustring.len(PAGENAME) == 1 and mw.ustring.match(PAGENAME, '[一-鿌]') then
		-- Interpolate the kanji into the category name; the same
		-- "日本語 文字%sを含む語" format is used by extra_categorization.
		table.insert(data.categories, ("日本語 文字%sを含む語"):format(PAGENAME))
		table.insert(data.categories, "日本語 単漢字")
	end
end

-- Choose the kana form for this entry. Candidates are tried in this order:
--   1. the first unnamed argument that, with links removed, is entirely kana/markup
--      (the link-free text is returned),
--   2. PAGENAME, when it is itself entirely kana/markup,
--   3. args["hira"], then args["kata"], returned as given when non-empty.
-- Raises an error when none of these yields a reading.
local function find_kana(args, PAGENAME)
	for _, value in ipairs(args) do
		local plain = m_links.remove_links(value)
		if find(plain, kana_pattern_full) then
			return plain
		end
	end

	if find(PAGENAME, kana_pattern_full) then
		return PAGENAME
	end

	for _, key in ipairs({"hira", "kata"}) do
		local named = args[key] or ""
		if named ~= "" then
			return named
		end
	end

	error("読み仮名は必須です。無名引数で読み仮名を指定してください。")
end

-- Collect every unnamed argument that contains at least one kanji
-- (any character in kanji_range) into data.kanji, in argument order.
-- PAGENAME is accepted for signature compatibility but is not used.
local function find_kanji(args, PAGENAME, data)
	local contains_kanji = "[" .. kanji_range .. "]"
	for _, value in ipairs(args) do
		if find(value, contains_kanji) then
			table.insert(data.kanji, value)
		end
	end
end

-- Go through args and build inflections by finding whatever kana were given to us.
--   args: template arguments; reads the unnamed arguments plus "hira", "kata", "rom", "hhira", "reki"
--   data: headword data; appends to data.translits, data.inflections and data.categories
--   PAGENAME: page title
--   conj: conjugation marker from the invocation; "する" triggers the suru-verb handling
--   suru_in_pagename: true when the page title already ends in する
-- Returns nothing. May raise an error from romanization() when no romanization is available.
local function find_inflections(args, data, PAGENAME, conj, suru_in_pagename)
	-- NOTE(review): detect_result is only referenced from the commented-out romaji code below.
	local detect_result = detect_kana_script(PAGENAME)
	local function romanization(auto_rom)
		-- accept the automatic romanization generated in function kana_to_romaji() above
		-- compare that to the manual romanization if it exists
		-- (adding it to inflections is currently disabled, see the commented-out code below)
		local rom = args["rom"] or ""
		if rom == "" then rom = auto_rom end

		-- check auto rom against manual and put in hidden category if they differ
		if rom ~= auto_rom then
			table.insert(data.categories, "ローマ字に注意が必要な日本語")
		end

		-- throw an error if there is no romanization
		if rom == "" then
			error("仮名読みが必要です。")
		end

		-- Excluded because romaji does not seem to be needed on Japanese Wiktionary
		-- -- add romaji
		-- -- add link manually for WT:ACCEL unless headword is for suru verb
		-- if data.pos_category == "suru verbs" then
		-- 	table.insert(data.inflections, {label = "ローマ字", "[[" .. rom .. "]] [[suru]]", sc = Latn})
		-- elseif detect_result then
		-- 	-- only accelerate romaji creation for kana entries
		-- 	table.insert(data.inflections, {label = "ローマ字", accel = "romanized-form-of", rom, sc = Latn})
		-- else
		-- 	table.insert(data.inflections, {label = "ローマ字", rom, sc = Latn})
		-- end
	end

	-- NOTE(review): original is filled below but never read, and romaji_lookup is never used.
	local allkana,original,readings,romajis,romaji_lookup = {},{},{},{},{}

	-- collect unnamed arguments that are kana once links are removed
	-- (the argument is stored as given, links included)
	for i,arg in ipairs(args) do
		if arg and arg ~= "" then
			local unlinked_value = m_links.remove_links(arg)
			if find(unlinked_value, kana_pattern_full) then
				table.insert(allkana, arg)
			end
		end
	end

	-- accept "hira" and "kata" but let Lua decide if they are really hiragana or katakana
	if args["hira"] and args["hira"] ~= "" and find(args["hira"], kana_pattern_full) then table.insert(allkana, args["hira"]) end
	if args["kata"] and args["kata"] ~= "" and find(args["kata"], kana_pattern_full) then table.insert(allkana, args["kata"]) end

	-- fall back to the page name when it is kana and no other reading was supplied
	if find(PAGENAME, kana_pattern_full) then
		if #allkana == 0 then table.insert(allkana, PAGENAME) end
	end

	for i = 1, #allkana do	
		-- auto_romanization (done before the markup is stripped, so the markup can guide it)
		romajis[i] = kana_to_romaji(allkana[i], data, args)
		-- remove markup
		table.insert(original,allkana[i])
		allkana[i] = mw.ustring.gsub(allkana[i], '[%s%.%-%^]', '')
	end
	for i = 1, #allkana do
		-- if this is not kana, blank it out
		-- NOTE(review): `allkana` is a table and therefore always truthy here;
		-- `allkana[i]` may have been intended. The outcome is the same either way.
		if allkana and not mw.ustring.match(allkana[i], kana_pattern_char) then
			allkana[i] = ""
		else
			-- if this is kana, count it as another effective reading (ignoring hiragana-katakana distinction)
			readings[m_ja.kata_to_hira(allkana[i])] = 1
		end
		-- only if this kana is different from the page name
		if allkana[i] ~= PAGENAME and allkana[i] ~= "" then

			-- Use the kana spelling as the translit.
			if data.pos_category == "動詞" and conj == "する" and not suru_in_pagename then
				table.insert(data.translits, allkana[i] .. "する")
			else
				table.insert(data.translits, allkana[i])
			end
		end

		-- do the romanization business if it passes through every check
		local undergo_romanization = true
		if allkana[i] ~= "" then
			if allkana[i] == PAGENAME and not find(PAGENAME, kana_pattern_full) then
				-- the reading equals a page name that is not kana
				undergo_romanization = false
			else
				-- skip this entry when a later one has the same automatic romanization
				for j=i+1, #allkana do
					if allkana[j] and romajis[i] == romajis[j] then
						undergo_romanization = false
					end
				end
			end
		end
		if undergo_romanization then romanization(romajis[i]) end
	end

	-- historical kana spelling, taken from "hhira" or "reki"
	local reki = args["hhira"] or args["reki"] or ""
	if reki ~= "" then
		if data.pos_category == "動詞" and conj == "する" and not suru_in_pagename then
			table.insert(data.inflections, {label = "歴史的仮名遣い", "[[" .. reki .. "]][[する]]"})
		else
			table.insert(data.inflections, {label = "歴史的仮名遣い", reki})
		end
	end

	-- count the distinct readings (keys are the hiragana forms)
	local num_readings = 0
	for _ in pairs(readings) do
		num_readings = num_readings + 1
	end
	
	if num_readings > 1 then
		---------------------------------------------------------------------------------
		-- 2020/04/08: categories not used on Japanese Wiktionary were commented out.
		-- Restore them as needed.
		---------------------------------------------------------------------------------
		-- table.insert(data.categories, "日本語 複数読み")
		if data.pos_category == "和語の漢字表記" then
			table.insert(data.categories, "複数訓")
		end
	end
end

-- Categorize by the scripts used in PAGENAME and by specific characters it contains.
--   data: headword data; categories are appended to data.categories
--   PAGENAME: page title
--   alphabet_category: separate list that receives the "no Japanese script at all" category
local function extra_categorization(data, PAGENAME, alphabet_category)
	---------------------------------------------------------------------------------
	-- 2020/04/08: categories not used on Japanese Wiktionary were commented out.
	-- Restore them as needed.
	---------------------------------------------------------------------------------
	-- if detect_kana_script(PAGENAME) == 'hira' then table.insert(data.categories, "日本語 平仮名") end
	-- if detect_kana_script(PAGENAME) == 'kata' then table.insert(data.categories, "日本語 片仮名") end
	local japanese_set = "[" .. Japanese_scripts_range .. "]"
	local foreign_set = "[^" .. Japanese_scripts_range .. "]"

	if find(PAGENAME, japanese_set) then
		-- Japanese script mixed with at least one character from outside it
		if find(PAGENAME, foreign_set) then
			table.insert(data.categories, "日本語 字種混合表記")
		end
	else
		-- no Japanese-script character at all
		table.insert(alphabet_category, "日本語 アルファベット記述")
	end

	-- one category per special character that occurs in the page name
	local tracked = {'々','〆','ヶ','ゝ','ゞ','ヽ','ヾ','ゐ','ヰ','ゑ','ヱ','ゔ','ヷ','ヸ','ヹ','ヺ','・','＝','゠'}
	for idx = 1, #tracked do
		local character = tracked[idx]
		if mw.ustring.match(PAGENAME,character) then
			table.insert(data.categories, ("日本語 文字%sを含む語"):format(character))
		end
	end
end

-- Nouns: when args["count"] is given, split it on commas and add the pieces
-- to data.inflections as one entry labelled "助数詞" (counter).
pos_functions["名詞"] = function(args, data)
	local counter = args["count"] or ""
	if counter == "" then
		return
	end

	local counters = {label = "助数詞"}
	for piece in mw.text.gsplit(counter, ',') do
		counters[#counters + 1] = piece
	end
	table.insert(data.inflections, counters)
end

-- Kana spellings of kanji-written terms: add the homophone category when
-- args["h"] (or, if "h" is absent, args["homophones"]) is non-empty.
pos_functions["漢字表記語の仮名表記"] = function(args, data)
	local homophones = args["h"] or args["homophones"]
	if homophones and homophones ~= "" then
		table.insert(data.categories, "日本語 同音異義")
	end
end

-- The main entry point.
-- This is the only function that can be invoked from a template.
--   frame.args[1]: part-of-speech category (required; an error is raised without it)
--   frame.args[2]: conjugation marker; "する" enables the suru-verb handling
--   parent-frame args read here: pagename, head, decl, infl, kyu, shin
--     (the helpers read further ones such as the unnamed readings, hira, kata, rom, reki)
-- Returns the output of モジュール:headword's full_headword followed by the
-- formatted "alphabet" categories.
function export.show(frame)
	local args = frame:getParent().args
	-- Declared local so the page name does not leak into the global environment;
	-- every helper receives it as an explicit parameter.
	local PAGENAME = args["pagename"] or mw.title.getCurrentTitle().text
	local poscat = frame.args[1] or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")
	local conj = frame.args[2] or ""

	-- default head: the page name with every kanji linked individually
	local head = args["head"] or mw.ustring.gsub(PAGENAME, "[" .. kanji_range .. "]", '[[%0]]')

	-- suru verbs: strip a trailing する from the head (tracking pages that include it)
	-- and re-attach it as a link
	local suru_in_pagename = false
	if poscat == "動詞" and conj == "する" then
		local base, count = head:gsub("する$", "")
		if count == 1 then
			suru_in_pagename = true
			require("モジュール:debug").track("ja-headword/suru in pagename")
		end
		head = base .. "[[する]]"
	end

	-- "decl" is accepted as a fallback for a missing or empty "infl"
	if args["decl"] and (not args["infl"] or args["infl"] == "") then
		args["infl"] = args["decl"]
	end

	local data = {lang = lang, sc = sc, pos_category = poscat, categories = {}, heads = {head}, inflections = {}, translits = {}, kanji = {}}
	local alphabet_category = {}

	-- raises an error when no reading can be determined
	local kana = find_kana(args, PAGENAME)

	-- true when する has to be appended to forms taken from the arguments
	local append_suru = poscat == "動詞" and conj == "する" and not suru_in_pagename

	-- Add a labelled alternative-script form to data.inflections; empty forms are skipped.
	local function add_script_form(label, form)
		if form == "" then
			return
		end
		if append_suru then
			form = "[[" .. form .. "]][[する]]"
		end
		table.insert(data.inflections, {label = label, form})
	end

	find_kanji(args, PAGENAME, data)
	if append_suru then
		for i, v in ipairs(data.kanji) do
			data.kanji[i] = "[[" .. v .. "]]する"
		end
	end

	-- the presence of kyūjitai param indicates that this is shinjitai kanji entry and vice versa
	add_script_form("旧字体", args["kyu"] or "")
	add_script_form("新字体", args["shin"] or "")

	-- add certain "inflections" and categories for adjectives, verbs, or nouns
	if pos_functions[poscat] then
		pos_functions[poscat](args, data)
	end

	if data.pos_category == "名詞" and conj == "する" then
		table.insert(data.categories, "日本語 名詞 サ変動詞")
	elseif data.pos_category == "副詞" and conj == "する" then
		table.insert(data.categories, "日本語 副詞 サ変動詞")
	end

	-- sort out all the kanas and do the romanization business
	find_inflections(args, data, PAGENAME, conj, suru_in_pagename)

	---------------------------------------------------------------------------------
	-- 2020/04/08: categories not used on Japanese Wiktionary were commented out.
	-- Restore them as needed.
	---------------------------------------------------------------------------------
	-- -- categorize by joyo kanji and number of kanji
	-- categorize_by_kanji(data, PAGENAME)
	-- -- generate "Japanese terms spelled with ... read as ..." for single-kanji terms
	-- singlekanji_term(data, PAGENAME)
	-- add categories for terms with iteration marks (which are not kanji and hence are not categorized by ja-kanjitab)
	extra_categorization(data, PAGENAME, alphabet_category)

	-- page names mixing katakana and hiragana
	-- NOTE(review): the excluded categories are spelled in English ("proverbs", "phrases")
	-- while the other part-of-speech names in this module are Japanese; confirm these
	-- values can actually occur.
	if find(PAGENAME, "[ァ-ヺヽヾ]") and find(PAGENAME, "[ぁ-ゖゞゝ]") and data.pos_category ~= "proverbs" and data.pos_category ~= "phrases" then
		table.insert(data.categories, "日本語 仮名混用")
	end

	-- pass the reading as sort_base only when it differs from the page name
	if kana ~= PAGENAME then
		data.sort_base = kana
	end

	return
		require("モジュール:headword").full_headword(data) ..
		require("モジュール:utilities").format_categories(alphabet_category, lang)
end

-- Lightweight headword line built directly as wikitext.
--   frame.args[1]: part of speech (required; an error is raised without it)
--   parent-frame args: unnamed values (kana readings, or kanji spellings when the
--     page name is all kana), plus pagename, reki and sort
-- When the page name contains anything other than kana/symbols, the unnamed
-- arguments are shown as readings in （…） and at least one is required; otherwise
-- they are shown as kanji spellings in 【…】 with each kanji linked.
-- Returns the bold head, the optional historical-kana note, and two category links.
function export.show_head(frame)
	local pat_kanji = '[一-鿌]'
	local pat_non_kana = '[^ぁ-ゖヺーヽヾァ-ヶーゞゝ%ｰ・＝？！。、]'

	-- Return the value when it is a non-empty string, nil otherwise.
	-- A parameter passed as "name=" arrives as "" (which is truthy in Lua) and
	-- would otherwise produce an empty link or a dangling "|" sort key.
	local function non_empty(value)
		if value ~= nil and value ~= '' then
			return value
		end
		return nil
	end

	-- page name contains non-kana: link each kanji and list the readings
	local function show_kango(args, pagename)
		local kana_forms = {}
		for _, v in ipairs(args) do
			table.insert(kana_forms, v)
		end
		kana_forms = #kana_forms > 0 and '（' .. table.concat(kana_forms, '、') .. '）' or error('仮名の入力が必要です')
		-- surround each kanji link with spaces, then trim and collapse the spaces
		pagename = mw.ustring.gsub(pagename, pat_kanji, ' [[%0]] '):gsub('^ +', ''):gsub(' +$', ''):gsub(' +', ' ')
		return "'''" .. pagename .. "'''" .. kana_forms
	end

	-- page name is all kana: list the kanji spellings, if any
	local function show_wago(args, pagename)
		local kanji_forms = {}
		for _, v in ipairs(args) do
			-- extra parentheses keep only the string result of gsub
			table.insert(kanji_forms, (mw.ustring.gsub(v, pat_kanji, '[[%0]]')))
		end
		kanji_forms = #kanji_forms > 0 and '【' .. table.concat(kanji_forms, '・') .. '】' or ''
		return "'''" .. pagename .. "'''" .. kanji_forms
	end

	local pos = frame.args[1] or error('品詞の入力が必要です')
	local args = frame:getParent().args
	local pagename = args['pagename'] or mw.title.getCurrentTitle().text

	local reki = non_empty(args['reki'])
	reki = reki and '（<small>歴史的仮名遣い: </small>[[' .. reki .. ']]）' or ''
	local sort = non_empty(args['sort'])
	sort = sort and '|' .. sort or ''

	local head
	if mw.ustring.find(pagename, pat_non_kana) then
		head = show_kango(args, pagename)
	else
		head = show_wago(args, pagename)
	end

	return head .. reki .. '[[カテゴリ:日本語' .. sort ..']][[カテゴリ:日本語 ' .. pos .. sort .. ']]'
end

return export