「モジュール:headword/data」の版間の差分

削除された内容 追加された内容
古典日本語はtransliteration要らん。
en:Module:headword/data 最新版とマージ
47行目:
"hanzi",
"hanja",
"ideophones",
"idioms",
"infixes",
65 ⟶ 66行目:
"phrases",
"postpositions",
"postpositional phrases",
"predicatives",
"prefixes",
100 ⟶ 102行目:
"adjectival participles",
"adjective forms",
"adjective comparative forms", -- TODO: replace with "comparative adjectives" or "comparative adjective forms", depending on which one the word is
"adjective feminine forms",
"adjective equative forms",
"adjective plural forms",
"adjective superlative forms", -- TODO: replace with "superlative adjectives" or "superlative adjective forms", depending on which one the word is
"adverb forms",
"adverb comparative forms", -- TODO: replace with "comparative adverbs" or "comparative adverb forms", depending on which one the word is
"adverb superlative forms", -- TODO: replace with "superlative adverbs" or "superlative adverb forms", depending on which one the word is
"adverbial participles",
"agent participles",
123 ⟶ 120行目:
"determiner superlative forms",
"diminutive nouns",
"equative adjective forms",
"equative adjectives",
"future participles",
"gerunds",
136 ⟶ 135行目:
"noun dual forms",
"noun forms",
"noun paucal forms",
"noun plural forms",
"noun possessive forms",
167行目:
"rafsi",
"romanizations",
"root forms",
"singulatives",
"suffix forms",
182 ⟶ 183行目:
"名詞 複数形",
"和語の漢字表記",
}
 
-- These languages will not have "LANG multiword terms" categories added.
data.no_multiword_cat = {
-------- Languages without spaces between words (sometimes spaces between phrases) --------
"aho", -- Ahom
"blt", -- Tai Dam
"ja", -- Japanese
"khb", -- Lü
"km", -- Khmer
"lo", -- Lao
"mnw", -- Mon
"my", -- Burmese
"nan", -- Min Nan (some words in Latin script; hyphens between syllables)
"nod", -- Northern Thai
"ojp", -- Old Japanese
"tdd", -- Tai Nüa
"th", -- Thai
"tts", -- Isan
"twh", -- Tai Dón
"shn", -- Shan
"sou", -- Southern Thai
"zh", -- Chinese (all varieties with Chinese characters)
 
-------- Languages with spaces between syllables --------
"ahk", -- Akha
"aou", -- A'ou
"atb", -- Zaiwa
"byk", -- Biao
--"duu", -- Drung; not sure
--"hmx-pro", -- Proto-Hmong-Mien
--"hnj", -- Green Hmong; not sure
"huq", -- Tsat
"ium", -- Iu Mien
--"lis", -- Lisu; not sure
"mtq", -- Muong
--"mww", -- White Hmong; not sure
--"sit-gkh", -- Gokhy; not sure
--"swi", -- Sui; not sure
"tbq-lol-pro", -- Proto-Loloish
"tdh", -- Thulung
"ukk", -- Muak Sa-aak
"vi", -- Vietnamese
"yig", -- Wusa Nasu
"zng", -- Mang
 
-------- Languages with ~ with surrounding spaces used to separate variants --------
"mkh-ban-pro", -- Proto-Bahnaric
"sit-pro", -- Proto-Sino-Tibetan; listed above
-------- Other weirdnesses --------
"mul", -- Translingual; gestures, Morse code, etc.
"aot", -- Atong (India); bullet is a letter
 
-------- All sign languages --------
"ads",
"aed",
"aen",
"afg",
"ase",
"asf",
"asp",
"asq",
"asw",
"bfi",
"bfk",
"bog",
"bqn",
"bqy",
"bvl",
"bzs",
"cds",
"csc",
"csd",
"cse",
"csf",
"csg",
"csl",
"csn",
"csq",
"csr",
"doq",
"dse",
"dsl",
"ecs",
"esl",
"esn",
"eso",
"eth",
"fcs",
"fse",
"fsl",
"fss",
"gds",
"gse",
"gsg",
"gsm",
"gss",
"gus",
"hab",
"haf",
"hds",
"hks",
"hos",
"hps",
"hsh",
"hsl",
"icl",
"iks",
"ils",
"inl",
"ins",
"ise",
"isg",
"isr",
"jcs",
"jhs",
"jls",
"jos",
"jsl",
"jus",
"kgi",
"kvk",
"lbs",
"lls",
"lsl",
"lso",
"lsp",
"lst",
"lsy",
"lws",
"mdl",
"mfs",
"mre",
"msd",
"msr",
"mzc",
"mzg",
"mzy",
"nbs",
"ncs",
"nsi",
"nsl",
"nsp",
"nsr",
"nzs",
"okl",
"pgz",
"pks",
"prl",
"prz",
"psc",
"psd",
"psg",
"psl",
"pso",
"psp",
"psr",
"pys",
"rms",
"rsl",
"rsm",
"sdl",
"sfb",
"sfs",
"sgg",
"sgx",
"slf",
"sls",
"sqk",
"sqs",
"ssp",
"ssr",
"svk",
"swl",
"syy",
"tse",
"tsm",
"tsq",
"tss",
"tsy",
"tza",
"ugn",
"ugy",
"ukl",
"uks",
"vgt",
"vsi",
"vsl",
"vsv",
"xki",
"xml",
"xms",
"ygs",
"ysl",
"zib",
"zsl",
}
 
-- In these languages, the hyphen is not considered a word separator for the "multiword terms" category.
data.hyphen_not_multiword_sep = {
"akk", -- Akkadian; hyphens between syllables
"akl", -- Aklanon; hyphens for mid-word glottal stops
"ber-pro", -- Proto-Berber; morphemes separated by hyphens
"ceb", -- Cebuano; hyphens for mid-word glottal stops
"cnk", -- Khumi Chin; hyphens used in single words
"cpi", -- Chinese Pidgin English; Chinese-derived words with hyphens between syllables
"de", -- too many false positives
"esx-esk-pro", -- hyphen used to separate morphemes
"fi", -- Finnish; hyphen used to separate components in compound words if the final and initial vowels match, respectively
"hil", -- Hiligaynon; hyphens for mid-word glottal stops
"ilo", -- Ilocano; hyphens for mid-word glottal stops
"lcp", -- Western Lawa; dash as syllable joiner
"lwl", -- Eastern Lawa; dash as syllable joiner
"mkh-vie-pro", -- Proto-Vietic; morphemes separated by hyphens
"msb", -- Masbatenyo; too many false positives
"tl", -- Tagalog; too many false positives
"war", -- Waray-Waray; too many false positives
}
 
-- These languages will not have "LANG masculine nouns" and similar categories added.
data.no_gender_cat = {
-- Languages without gender but which use the gender field for other purposes
"ja",
"th",
}
 
252 ⟶ 478行目:
data[key] = require("Module:utils").list_to_set(list)
end
 
-- Parts of speech for which categories like "German masculine nouns" or "Russian imperfective verbs"
-- will be generated if the headword is of the appropriate gender/number. We put this at the bottom
-- because it's a map, not a list.
data.pos_for_gender_number_cat = {
["nouns"] = "nouns",
["proper nouns"] = "nouns",
["suffixes"] = "suffixes",
-- We include verbs because impf and pf are valid "genders".
["verbs"] = "verbs",
}
 
return data