-- Modul:ja-headword
-- Bu modulun sənədləşdirmə səhifəsi Modul:ja-headword/doc səhifəsində yaradıla bilər
local m_ja = require("Module:ja")
local export = {}
local pos_functions = {}
local lang = require("Module:languages").getByCode("ja")
local sc = require("Module:scripts").getByCode("Jpan")
-- Character sets and mw.ustring patterns used to recognise kana strings.
-- The *_range values are written to be spliced inside a [...] character class.

-- punctuation and symbols that are accepted alongside kana
local Japanese_symbols = 'ー・=?!。、'
-- katakana letters plus the prolonged sound mark and the katakana iteration marks
local katakana_range = 'ァ-ヺーヽヾ'
-- hiragana letters plus the prolonged sound mark and the hiragana iteration marks
local hiragana_range = 'ぁ-ゖーゞゝ'
-- either kana script, plus the accepted symbols
local kana_range = katakana_range .. hiragana_range .. Japanese_symbols
-- kana, symbols, the kanji range 一-鿌, and the marks ・ and 々
local Japanese_scripts_range = kana_range .. '一-鿌・々'
-- whole-string patterns; because of the * quantifier they also match the empty string
local katakana_pattern = '^[' .. katakana_range .. Japanese_symbols .. ']*$'
local hiragana_pattern = '^[' .. hiragana_range .. Japanese_symbols .. ']*$'
local kana_pattern = '^[' .. kana_range .. ']*$'
-- like kana_pattern, but also tolerating whitespace, '.' and '-'
-- (the leading '、' is already contained in kana_range through Japanese_symbols)
local kana_pattern_full = '^[、' .. kana_range .. '%s%.%-]*$'
-- single-character version of kana_pattern_full (unanchored)
local kana_pattern_char = '[、' .. kana_range .. '%s%.%-]'
--- Classify a string by the kana script it is written in.
-- The patterns are tried from most to least specific, so a string made only of
-- shared symbols (or an empty string) is reported as 'kata'.
-- @param kana string to classify
-- @return 'kata', 'hira', 'both', or nil when the string contains anything
--         outside the kana ranges and accepted symbols
local function detect_kana_script(kana)
	local classifiers = {
		{ katakana_pattern, 'kata' },
		{ hiragana_pattern, 'hira' },
		{ kana_pattern, 'both' },
	}
	for index = 1, #classifiers do
		local pattern, script = classifiers[index][1], classifiers[index][2]
		if mw.ustring.find(kana, pattern) then
			return script
		end
	end
	return nil
end
--- Romanize a kana string, applying part-of-speech specific adjustments.
-- @param kana   kana string to romanize
-- @param poscat part-of-speech category name (e.g. "verbs", "proper nouns")
-- @param args   template arguments; |infl= and |decl= are consulted for adjectives
-- @return the romaji string, with a leading/trailing hyphen for affixes and counters
local function kana_to_romaji(kana, poscat, args)
	-- -u verbs and -i adjectives get a period before their last character so that
	-- the final vowel is not merged into a macron by the romanizer
	local is_i_adjective = poscat == "adjectives"
		and (args["infl"] == "i" or args["infl"] == "い"
			or args["decl"] == "i" or args["decl"] == "い")
	if poscat == "verbs" or is_i_adjective then
		kana = mw.ustring.gsub(kana, '(.)$', '.%1')
	end

	local romaji = m_ja.kana_to_romaji(kana)

	-- proper nouns: capitalize at the start and after each space or hyphen
	if poscat == "proper nouns" then
		local capitalization_points = { "^%l", " %l", "-%l" }
		for index = 1, #capitalization_points do
			romaji = mw.ustring.gsub(romaji, capitalization_points[index], mw.ustring.upper)
		end
	end

	-- prefixes take a trailing hyphen; suffixes and counters (classifiers) a leading one
	if poscat == "prefixes" then
		return romaji .. "-"
	elseif poscat == "suffixes" or poscat == "counters" or poscat == "classifiers" then
		return "-" .. romaji
	end
	return romaji
end
-- English number words, indexed by the number of kanji in a page title;
-- used to build "... written with N Han script characters" category names.
local en_numerals = {
	"one",
	"two",
	"three",
	"four",
	"five",
	"six",
	"seven",
	"eight",
	"nine",
	"ten",
	"eleven",
	"twelve",
	"thirteen",
	"fourteen",
	"fifteen",
}

-- Names of kanji grades, indexed by the value returned from m_ja.kanji_grade;
-- used to build "Japanese terms spelled with ... kanji" category names.
local en_grades = {
	"first grade",
	"second grade",
	"third grade",
	"fourth grade",
	"fifth grade",
	"sixth grade",
	"secondary school",
	"jinmeiyō",
	"hyōgaiji",
}
--- Add kanji-based categories for a page title.
-- For every kanji in the title a "Japanese terms spelled with <grade> kanji"
-- category is appended, followed by one category for the number of kanji
-- (only for counts listed in en_numerals). Titles without kanji add nothing.
-- @param categories list of category names, appended to in place
-- @param PAGENAME   title of the current page
local function categorize_by_kanji(categories, PAGENAME)
	-- strip everything that is not in the kanji range
	local kanji_only = mw.ustring.gsub(PAGENAME, '[^一-鿌]', '')
	local kanji_count = mw.ustring.len(kanji_only)
	if not (kanji_count > 0) then
		return
	end

	-- one grade category per kanji
	for position = 1, kanji_count do
		local character = mw.ustring.sub(kanji_only, position, position)
		local grade_name = en_grades[m_ja.kanji_grade(character)]
		table.insert(categories, ("Japanese terms spelled with %s kanji"):format(grade_name))
	end

	-- one category for the number of kanji
	if kanji_count == 1 then
		table.insert(categories, "Japanese terms written with one Han script character")
	elseif en_numerals[kanji_count] then
		local count_word = en_numerals[kanji_count]
		table.insert(categories, ("Japanese terms written with %s Han script characters"):format(count_word))
	end
end
--- Add maintenance categories for titles that consist of exactly one kanji.
-- Such entries have no kanjitab / kanji reading tab to generate the
-- "Japanese terms spelled with ..." category, so it is added here together
-- with a tracking category (many of these entries need attention).
-- @param categories list of category names, appended to in place
-- @param PAGENAME   title of the current page
local function singlekanji_term(categories, PAGENAME)
	if mw.ustring.len(PAGENAME) ~= 1 then
		return
	end
	if not mw.ustring.match(PAGENAME, '[一-鿌]') then
		return
	end
	table.insert(categories, "Japanese terms spelled with " .. PAGENAME)
	table.insert(categories, "Japanese single-kanji terms")
end
--- Pick the kana form to use for this entry.
-- Order of preference: the first unnamed parameter that is kana, then the page
-- name if it is kana, then |hira=, then |kata=.
-- @param args     template arguments
-- @param PAGENAME title of the current page
-- @return the selected kana string; raises an error when none is available
local function find_kana(args, PAGENAME)
	for _, value in ipairs(args) do
		if mw.ustring.find(value, kana_pattern_full) then
			return value
		end
	end

	if mw.ustring.find(PAGENAME, kana_pattern_full) then
		return PAGENAME
	end

	-- |hira= and |kata= are returned as given, without checking their script
	local hira = args["hira"]
	if hira and hira ~= "" then
		return hira
	end
	local kata = args["kata"]
	if kata and kata ~= "" then
		return kata
	end

	error("No kana detected in the unnamed parameters, |hira= and |kata= parameter. See template documentation for details.")
end
-- Build the kana, romaji and historical-kana entries of the headword line.
-- Collects every kana form given in the unnamed parameters, |hira= and |kata=
-- (falling back to PAGENAME when it is itself kana and nothing else was given),
-- appends the corresponding entries to `inflections`, and appends maintenance
-- categories to `categories`. Both tables are modified in place; nothing is returned.
--   args        template arguments
--   categories  list of category names (appended to)
--   inflections list of inflection entries (appended to)
--   poscat      part-of-speech category name, e.g. "verbs" or "suru verbs"
--   PAGENAME    title of the current page
local function find_inflections(args, categories, inflections, poscat, PAGENAME)
-- script of the page title ('kata', 'hira' or 'both'); nil when the title
-- contains anything other than kana and the accepted symbols
local detect_result = detect_kana_script(PAGENAME)
-- appends one romaji entry to inflections; may be called once per distinct reading
local function romanization(auto_rom)
-- accept the automatic romanization generated in function kana_to_romaji() above
-- compare that to the manual romanization if it exists and add it to inflections
local rom = args["rom"] or ""
if rom == "" then rom = auto_rom end
-- check auto rom against manual and put in hidden category if they differ
if rom ~= auto_rom then table.insert(categories, "Japanese terms with romaji needing attention") end
-- throw an error if there is no romanization
if rom == "" then error("Japanese terms must have a kana form.") end
-- add romaji
-- add link manually for WT:ACCEL unless headword is for suru verb
if poscat == "suru verbs" then
table.insert(inflections, {label = "romaci", "[[" .. rom .. "]] [[suru]]"})
elseif detect_result then
-- only accelerate romaji creation for kana entries
table.insert(inflections, {label = "romaci", accel = "romanized-form-of", rom})
else
table.insert(inflections, {label = "romaci", rom})
end
end
-- allkana: kana forms (markup stripped later); original: the same forms as given;
-- readings: set keyed by the m_ja.kata_to_hira form of each kana string;
-- romajis: automatic romanization of each entry, index-aligned with allkana.
-- NOTE(review): romaji_lookup is never used in this function.
local allkana,original,readings,romajis,romaji_lookup = {},{},{},{},{}
-- collect kana from the unnamed parameters
for i,arg in ipairs(args) do
if arg and arg ~= "" and mw.ustring.find(arg, kana_pattern_full) then table.insert(allkana, arg) end
end
-- accept "hira" and "kata" but let Lua decide if they are really hiragana or katakana
if args["hira"] and args["hira"] ~= "" and mw.ustring.find(args["hira"], kana_pattern_full) then table.insert(allkana, args["hira"]) end
if args["kata"] and args["kata"] ~= "" and mw.ustring.find(args["kata"], kana_pattern_full) then table.insert(allkana, args["kata"]) end
-- fall back to the page name only when no kana was supplied at all
if mw.ustring.find(PAGENAME, kana_pattern_full) then
if #allkana == 0 then table.insert(allkana, PAGENAME) end
end
for i = 1, #allkana do
-- auto_romanization (computed before the markup is stripped, so spaces, periods
-- and hyphens in the input are still visible to kana_to_romaji)
romajis[i] = kana_to_romaji(allkana[i], poscat, args)
-- remove markup
table.insert(original,allkana[i])
allkana[i] = mw.ustring.gsub(allkana[i], '[%s%.%-]', '')
end
for i = 1, #allkana do
-- if this is not kana, blank it out
-- NOTE(review): `allkana` is a table and therefore always truthy, so only the
-- second operand decides here; possibly `allkana[i]` was intended — confirm
if allkana and not mw.ustring.match(allkana[i], kana_pattern_char) then
allkana[i] = ""
else
-- if this is kana, count it as another effective reading (ignoring hiragana-katakana distinction)
readings[m_ja.kata_to_hira(allkana[i])] = 1
end
-- only if this kana is different from the page name
if allkana[i] ~= PAGENAME and allkana[i] ~= "" then
-- find script type and put it in "label"
local labelval = ""
-- stays true unless an earlier entry has the same automatic romanization
local alternative = true
for j = 1, i-1 do
if allkana[j] and romajis[i] == romajis[j] then
alternative = false
end
end
-- the final else also covers the case where detect_kana_script returns 'kata' or nil
if i>1 and alternative then labelval = "alternative reading"
elseif detect_kana_script(allkana[i]) == 'both' then labelval = "hiraqana və katakana"
elseif detect_kana_script(allkana[i]) == 'hira' then labelval = "hiraqana"
else labelval = "katakana" end
-- add everything to inflections, except historical hiragana which is next
if poscat == "nouns" or poscat == "proper nouns" or poscat == "verbs" or poscat == "adjectives" or poscat == "adverbs" then
-- enable accelerated entry creation using hiragana links for certain parts of speech
-- (the accel tag uses poscat with its last character removed and spaces turned into hyphens)
if mw.ustring.match(original[i],"[%. ]") then
-- the input carried a period or space: pass it along as a transliteration hint
local tr = mw.ustring.gsub(original[i], " ", "-")
table.insert(inflections, {label = labelval, accel = ("kana-%s-form-of transliteration-%s"):format(poscat:sub(1,poscat:len()-1):gsub(' ','-'), tr), allkana[i]})
else
table.insert(inflections, {label = labelval, accel = ("kana-%s-form-of"):format(poscat:sub(1,poscat:len()-1):gsub(' ','-')), allkana[i]})
end
elseif poscat ~= "suru verbs" then
table.insert(inflections, {label = labelval, allkana[i]})
else
table.insert(inflections, {label = labelval, "[[" .. allkana[i] .. "]][[する]]"})
end
end
-- do the romanization business if it passes through every check
-- (a blanked-out entry skips the checks below and is therefore still romanized)
local undergo_romanization = true
if allkana[i] ~= "" then
if allkana[i] == PAGENAME and not mw.ustring.find(PAGENAME, kana_pattern_full) then
undergo_romanization = false
else
-- skip when a later entry has the same automatic romanization, so that each
-- distinct romanization is emitted only once, at its last occurrence
for j=i+1, #allkana do
if allkana[j] and romajis[i] == romajis[j] then
undergo_romanization = false
end
end
end
end
if undergo_romanization then romanization(romajis[i]) end
end
-- historical hiragana spelling, from |hhira=
local hhira = args["hhira"] or ""
if hhira ~= "" then
if poscat == "suru verbs" then
table.insert(inflections, {label = "tarixi hiraqana", "[[" .. hhira .. "]][[する]]"})
else
table.insert(inflections, {label = "tarixi hiraqana", hhira})
end
end
-- historical katakana spelling, from |hkata=
local hkata = args["hkata"] or ""
if hkata ~= "" then
if poscat == "suru verbs" then
table.insert(inflections, {label = "historical katakana", "[[" .. hkata .. "]][[する]]"})
else
table.insert(inflections, {label = "historical katakana", hkata})
end
end
-- count the distinct keys of `readings` and categorize entries with more than one
local num_readings = 0
for _ in pairs(readings) do
num_readings = num_readings + 1
end
if num_readings > 1 then table.insert(categories, "Japanese words with multiple readings") end
end
--- Categorize by the script of the page title and by iteration marks in it.
-- @param categories        list of category names, appended to in place
-- @param PAGENAME          title of the current page
-- @param katakana_category separate list that receives the katakana category,
--                          so the caller can format it on its own
local function extra_categorization(categories, PAGENAME, katakana_category)
	-- pure-hiragana and pure-katakana titles go into their script category
	local script = detect_kana_script(PAGENAME)
	if script == 'hira' then
		table.insert(categories, "Yapon hiraqana")
	end
	if script == 'kata' then
		table.insert(katakana_category, "Yapon katakana")
	end

	-- titles mixing Japanese-script characters with anything else
	local has_other_script = mw.ustring.find(PAGENAME, "[^" .. Japanese_scripts_range .. "]")
	if has_other_script and mw.ustring.find(PAGENAME, '[' .. Japanese_scripts_range .. ']') then
		table.insert(categories, "Yapon terms written in multiple scripts")
	end

	-- one category per iteration mark that occurs in the title
	local iteration_marks = {'々','ゝ','ゞ','ヽ','ヾ'}
	for index = 1, #iteration_marks do
		local mark = iteration_marks[index]
		if mw.ustring.match(PAGENAME, mark) then
			table.insert(categories, ("Yapon terms spelled with %s"):format(mark))
		end
	end
end
-- Long-form parameter values mapped to the short codes the verb handlers compare against.
local aliases = {
	-- transitivity (|tr=)
	transitive = 'tr',
	trans = 'tr',
	intransitive = 'in',
	intrans = 'in',
	intr = 'in',
	-- conjugation class (|type=)
	godan = '1',
	ichidan = '2',
	irregular = '3',
}
--- Headword handler for verbs: adds transitivity and conjugation-class labels
-- and the matching categories.
-- @param args        template arguments; |tr= and |type= are read (long forms
--                    are normalised through `aliases`)
-- @param inflections list of inflection entries, appended to in place
-- @param categories  list of category names, appended to in place
pos_functions["verbs"] = function(args, inflections, categories)
	-- Resolve the page title here instead of reading a global PAGENAME: the
	-- handler is not passed the title, and relying on a global that another
	-- function happens to assign breaks as soon as that assignment is made local.
	local pagename = mw.title.getCurrentTitle().text

	table.insert(categories, "Yapon verbs")

	-- transitivity
	local transitivity_labels = {
		tr = "transitive",
		["in"] = "intransitive",
		both = "transitive and intransitive",
	}
	local tr = args["tr"] or ""
	tr = aliases[tr] or tr
	if tr == "" then
		table.insert(categories, "Japanese verbs without transitivity")
	elseif transitivity_labels[tr] then
		table.insert(inflections, {label = transitivity_labels[tr]})
	end
	-- any other non-empty value is silently ignored

	-- conjugation type
	local conjugation_info = {
		["1"] = { label = "godan conjugation", category = "Japanese type 1 verbs" },
		["2"] = { label = "ichidan conjugation", category = "Japanese type 2 verbs" },
		["3"] = { label = "irregular conjugation", category = "Japanese type 3 verbs" },
		["yo"] = { label = "yodan conjugation", category = "Japanese yodan verbs" },
		["ni"] = { label = "nidan conjugation", category = "Japanese nidan verbs" },
	}
	local conjugation = args["type"] or ""
	conjugation = aliases[conjugation] or conjugation
	local info = conjugation_info[conjugation]
	if info then
		table.insert(inflections, {label = info.label})
		table.insert(categories, info.category)
		-- hidden temporary maintenance category
		-- (suru verbs should use ja-verb-suru but sometimes erroneously use ja-verb with type=3 instead)
		if conjugation == "3" and mw.ustring.match(pagename, 'する$') then
			table.insert(categories, "Japanese terms using ja-verb with type 3")
		end
	end

	-- >> maintenance category <<
	-- the title ends in something other than an acceptable kana for a modern verb
	-- (and the verb is not already categorised as yodan or nidan)
	if not mw.ustring.match(pagename, '[うくぐすつぬぶむる]$') and conjugation ~= "yo" and conjugation ~= "ni" then
		table.insert(categories, "Japanese verbs without modern conjugations")
	end
end
--- Headword handler for auxiliary verbs: adds the general verb category and
-- the auxiliary-verb category. No inflection entries are produced.
-- @param args        template arguments (unused)
-- @param inflections list of inflection entries (unused)
-- @param categories  list of category names, appended to in place
pos_functions["auxiliary verbs"] = function(args, inflections, categories)
	local names = { "Japanese verbs", "Japanese auxiliary verbs" }
	for index = 1, #names do
		table.insert(categories, names[index])
	end
end
--- Headword handler for suru verbs: categorizes them as type 3 verbs and adds
-- a transitivity label, or a maintenance category when |tr= is missing.
-- @param args        template arguments; |tr= is read (long forms are
--                    normalised through `aliases`)
-- @param inflections list of inflection entries, appended to in place
-- @param categories  list of category names, appended to in place
pos_functions["suru verbs"] = function(args, inflections, categories)
	table.insert(categories, "Japanese verbs")
	table.insert(categories, "Japanese type 3 verbs")

	-- transitivity
	local transitivity = args["tr"] or ""
	transitivity = aliases[transitivity] or transitivity
	if transitivity == "" then
		table.insert(categories, "Japanese verbs without transitivity")
		return
	end

	local labels = {
		tr = "transitive",
		["in"] = "intransitive",
		both = "transitive and intransitive",
	}
	local label = labels[transitivity]
	if label then
		table.insert(inflections, {label = label})
	end
end
--- Headword handler for adjectives: adds a label and category for the
-- inflection type given in |infl= (or |decl= when |infl= is empty).
-- @param args        template arguments
-- @param inflections list of inflection entries, appended to in place
-- @param categories  list of category names, appended to in place
pos_functions["adjectives"] = function(args, inflections, categories)
	table.insert(categories, "Japanese adjectives")

	-- categorize by inflection type; |decl= is the fallback for |infl=
	local infl = args["infl"] or ""
	if infl == "" then
		infl = args["decl"] or ""
	end

	-- each row: romaji value, kana value, label, category
	local inflection_types = {
		{ "i", "い", "-i fleksiyası", "Japanese い-i adjectives" },
		{ "na", "な", "-na fleksiyası", "Japanese な-na adjectives" },
		{ "nari", "なり", "-nari fleksiyası", "Japanese なり-nari adjectives" },
		{ "tari", "たり", "-tari fleksiyası", "Japanese たり-tari adjectives" },
	}
	for index = 1, #inflection_types do
		local row = inflection_types[index]
		if infl == row[1] or infl == row[2] then
			table.insert(inflections, {label = row[3]})
			table.insert(categories, row[4])
		end
	end
end
--- Headword handler for nouns: adds the noun category and, when |count= is
-- given, either an "uncountable" label (value "-") or the counter itself.
-- @param args        template arguments
-- @param inflections list of inflection entries, appended to in place
-- @param categories  list of category names, appended to in place
pos_functions["nouns"] = function(args, inflections, categories)
	table.insert(categories, "Japanese nouns")

	-- the counter (classifier) parameter, only relevant for nouns
	local counter = args["count"]
	if not counter or counter == "" then
		return
	end

	local entry
	if counter == "-" then
		entry = {label = "uncountable"}
	else
		entry = {label = "counter", counter}
	end
	table.insert(inflections, entry)
end
-- The main entry point.
-- This is the only function that can be invoked from a template.
-- frame.args[1] is the part-of-speech category name (required); the template's
-- own arguments are read from the parent frame. Returns the formatted headword
-- line followed by the formatted katakana category, as wikitext.
function export.show(frame)
	-- NOTE: PAGENAME is intentionally still assigned as a global. The handlers in
	-- pos_functions are not passed the title, and code elsewhere in this module
	-- may read the global, so making it local here alone would break them.
	PAGENAME = mw.title.getCurrentTitle().text
	local args = frame:getParent().args
	local poscat = frame.args[1] or error("Part of speech has not been specified. Please pass parameter 1 to the module invocation.")
	local categories = {}
	local katakana_category = {}
	local inflections = {}

	-- set to PAGENAME if left empty
	local head = args["head"] or ""
	if head == "" then head = PAGENAME end
	-- if this is a suru verb append [[する]]
	if poscat == "suru verbs" then head = head .. "[[する]]" end

	-- raises an error when no kana form can be found
	local kana = find_kana(args, PAGENAME)

	-- the presence of kyūjitai param indicates that this is shinjitai kanji entry and vice versa
	local kyu = args["kyu"] or ""
	if kyu ~= "" then
		table.insert(inflections, {label = "[[shinjitai]] kanji"})
		table.insert(inflections, {label = "[[kyūjitai]] kanji", kyu})
	end
	local shin = args["shin"] or ""
	if shin ~= "" then
		table.insert(inflections, {label = "[[kyūjitai]] kanji"})
		table.insert(inflections, {label = "[[shinjitai]] kanji", shin})
	end

	-- add certain "inflections" and categories for adjectives, verbs, or nouns
	if pos_functions[poscat] then
		pos_functions[poscat](args, inflections, categories)
	else
		table.insert(categories, "Japanese " .. poscat)
	end

	-- sort out all the kanas and do the romanization business
	find_inflections(args, categories, inflections, poscat, PAGENAME)
	-- categorize by joyo kanji and number of kanji
	categorize_by_kanji(categories, PAGENAME)
	-- add "Japanese terms spelled with <kanji>" and the single-kanji tracking
	-- category for titles that consist of exactly one kanji
	singlekanji_term(categories, PAGENAME)
	-- add categories for terms with iteration marks (which are not kanji and hence are not categorized by ja-kanjitab)
	extra_categorization(categories, PAGENAME, katakana_category)

	-- will only use sortkey if sortkey is different from PAGENAME
	-- when katakana in PAGENAME is converted to hiragana
	-- (declared local: it was previously leaked as an accidental global)
	local sortkey = m_ja.jsort(kana)
	if sortkey == m_ja.kata_to_hira(PAGENAME) then
		return
			require("Module:headword").full_headword(lang, sc, head, nil, nil, inflections, categories, nil) ..
			require("Module:utilities").format_categories(katakana_category, lang)
	else
		-- convert sortkey to katakana version for katakana terms category (should sort by katakana)
		return
			require("Module:headword").full_headword(lang, sc, head, nil, nil, inflections, categories, sortkey) ..
			require("Module:utilities").format_categories(katakana_category, lang, m_ja.hira_to_kata(sortkey))
	end
end
return export