Module:zh-forms: difference between revisions
Jump to navigation
Jump to search
Content deleted Content added
add comment to $chars_unified |
appearance tweaks |
||
(29 intermediate revisions by 12 users not shown) | |||
Line 1: | Line 1: | ||
local export = {} |
local export = {} |
||
local m_zh = require("Module:zh") |
|||
local links = require("Module:links") |
local links = require("Module:links") |
||
local lang = require("Module:languages") |
local lang = require("Module:languages") |
||
local m_data = require("Module:zh-forms/data") |
|||
local find = mw.ustring.find |
|||
local |
local m_scripts = require("Module:scripts") |
||
local m_str_utils = require("Module:string utilities") |
|||
local match = mw.ustring.match |
|||
local len = mw.ustring.len |
|||
local concat = table.concat |
|||
local explode = m_str_utils.explode_utf8 |
|||
local find = m_str_utils.find |
|||
local get_section = require("Module:utilities").get_section |
|||
local gsub = m_str_utils.gsub |
|||
local insert = table.insert |
|||
local len = m_str_utils.len |
|||
local match = m_str_utils.match |
|||
local new_title = mw.title.new |
|||
local sub = m_str_utils.sub |
|||
local u = m_str_utils.char |
|||
local Hani_chars = m_scripts.getByCode("Hani"):getCharacters() |
|||
local nonbreaking_hyphen = u(0x2011) |
|||
local sc = { |
local sc = { |
||
Line 13: | Line 26: | ||
["both"] = "Hani", |
["both"] = "Hani", |
||
} |
} |
||
local chars_variant_both = ' |
|||
local chars_variant_one = ' |
|||
-- result of comparing the utf32-cn.map and utf32-tw.map files that are at https://github.com/adobe-fonts/source-han-sans/tree/release/Resources |
|||
local chars_unified = "⺽⼊⼋⼔⼣⼥⼾⽍⽐⽕⽚⽛⽠⽡⽰⽱⽳⽶⽼⽾⽿⾆⾇⾌⾝⾠⾣⾬⾮⾵⾷⾻⾿⿁⿅⿆⿇⿈⿋⿌⿏⿓⿔㑤㑦㒈㒓㒖㒯㒼㓁㓲㕙㕯㕹㖗㖘㖨㖿㗛㗳㗻㗾㘉㘚㙂㙇㙈㙎㙳㚚㚥㚪㚬㚰㚱㚵㚹㚼㛁㛃㛄㛅㛇㛈㛓㛔㛖㛝㛡㛢㛥㛦㛵㜁㜃㜈㜊㜍㜜㜟㜢㜣㜥㜬㜭㜰㜲㜳㜺㝢㝬㞹㞾㟲㟴㟸㠏㠗㠙㠠㡵㢣㤆㤯㥤㥯㦀㦊㦑㦙㦛㦤㦾㧬㧻㨘㩞㩦㩧㪖㫵㫶㫽㬚㬫㬹㮕㮡㮴㯂㯄㯳㯴㱔㱕㲋㴬㴲㵆㵌㵝㵟㵢㵩㵪㵵㶈㶏㶥㶭㶴㶿㷆㷇㷉㷌㷍㷓㷧㷨㷫㷳㷴㷷㷼㷽㸆㸏㹃㺖㺲㻇㻐㻑㻖㻗㻢㻧㻫㻬㻰㻳㻴㻺㼀㼆㽹㿀㿍㿗㿭㿹㿺䀝䁓䁘䁟䁥䁯䂨䃈䃺䄃䄒䄲䅮䅼䅿䆲䆻䆿䇛䇭䇯䇹䈑䈣䉀䉠䉪䉶䊌䊔䊢䋻䌊䌫䍃䎑䎗䎚䎺䎼䏁䏊䏙䏟䏭䏰䏲䐁䐂䐓䐗䐤䐥䐭䑛䑺䒟䒠䒢䒰䒷䒽䓀䓃䓅䓎䓝䓞䓟䓡䓤䓩䓪䓫䓬䓲䓴䔃䔄䔉䔋䔖䔛䔝䔧䔮䔳䔶䔻䔽䔿䕃䕑䕒䕕䕘䕜䕡䕢䕪䕭䕷䕸䕺䕾䖅䖎䖓䖛䖣䖳䗚䘆䘵䙺䚀䚄䛀䛷䜓䜘䜶䝏䞦䟴䟿䠴䠷䡊䡝䡱䢛䢭䢮䣐䣭䣮䣳䣺䤆䤑䤫䤵䥑䥲䧔䧩䨏䨝䨤䨵䩮䪤䪩䫿䬐䬙䬠䬬䬷䮐䮽䯀䯊䯒䯛䰁䰠䱀䱗䱚䱭䱷䱻䱽䴇䵶䵷䶉 |
|||
-- Change one variant character to another variant character. |
-- Change one variant character to another variant character. |
||
Line 23: | Line 31: | ||
function export.change_to_variant(text) |
function export.change_to_variant(text) |
||
local count = 0 |
local count = 0 |
||
local text = |
local text = gsub(text, ('([%s])'):format(m_data.chars_variant_both), function(char) |
||
count = count + 1 |
count = count + 1 |
||
if (count > 1) then return '' end -- add |t2= manually, please |
if (count > 1) then return '' end -- add |t2= manually, please |
||
local this_char_index = |
local this_char_index = find(m_data.chars_variant_both, char) |
||
local first_or_second = (this_char_index % 2) |
local first_or_second = (this_char_index % 2) |
||
local other_char_index = this_char_index + (first_or_second == 1 and 1 or -1) |
local other_char_index = this_char_index + (first_or_second == 1 and 1 or -1) |
||
return |
return sub(m_data.chars_variant_both, other_char_index, other_char_index) |
||
end) |
end) |
||
return (count == 1 and text or '') |
return (count == 1 and text or '') |
||
end |
|||
local zh_link_impl = nil |
|||
local function zh_link(...) |
|||
if zh_link_impl == nil then |
|||
zh_link_impl = require("Module:zh/link") |
|||
end |
|||
return zh_link_impl.link(unpack(arg)) |
|||
end |
end |
||
Line 39: | Line 55: | ||
["s"] = { list = true }, |
["s"] = { list = true }, |
||
["t"] = { list = true }, |
["t"] = { list = true }, |
||
["ss"] = {}, ["ns"] = {}, ["alt"] = {}, ["type"] = {}, ["delink"] = {}, ["lit"] = {}, ["note"] = {}, ["gloss"] = {} |
["ss"] = {}, ["ns"] = {}, ["alt"] = {}, ["type"] = {}, ["delink"] = {}, ["lit"] = {}, ["note"] = {}, ["gloss"] = {}, ["align"] = {} |
||
} |
} |
||
local args = require("Module:parameters").process(frame:getParent().args, params) |
local args = require("Module:parameters").process(frame:getParent().args, params) |
||
Line 45: | Line 61: | ||
local s, t = {}, {} |
local s, t = {}, {} |
||
local annotation = {} |
local annotation = {} |
||
local pagename = mw.loadData("Module:headword/data").pagename |
|||
local current_title = mw.title.getCurrentTitle() |
|||
local content = current_title:getContent() |
|||
if not frame:getParent().args["t"] then |
if not frame:getParent().args["t"] then |
||
insert(t, 1, current_title.subpageText) |
|||
end |
|||
local PAGENAME = mw.title.getCurrentTitle().text |
|||
local content = mw.title.new(PAGENAME):getContent() |
|||
local function insert_st(set, text) |
|||
table.insert(set, text) |
|||
end |
end |
||
for i = 1, #args.s do |
for i = 1, #args.s do |
||
if (#args.s == 1) and |
if (#args.s == 1) and pagename == args.s[i] then error('Redundant text in |s=.') end |
||
insert(s, args.s[i]) |
|||
end |
end |
||
for i = 1, #args.t do |
for i = 1, #args.t do |
||
if (#args.t == 1) and |
if (#args.t == 1) and pagename == args.t[i] then error('Redundant text in |t=.') end |
||
insert(t, args.t[i]) |
|||
end |
end |
||
local t1 = t[1] |
|||
local t1_len = len(t1) |
|||
-- temp tracking |
-- temp tracking |
||
if #s == 0 and |
if #s == 0 and require("Module:zh").ts(t1) ~= t1 then |
||
require('Module:debug').track('zh-forms/entry possibly missing a simplified form') |
require('Module:debug').track('zh-forms/entry possibly missing a simplified form') |
||
end |
end |
||
if #t == 1 then |
if #t == 1 then |
||
local to_variant = export.change_to_variant( |
local to_variant = export.change_to_variant(t1) |
||
if (to_variant ~= '') then |
if (to_variant ~= '') then |
||
-- automatically generate a |t2= |
-- automatically generate a |t2= |
||
insert(t, export.change_to_variant(t1)) |
|||
elseif find( |
elseif find(t1, ('[%s]'):format(m_data.chars_variant_both .. m_data.chars_variant_one)) then |
||
require('Module:debug').track('zh-forms/entry possibly missing a variant form') |
require('Module:debug').track('zh-forms/entry possibly missing a variant form') |
||
end |
end |
||
Line 82: | Line 99: | ||
if #t ~= 1 and #s == 0 then |
if #t ~= 1 and #s == 0 then |
||
insert(s, t1) |
|||
end |
end |
||
local function asterisk(term, iscomp) |
local function asterisk(term, iscomp) |
||
if iscomp and |
if iscomp and t1_len > 1 then return "" end |
||
if term == |
if term == current_title.subpageText or not (new_title(term) or {}).exists then return "" end |
||
local content = |
local content = new_title(term):getContent() |
||
content = gsub(content, "zh%-pron", "Ꙁ") |
content = gsub(content, "zh%-pron", "Ꙁ") |
||
content = gsub(content, "zh%-see", "Ꙁ") |
content = gsub(content, "zh%-see", "Ꙁ") |
||
content = gsub(content, "[^Ꙁ]", "") |
content = gsub(content, "[^Ꙁ]", "") |
||
return |
return len(content) > 1 and '<sup><span class="explain" title="This form has one or more other meanings.">*</span></sup>' or '' |
||
end |
end |
||
Line 110: | Line 127: | ||
local function header(length, var_count) |
local function header(length, var_count) |
||
return ((length > 3 or var_count * length > 5) and '{|' or '{| |
return ((length > 3 or var_count * length > 5 or args.align == 'left') and '{|' or '{| class="floatright"') .. |
||
' style="clear: right;margin: 1em;border-collapse: collapse;text-align: center"' .. |
' style="clear: right;margin: 1em 0 1em 1em;border-collapse: collapse;text-align: center"' .. |
||
(length ~= 1 and '\n|-\n! colspan=2|' or '') |
(length ~= 1 and '\n|-\n! colspan=2|' or '') |
||
end |
end |
||
Line 118: | Line 135: | ||
return '\n! style="padding: 0.' .. (length > 8 and '3' or '5') .. |
return '\n! style="padding: 0.' .. (length > 8 and '3' or '5') .. |
||
'em;border: 1px solid #aaa;background:#F5F5DC;font-weight: normal;font-size: 85%; width:' .. |
'em;border: 1px solid #aaa;background:#F5F5DC;font-weight: normal;font-size: 85%; width:' .. |
||
(length <= 8 and ( |
(length <= 8 and (40 * word + 40) or (25 * word + 25)) .. 'px" colspan=' .. (colspan or 1) ..'|' |
||
end |
end |
||
Line 124: | Line 141: | ||
local fmtd_text = {} |
local fmtd_text = {} |
||
for i,value in ipairs(text) do |
for i,value in ipairs(text) do |
||
fmtd_text[i] = links.language_link |
fmtd_text[i] = links.language_link{ lang = lang.getByCode("zh"), term = value } .. asterisk(value, false) |
||
end |
end |
||
return length ~= 1 and ((length > 8 and '' or '<span style="font-size:140%">') .. |
return length ~= 1 and ((length > 8 and '' or '<span style="font-size:140%">') .. |
||
'(<span lang="zh-' .. sc[script] .. '" class="' .. sc[script] .. '">' .. |
'(<span lang="zh-' .. sc[script] .. '" class="' .. sc[script] .. '">' .. concat(fmtd_text, "/") .. '</span>)' .. |
||
(length > 8 and '' or '</span>')) or '' |
(length > 8 and '' or '</span>')) or '' |
||
end |
end |
||
local function char_fmt(text, length, script) |
local function char_fmt(text, length, script) |
||
return (#text ~= 1 and (char_gap(length, script) .. |
return (#text ~= 1 and (char_gap(length, script) .. concat(text, char_gap(length, script), 1, #text-1)) or '') .. char_gap(length, script, true) .. text[#text] |
||
end |
end |
||
local test_word = t[1] |
|||
local length = len(test_word) |
|||
local word_division = {} |
local word_division = {} |
||
local i = 1 |
local i = 1 |
||
Line 143: | Line 158: | ||
if comp_type then |
if comp_type then |
||
for index in mw.text.gsplit(comp_type, "", true) do |
for index in mw.text.gsplit(comp_type, "", true) do |
||
if find( |
if find(sub(t1, i, i), '[,%-]') then |
||
insert(word_division, { i, i } ) |
|||
i = i + 1 |
i = i + 1 |
||
elseif |
elseif sub(t1, i, i) == '…' then |
||
insert(word_division, { i, i + 1 } ) |
|||
i = i + 2 |
i = i + 2 |
||
end |
end |
||
insert(word_division, { i, i + index - 1 } ) |
|||
i = i + index |
i = i + index |
||
end |
end |
||
if i - 1 ~= len(gsub( |
if i - 1 ~= len(gsub(t1, '…+$', '')) and not find(concat(t) .. concat(s), "[⿰⿱⿲⿳⿴⿵⿶⿷⿸⿹⿺⿻]") then |
||
error("'type' parameter does not match word length.") |
error("'type' parameter does not match word length.") |
||
end |
end |
||
else |
else |
||
for i = 1, |
for i = 1, t1_len do |
||
insert(word_division, { i, i } ) |
|||
end |
end |
||
decomposable = len(gsub( |
decomposable = len(gsub(t1, '…+$', '')) > 2 and true or false |
||
end |
end |
||
Line 184: | Line 199: | ||
local char_string = "" |
local char_string = "" |
||
for j = 1, #id do |
for j = 1, #id do |
||
local word_form = |
local word_form = sub(id[j], position[1], position[2]) |
||
if not find(char_string, word_form) then |
if not find(char_string, word_form) then |
||
char_string = (char_string ~= "" and (char_string .. '/') or "") .. word_form |
char_string = (char_string ~= "" and (char_string .. '/') or "") .. word_form |
||
Line 192: | Line 207: | ||
local hash = {} |
local hash = {} |
||
for thing in mw.text.gsplit(char_string, (delink[i] and "" or "/")) do |
for thing in mw.text.gsplit(char_string, (delink[i] and "" or "/")) do |
||
insert(hash, links.language_link{ lang = lang.getByCode("zh"), term = thing } .. asterisk(thing, true)) |
|||
end |
end |
||
char_string = |
char_string = concat(hash, (delink[i] and "" or "/")) |
||
end |
end |
||
insert(char_set[id.name], char_string) |
|||
end |
end |
||
for _, item in ipairs(id) do |
for _, item in ipairs(id) do |
||
if not ( |
if not (new_title(item) or {}).exists and item ~= current_title.subpageText then |
||
insert(uncreated, '"[[' .. item .. ']]"') |
|||
end |
end |
||
end |
end |
||
end |
end |
||
local scripts = { [ |
local scripts = { [m_scripts.getByCode("Hani"):getCharacters()] = "Hani" , [m_scripts.getByCode("Latn"):getCharacters()] = "Latn", ["0-90-9"] = "Numb", [m_scripts.getByCode("Polyt"):getCharacters()] = "Grek", [m_scripts.getByCode("Bopo"):getCharacters()] = "Bopo" } |
||
local script = {} |
local script = {} |
||
for range, script_name in pairs(scripts) do |
for range, script_name in pairs(scripts) do |
||
if find( |
if find(t1, '[' .. range .. ']') then |
||
insert(script, script_name) |
|||
end |
end |
||
end |
end |
||
if find( |
if find(t1, "([^─…]+)%1") and args['gloss'] ~= '-' and t1_len < 7 then |
||
if gsub(comp_type or "", "1", "") == "" then |
if gsub(comp_type or "", "1", "") == "" then |
||
insert(annotation, '[[Category:Chinese reduplications]]') |
|||
elseif find( |
elseif find(t1, "([^…][^…]+)%1") or find(concat(char_set['trad'], " "), "([^─…%[%]a-z]+)%1") then |
||
local evil |
local evil |
||
for _, component in ipairs(char_set['trad']) do |
for _, component in ipairs(char_set['trad']) do |
||
if len(component) > 1 then |
if len(component) > 1 then |
||
local comp_content = |
local comp_content = new_title(links.remove_links(component)):getContent() or false |
||
if not comp_content or find(comp_content, "|gloss=-") or not find(comp_content, "==Chinese==") then |
if not comp_content or find(comp_content, "|gloss=-") or not find(comp_content, "==Chinese==") then |
||
evil = true |
evil = true |
||
Line 226: | Line 241: | ||
end |
end |
||
end |
end |
||
if not evil then |
if not evil then insert(annotation, '[[Category:Chinese reduplications]]') end |
||
end |
end |
||
end |
end |
||
insert(annotation, #script > 1 and '[[Category:Chinese terms written in multiple scripts]]' or nil) |
|||
insert(annotation, (decomposable and args['gloss'] ~= '-' and not args['note'] and not args['lit']) and '[[Category:Chinese entries with potentially decomposable titles]]' or nil) |
|||
insert(annotation, (#uncreated > 0 and current_title.nsText == "") and '[[Category:Chinese terms with uncreated forms]]' .. |
|||
'<small class="attentionseeking">(' .. (#uncreated == 1 and 'This form' or 'These forms') .. |
'<small class="attentionseeking">(' .. (#uncreated == 1 and 'This form' or 'These forms') .. |
||
' in the hanzi box ' .. (#uncreated == 1 and 'is' or 'are') .. ' uncreated: ' .. |
' in the hanzi box ' .. (#uncreated == 1 and 'is' or 'are') .. ' uncreated: ' .. |
||
concat(uncreated, ", ") .. '.)</small>' or nil) |
|||
local ss = "" |
local ss = "" |
||
if args["ss"] then |
if args["ss"] then |
||
ss = var_fmt( |
ss = var_fmt(t1_len, 'FFFFE0') .. '[[w:Second round of simplified Chinese characters|2nd round simp.]]' .. sub(char_gap(t1_len, 'simp', true), 1, -45) |
||
.. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"' |
.. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"' |
||
.. ' colspan="' .. #word_division .. '"|' .. |
.. ' colspan="' .. #word_division .. '"|' .. zh_link(nil, nil, { "*" .. args["ss"], tr = "-" }, current_title.subpageText) |
||
end |
end |
||
local ns = "" |
local ns = "" |
||
if args["ns"] then |
if args["ns"] then |
||
ns = var_fmt( |
ns = var_fmt(t1_len, 'FFFFE0') .. 'nonstandard simp.' .. sub(char_gap(t1_len, 'simp', true), 1, -45) |
||
.. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"' |
.. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"' |
||
.. ' colspan="' .. #word_division .. '"|' .. |
.. ' colspan="' .. #word_division .. '"|' .. zh_link(nil, nil, { "*" .. args["ns"], tr = "-" }, current_title.subpageText) |
||
end |
end |
||
Line 257: | Line 272: | ||
local altdecomp = mw.text.split(altform, ":") |
local altdecomp = mw.text.split(altform, ":") |
||
local altdecomp2 = mw.text.split(altdecomp[1], "-") |
local altdecomp2 = mw.text.split(altdecomp[1], "-") |
||
altdecomp3 = altdecomp2[2] and ' <span style="font-size: |
local altdecomp3 = altdecomp2[2] and ' <span style="font-size:80%"><i>' .. gsub(altdecomp2[2], "‡", frame:expandTemplate{ |
||
title = "Template:zh-historical-dict", |
title = "Template:zh-historical-dict", |
||
args = { type = "form", nocat = "1" } |
args = { type = "form", nocat = "1" } |
||
}) .. '</i></span>' or '' |
}) .. '</i></span>' or '' |
||
insert(altform_list, '<span style="white-space:nowrap;">' .. |
|||
zh_link(nil, nil, { altdecomp2[1], tr = (altdecomp[2] or "-") }, current_title.subpageText) .. |
|||
altdecomp3 .. '</span>') |
altdecomp3 .. '</span>') |
||
end |
end |
||
if #altform_list > 5 then |
if #altform_list > 5 then |
||
altforms = '<div class="vsSwitcher" data-toggle-category="Chinese alternative forms" |
altforms = '<div class="vsSwitcher" data-toggle-category="Chinese alternative forms">' .. |
||
'<div class="vsShow">' .. |
'<div class="vsShow">' .. concat(altform_list, "<br>", 1, 5) .. |
||
'</div><div class="vsHide">' .. |
'</div><div class="vsHide">' .. concat(altform_list, "<br>") .. '</div><span class="vsToggleElement" style="display:block;width:fit-content;margin:auto"> </span></div>' |
||
else |
else |
||
altforms = |
altforms = concat(altform_list, "<br>") |
||
end |
end |
||
altforms = var_fmt( |
altforms = var_fmt(t1_len, 'F0FFE0') .. 'alternative forms' .. sub(char_gap(t1_len, "trad", true), 1, -45) |
||
.. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"' |
.. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"' |
||
.. ' colspan="' .. #word_division .. '"|' .. altforms |
.. ' colspan="' .. #word_division .. '"|' .. altforms |
||
end |
end |
||
local |
local anagrams = "" |
||
if |
if t1_len >= 2 and t1_len <= 5 then |
||
local anagram_link = mw.ustring.sub(t[1], 2, 2) .. mw.ustring.sub(t[1], 1, 1) |
|||
local function generate_anagrams(term, n, anagrams, checked) |
|||
local anagram_content = mw.title.new(anagram_link):getContent() or false |
|||
if n == 0 then |
|||
if anagram_content and find(anagram_content, "==Chinese==") then |
|||
local anagram = concat(term) |
|||
anagram = var_fmt(length, 'F0FFE0') .. 'anagram' .. mw.ustring.sub(char_gap(length, 'trad', true), 1, -45) |
|||
if checked[anagram] then |
|||
.. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"' |
|||
return |
|||
.. ' colspan="' .. #word_division .. '"|' .. m_zh.link(nil, nil, { anagram_link, tr = "-" }, mw.title.getCurrentTitle().subpageText) |
|||
end |
|||
local title = new_title(anagram) |
|||
if not title then |
|||
return |
|||
end |
|||
title = title:getContent() |
|||
if title and get_section(title, "Chinese", 2) then |
|||
insert(anagrams, anagram) |
|||
end |
|||
checked[anagram] = true |
|||
else |
|||
for i = 1, n do |
|||
generate_anagrams(term, n - 1, anagrams, checked) |
|||
local i = n % 2 == 0 and i or 1 |
|||
term[i], term[n] = term[n], term[i] |
|||
end |
|||
end |
|||
return anagrams |
|||
end |
|||
local term, checked = explode(t1), {[t1] = true} |
|||
anagrams = generate_anagrams(term, #term, {}, checked) |
|||
local anagrams_num = #anagrams |
|||
for i = 1, anagrams_num do |
|||
anagrams[i] = "<span style=\"white-space:nowrap;\">" .. |
|||
zh_link(nil, nil, {anagrams[i], tr = "-"}, current_title.subpageText) .. |
|||
"</span>" |
|||
end |
|||
if anagrams_num == 0 then |
|||
anagrams = "" |
|||
else |
|||
anagrams = concat(anagrams, "<br>") |
|||
local label = anagrams_num == 1 and "anagram" or "anagrams" |
|||
anagrams = var_fmt(t1_len, 'F0FFE0') .. label .. sub(char_gap(t1_len, "trad", true), 1, -45) |
|||
.. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"' |
|||
.. ' colspan="' .. #word_division .. '"|' .. anagrams |
|||
end |
end |
||
end |
end |
||
local literal = (args["lit"] or args["note"]) and '\n|-' .. |
local literal = (args["lit"] or args["note"]) and '\n|-' .. |
||
gloss_fmt( |
gloss_fmt(t1_len, #word_division + 2, t1_len) .. |
||
(args["lit"] and '<i>Literally:</i> “' .. args["lit"] .. |
(args["lit"] and '<i>Literally:</i> “' .. args["lit"] .. |
||
(find(args["lit"], "%.$") and "”" or "”.") or args["note"]) or "" |
(find(args["lit"], "%.$") and "”" or "”.") or args["note"]) or "" |
||
Line 295: | Line 348: | ||
local gloss = {} |
local gloss = {} |
||
if args['gloss'] == '-' then |
if args['gloss'] == '-' then |
||
gloss = { gloss_fmt( |
gloss = { gloss_fmt(t1_len * 1.6, #word_division, t1_len) .. '<i>phonetic</i>' } |
||
elseif |
elseif t1_len == 1 then |
||
gloss = {}--{ gloss_fmt(1.6, #word_division) .. '-' } |
gloss = {}--{ gloss_fmt(1.6, #word_division) .. '-' } |
||
else |
else |
||
for i, position in ipairs(word_division) do |
for i, position in ipairs(word_division) do |
||
local character = |
local character = sub(t1, position[1], position[2]) |
||
local gloss_text = args[1][i] |
local gloss_text = args[1][i] |
||
Line 325: | Line 378: | ||
} |
} |
||
end) |
end) |
||
if gloss_text == "" and find(character, "^[ |
if gloss_text == "" and find(character, "^[" .. Hani_chars .. "]+$") and not find(glosses.nonlemma, character) then |
||
require('Module:debug').track('zh-forms/no gloss found for Chinese character') |
require('Module:debug').track('zh-forms/no gloss found for Chinese character') |
||
end |
end |
||
Line 340: | Line 393: | ||
]] |
]] |
||
if find(gloss_text, "-", nil, true) then |
if find(gloss_text, "-", nil, true) then |
||
gloss_text = gsub(gloss_text, "^%-", nonbreaking_hyphen) |
|||
local nonbreaking_hyphen = mw.ustring.char(0x2011) |
|||
gloss_text = |
gloss_text = gsub(gloss_text, "(%s)%-", "%1" .. nonbreaking_hyphen) |
||
gloss_text = mw.ustring.gsub(gloss_text, "(%s)%-", "%1" .. nonbreaking_hyphen) |
|||
end |
end |
||
if gloss_text == "" and position[2] > position[1] then |
if gloss_text == "" and position[2] > position[1] then |
||
local |
local content = new_title(character):getContent() or false |
||
local content = mw.title.new(character):getContent() or false |
|||
if content then |
if content then |
||
gloss_text = |
gloss_text = require("Module:zh/extract").extract_gloss(content, false) |
||
if gloss_text == "" and find(character, "^[ |
if gloss_text == "" and find(character, "^[" .. Hani_chars .. "]+$") then |
||
require('Module:debug').track('zh-forms/no gloss found but entry exists') |
require('Module:debug').track('zh-forms/no gloss found but entry exists') |
||
end |
end |
||
--[[ |
|||
if not string.match(content, character) then |
if not string.match(content, character) then |
||
require('Module:debug').track('zh-forms/compounds not mentioned in derived terms on the component pages') |
require('Module:debug').track('zh-forms/compounds not mentioned in derived terms on the component pages') |
||
end |
end |
||
]]-- |
|||
else |
else |
||
if gloss_text == "" and find(character, "^[ |
if gloss_text == "" and find(character, "^[" .. Hani_chars .. "]+$") then |
||
insert(annotation, '[[Category:Chinese redlinks/zh-forms]]') |
|||
end |
end |
||
end |
end |
||
Line 366: | Line 419: | ||
gloss_text = gsub(gloss_text, ";+", ";") |
gloss_text = gsub(gloss_text, ";+", ";") |
||
if len(gsub(gloss_text, '[^;]', '')) > 2 then |
if len(gsub(gloss_text, '[^;]', '')) > 2 then |
||
gloss_text = '<div class="vsSwitcher" data-toggle-category="glosses" |
gloss_text = '<div class="vsSwitcher" data-toggle-category="glosses"><div class="vsShow">' .. |
||
match(gloss_text, '^[^;]+;[^;]+;[^;]+') .. '</div><div class="vsHide">' .. gloss_text .. '</div></div>' |
match(gloss_text, '^[^;]+;[^;]+;[^;]+') .. '</div><div class="vsHide">' .. gloss_text .. '</div><span class="vsToggleElement" style="display:block;width:fit-content;margin:auto"> </span></div>' |
||
end |
end |
||
local word_length = match(character, '[,…%-]') and 0 or |
local word_length = match(character, '[,…%-]') and 0 or |
||
(delink[i] and len(character) or len(character) * (len(gsub(char_set['trad'][i], '[^/]', '')) + 1)) |
(delink[i] and len(character) or len(character) * (len(gsub(char_set['trad'][i], '[^/]', '')) + 1)) |
||
insert(gloss, gloss_fmt(word_length, 1, t1_len) .. gloss_text) |
|||
end |
end |
||
end |
end |
||
if content then |
if content then |
||
--[[ |
|||
local applicable_pos = { ["Noun"] = 1, ["Verb"] = 1, ["Adjective"] = 1, ["Adverb"] = 1, |
local applicable_pos = { ["Noun"] = 1, ["Verb"] = 1, ["Adjective"] = 1, ["Adverb"] = 1, |
||
["Definitions"] = 1, ["Pronunciation"] = 1 } |
["Definitions"] = 1, ["Pronunciation"] = 1 } |
||
local previous_level = 2 |
local previous_level = 2 |
||
local subheading_wanted |
local subheading_wanted |
||
]]-- |
|||
--[=[ |
--[=[ |
||
-- Used under headers for Chinese varieties, for instance in [[āu-piah]] |
-- Used under headers for Chinese varieties, for instance in [[āu-piah]] |
||
Line 387: | Line 442: | ||
end |
end |
||
--]=] |
--]=] |
||
if |
if t1_len == 1 and not string.match(content, "===Definitions===") then |
||
require('Module:debug').track('zh-forms/no definitions section found') |
require('Module:debug').track('zh-forms/no definitions section found') |
||
end |
end |
||
--[[ disable, {{zh-der}} has been deprecated so this expression will always be false |
|||
if length == 1 and not string.match(content, "===Compounds===") and string.match(content, "zh-der") then |
|||
if t1_len == 1 and not string.match(content, "===Compounds===") and string.match(content, "zh-der") then |
|||
require('Module:debug').track('zh-forms/derived terms probably needing renaming') |
require('Module:debug').track('zh-forms/derived terms probably needing renaming') |
||
end |
end |
||
]]-- |
|||
--[[ comment out relatively expensive parsing |
|||
for equals, heading_text in string.gmatch(content, "\n(%=%=+)([^%=]+)%=%=+") do |
for equals, heading_text in string.gmatch(content, "\n(%=%=+)([^%=]+)%=%=+") do |
||
local current_level = #equals |
local current_level = #equals |
||
Line 403: | Line 461: | ||
subheading_wanted = string.find(heading_text, " [0-9]+") and true or false |
subheading_wanted = string.find(heading_text, " [0-9]+") and true or false |
||
end |
end |
||
]]-- |
|||
end |
end |
||
local simp_note = "" |
local simp_note = "" |
||
--This is not a complete list! |
|||
if #identity == 1 and find(chars_unified, '[' .. |
if #identity == 1 and find(m_data.chars_unified, '[' .. t1 .. ']') then |
||
simp_note = '<sup><span class="explain" title="Using the same code point' .. ( |
simp_note = '<sup><span class="explain" title="Using the same code point' .. (t1_len > 1 and 's' or '') .. ' as the traditional form due to Han unification. Without proper font support, it may be displayed as the same as the traditional form.">#</span></sup>' |
||
char_set['simp'] = char_set['trad'] |
char_set['simp'] = char_set['trad'] |
||
end |
end |
||
return |
return concat{ |
||
header( |
header(t1_len, math.max(#t, #s)), concat(gloss, ""), |
||
((#identity == 1 and simp_note == '') and |
((#identity == 1 and simp_note == '') and |
||
(var_fmt( |
(var_fmt(t1_len) .. '[[Simplified Chinese|simp.]] and [[Traditional Chinese|trad.]]<br>' .. |
||
form_fmt(t, |
form_fmt(t, t1_len, 'both') .. char_fmt(char_set['trad'], t1_len, 'both')) |
||
or |
or |
||
var_fmt( |
var_fmt(t1_len) .. '[[Traditional Chinese|trad.]] ' .. |
||
form_fmt(t, |
form_fmt(t, t1_len, 'trad') .. char_fmt(char_set['trad'], t1_len, 'trad') .. |
||
(var_fmt( |
(var_fmt(t1_len) .. '[[Simplified Chinese|simp.]] ' .. simp_note .. |
||
form_fmt(#s == 0 and t or s, |
form_fmt(#s == 0 and t or s, t1_len, 'simp') .. char_fmt(char_set['simp'], t1_len, 'simp')) |
||
), ss, ns, altforms, |
), ss, ns, altforms, anagrams, literal, '\n|}', concat(annotation) |
||
} |
} |
||
end |
end |
Revision as of 21:03, 19 June 2024
- The following documentation is located at Module:zh-forms/documentation. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox
New Chinese character box, used by {{zh-forms}}.
local export = {}
local links = require("Module:links")
local lang = require("Module:languages")
local m_data = require("Module:zh-forms/data")
local m_scripts = require("Module:scripts")
local m_str_utils = require("Module:string utilities")
local concat = table.concat
local explode = m_str_utils.explode_utf8
local find = m_str_utils.find
local get_section = require("Module:utilities").get_section
local gsub = m_str_utils.gsub
local insert = table.insert
local len = m_str_utils.len
local match = m_str_utils.match
local new_title = mw.title.new
local sub = m_str_utils.sub
local u = m_str_utils.char
local Hani_chars = m_scripts.getByCode("Hani"):getCharacters()
local nonbreaking_hyphen = u(0x2011)
-- Maps a form kind ("trad", "simp" or "both") to the script code that is
-- spliced into the `lang="zh-…"` attribute and the CSS class of the spans
-- generated for the forms.
local sc = {
["trad"] = "Hant",
["simp"] = "Hans",
["both"] = "Hani",
}
-- Swap a variant character in `text` for its counterpart.
-- `m_data.chars_variant_both` is read as consecutive pairs: a character at an
-- odd position is exchanged with the one right after it, and a character at an
-- even position with the one right before it.
-- Only a single substitution is supported. The result is the rewritten text
-- when exactly one variant character was found, and the empty string ''
-- (not false) when none or more than one was found.
function export.change_to_variant(text)
	local variant_pairs = m_data.chars_variant_both
	local hits = 0

	local swapped = gsub(text, ('([%s])'):format(variant_pairs), function(char)
		hits = hits + 1
		if hits > 1 then
			-- add |t2= manually, please
			return ''
		end
		local pos = find(variant_pairs, char)
		-- Odd position -> partner follows; even position -> partner precedes.
		local partner_pos = (pos % 2 == 1) and (pos + 1) or (pos - 1)
		return sub(variant_pairs, partner_pos, partner_pos)
	end)

	if hits ~= 1 then
		return ''
	end
	return swapped
end
-- Cached [[Module:zh/link]] table; stays nil until zh_link is first called.
local zh_link_impl = nil

-- Forwards all arguments to the `link` function of [[Module:zh/link]] and
-- returns whatever it returns. The module is required on the first call only
-- and then reused.
--
-- The arguments are passed on with `...` rather than `unpack(arg)`: the
-- implicit `arg` table is a deprecated Lua 5.0 compatibility feature, and
-- `unpack` relies on `#arg`, which is unspecified when the list contains nils
-- (callers here pass nil as the first two arguments) and drops trailing nils.
local function zh_link(...)
	if zh_link_impl == nil then
		zh_link_impl = require("Module:zh/link")
	end
	return zh_link_impl.link(...)
end
function export.make(frame)
-- Read the template arguments from the parent frame and assemble the lists of
-- traditional (`t`) and simplified (`s`) forms used by the rest of the function.
local params = {
	[1] = { list = true, allow_holes = true, allow_empty = true },
	["s"] = { list = true },
	["t"] = { list = true },
	["ss"] = {}, ["ns"] = {}, ["alt"] = {}, ["type"] = {}, ["delink"] = {}, ["lit"] = {}, ["note"] = {}, ["gloss"] = {}, ["align"] = {}
}
local parent_args = frame:getParent().args
local args = require("Module:parameters").process(parent_args, params)
local comp_type = args["type"]
local s, t = {}, {}
local annotation = {}
local pagename = mw.loadData("Module:headword/data").pagename
local current_title = mw.title.getCurrentTitle()
local content = current_title:getContent()

-- Without a raw |t= argument the page title is the first traditional form.
if not parent_args["t"] then
	insert(t, 1, current_title.subpageText)
end

-- A single |s= / |t= that merely repeats the page name is rejected.
if #args.s == 1 and args.s[1] == pagename then error('Redundant text in |s=.') end
for _, form in ipairs(args.s) do
	insert(s, form)
end
if #args.t == 1 and args.t[1] == pagename then error('Redundant text in |t=.') end
for _, form in ipairs(args.t) do
	insert(t, form)
end
-- A raw |t= was passed but the processed list is empty: fall back to the
-- page title so that `t1` below is never nil.
if #t == 0 then
	insert(t, current_title.subpageText)
end

local t1 = t[1]
local t1_len = len(t1)

-- temp tracking
if #s == 0 and require("Module:zh").ts(t1) ~= t1 then
	require('Module:debug').track('zh-forms/entry possibly missing a simplified form')
end

if #t == 1 then
	-- Converted once and reused; '' means no single-character variant exists.
	local to_variant = export.change_to_variant(t1)
	if to_variant ~= '' then
		-- automatically generate a |t2=
		insert(t, to_variant)
	elseif find(t1, ('[%s]'):format(m_data.chars_variant_both .. m_data.chars_variant_one)) then
		require('Module:debug').track('zh-forms/entry possibly missing a variant form')
	end
end

-- `name` selects the matching entry of `char_set` further down.
s.name = "simp"
t.name = "trad"
-- Several traditional forms but no simplified one: reuse the first form.
if #t ~= 1 and #s == 0 then
	insert(s, t1)
end
-- Returns a superscript asterisk when the page for `term` exists and its text
-- contains "zh-pron" and/or "zh-see" at least twice in total; otherwise
-- returns "".
-- Nothing is looked up for components of a multi-character title
-- (`iscomp` with t1_len > 1) or for the current page itself.
-- The occurrences are counted directly, so a page that happens to contain the
-- character U+A640 is no longer over-counted, and the title object is created
-- only once.
local function asterisk(term, iscomp)
	if iscomp and t1_len > 1 then return "" end
	if term == current_title.subpageText then return "" end
	local title = new_title(term)
	if not (title and title.exists) then return "" end
	local page_text = title:getContent()
	if not page_text then return "" end
	-- Both needles are ASCII, so byte-wise string.gsub counts them correctly.
	local _, pron_count = page_text:gsub("zh%-pron", "")
	local _, see_count = page_text:gsub("zh%-see", "")
	return (pron_count + see_count) > 1 and '<sup><span class="explain" title="This form has one or more other meanings.">*</span></sup>' or ''
end
-- Wikitext for a new table row that opens a two-column header cell.
-- `length` selects the padding (0.3em above 8, otherwise 0.5em);
-- `color` is the background as a hex string without '#', default 'E0FFFF'.
local function var_fmt(length, color)
	local padding = '5'
	if length > 8 then
		padding = '3'
	end
	local background = color or 'E0FFFF'
	return ('\n|-\n! style="padding: 0.%sem;border: 1px solid #aaa;background: #%s;font-weight: normal;font-size: smaller;" colspan="2" |'):format(padding, background)
end
-- Wikitext that opens one character cell.
-- `length` selects the padding, `script` is a key of `sc`, and `last` marks
-- the final cell of a row, which also gets a right border.
local function char_gap(length, script, last)
	local code = sc[script]
	local borders
	if last then
		borders = 'border-right: 1px solid #aaa;border' .. (length ~= 1 and '-bottom' or '') .. ': 1px solid #aaa; '
	else
		borders = 'border-bottom: 1px solid #aaa; '
	end
	local padding = length > 8 and '3' or '5'
	return '\n| style="padding: 0.' .. padding .. 'em; background-color:white;' .. borders ..
		'font-size:x-large" lang="zh-' .. code .. '" class="' .. code .. '" | '
end
-- Opening line of the table. The box floats right unless the term is longer
-- than three characters, `var_count * length` exceeds 5, or |align=left is set.
-- For any length other than 1 a row with a two-column header cell is opened too.
local function header(length, var_count)
	local unfloated = length > 3 or var_count * length > 5 or args.align == 'left'
	local opening = unfloated and '{|' or '{| class="floatright"'
	local first_row = ''
	if length ~= 1 then
		first_row = '\n|-\n! colspan=2|'
	end
	return opening .. ' style="clear: right;margin: 1em 0 1em 1em;border-collapse: collapse;text-align: center"' .. first_row
end
-- Wikitext that opens one gloss header cell.
-- `word` scales the pixel width, `colspan` defaults to 1, and `length`
-- switches to the narrower layout (less padding, smaller width step) above 8.
local function gloss_fmt(word, colspan, length)
	local narrow = length > 8
	local width = narrow and (25 * word + 25) or (40 * word + 40)
	local padding = narrow and '3' or '5'
	return '\n! style="padding: 0.' .. padding ..
		'em;border: 1px solid #aaa;background:#F5F5DC;font-weight: normal;font-size: 85%; width:' ..
		width .. 'px" colspan=' .. (colspan or 1) ..'|'
end
-- Parenthesised, slash-separated list of links to every form in `text`,
-- tagged with the script of `script` (a key of `sc`).
-- Returns '' for single-character terms; that case now returns before any
-- link is built, so the page lookups done by `asterisk` are not performed
-- only to be thrown away.
-- The list is wrapped in a 140% font-size span unless `length` exceeds 8.
local function form_fmt(text, length, script)
	if length == 1 then return '' end
	local zh = lang.getByCode("zh") -- fetched once instead of once per form
	local code = sc[script]
	local fmtd_text = {}
	for i, value in ipairs(text) do
		fmtd_text[i] = links.language_link{ lang = zh, term = value } .. asterisk(value, false)
	end
	local enlarged = length <= 8
	return (enlarged and '<span style="font-size:140%">' or '') ..
		'(<span lang="zh-' .. code .. '" class="' .. code .. '">' .. concat(fmtd_text, "/") .. '</span>)' ..
		(enlarged and '</span>' or '')
end
-- One cell per entry of `text`: every entry is preceded by the cell opening
-- from `char_gap`, and the last entry uses the "last cell" variant.
local function char_fmt(text, length, script)
	local total = #text
	local cells = {}
	for index = 1, total - 1 do
		cells[#cells + 1] = char_gap(length, script) .. text[index]
	end
	cells[#cells + 1] = char_gap(length, script, true) .. text[total]
	return concat(cells)
end
-- `word_division` lists {first, last} character positions of `t1`, one entry
-- per cell. With |type= each digit gives the width of the next component;
-- without it every character is its own cell.
local word_division = {}
local decomposable
if comp_type then
	local cursor = 1
	for digit in mw.text.gsplit(comp_type, "", true) do
		-- Convert explicitly so that a stray non-digit produces a readable
		-- error instead of an arithmetic failure.
		local width = tonumber(digit)
		if not width then
			error("'type' parameter must consist of digits only.")
		end
		local here = sub(t1, cursor, cursor)
		if find(here, '[,%-]') then
			-- A comma or hyphen gets a cell of its own.
			insert(word_division, { cursor, cursor } )
			cursor = cursor + 1
		elseif here == '…' then
			-- An ellipsis is taken together with the following character.
			insert(word_division, { cursor, cursor + 1 } )
			cursor = cursor + 2
		end
		insert(word_division, { cursor, cursor + width - 1 } )
		cursor = cursor + width
	end
	-- The widths must add up to the title length (trailing ellipses ignored),
	-- unless one of the forms contains an ideographic description character.
	if cursor - 1 ~= len((gsub(t1, '…+$', ''))) and not find(concat(t) .. concat(s), "[⿰⿱⿲⿳⿴⿵⿶⿷⿸⿹⿺⿻]") then
		error("'type' parameter does not match word length.")
	end
else
	for position = 1, t1_len do
		insert(word_division, { position, position } )
	end
	decomposable = len((gsub(t1, '…+$', ''))) > 2
end
-- `delink[n]` is "yes" for every cell named in |delink=: either all cells
-- (|delink=y) or a comma-separated list of cell numbers.
local delink = {}
if args["delink"] then
	if args["delink"] == "y" then
		for del_index = 1, #word_division do
			delink[del_index] = "yes"
		end
	else
		for position in mw.text.gsplit(args["delink"], ",") do
			local index = tonumber(position)
			if not index then
				-- Previously this surfaced as "table index is nil".
				error("Invalid position in 'delink' parameter: " .. position)
			end
			delink[index] = "yes"
		end
	end
end
-- For each form list (simplified first when present, then traditional) build
-- the linked content of every cell, and collect the forms whose pages do not
-- exist yet.
local char_set = { ['simp'] = {}, ['trad'] = {} }
local identity = #s == 0 and {t} or {s,t}
local uncreated = {}
local zh_lang = lang.getByCode("zh") -- fetched once for all links below
for _, id in ipairs(identity) do
	for i, position in ipairs(word_division) do
		-- Distinct slices of all forms at this position, joined with '/'.
		local char_string = ""
		for j = 1, #id do
			local word_form = sub(id[j], position[1], position[2])
			-- Plain search: the slice is title text, not a pattern, so
			-- characters such as '.', '(' or '%' must be taken literally.
			if not find(char_string, word_form, 1, true) then
				char_string = (char_string ~= "" and (char_string .. '/') or "") .. word_form
			end
		end
		-- Punctuation cells stay unlinked.
		if not find(char_string, '[,%-]') then
			local separator = delink[i] and "" or "/"
			local hash = {}
			for thing in mw.text.gsplit(char_string, separator) do
				insert(hash, links.language_link{ lang = zh_lang, term = thing } .. asterisk(thing, true))
			end
			char_string = concat(hash, separator)
		end
		insert(char_set[id.name], char_string)
	end
	for _, item in ipairs(id) do
		-- The current page is never reported, so it is excluded before the
		-- existence check is made.
		if item ~= current_title.subpageText then
			local title = new_title(item)
			if not (title and title.exists) then
				insert(uncreated, '"[[' .. item .. ']]"')
			end
		end
	end
end
-- Keys are character-class bodies (for the script objects, whatever
-- getCharacters() returns); values are short script labels. Each key is
-- wrapped in [...] and tested against t1; every label that matches is
-- collected in `script`.
-- NOTE(review): the "Numb" key shows the range 0-9 twice here; possibly the
-- second range was meant to be a different digit block - confirm upstream.
local scripts = { [m_scripts.getByCode("Hani"):getCharacters()] = "Hani" , [m_scripts.getByCode("Latn"):getCharacters()] = "Latn", ["0-90-9"] = "Numb", [m_scripts.getByCode("Polyt"):getCharacters()] = "Grek", [m_scripts.getByCode("Bopo"):getCharacters()] = "Bopo" }
local script = {}
-- pairs() order is unspecified; only the number of matches is used later.
for range, script_name in pairs(scripts) do
if find(t1, '[' .. range .. ']') then
insert(script, script_name)
end
end
-- Reduplication category: t1 contains an immediately repeated substring, the
-- entry is not marked |gloss=-, and the title is shorter than 7 characters.
if find(t1, "([^─…]+)%1") and args['gloss'] ~= '-' and t1_len < 7 then
	if gsub(comp_type or "", "1", "") == "" then
		-- No |type=, or a |type= made of 1s only.
		insert(annotation, '[[Category:Chinese reduplications]]')
	elseif find(t1, "([^…][^…]+)%1") or find(concat(char_set['trad'], " "), "([^─…%[%]a-z]+)%1") then
		-- Every component page must be readable, have a Chinese section and
		-- must not itself be marked |gloss=-.
		local evil
		for _, component in ipairs(char_set['trad']) do
			if len(component) > 1 then
				-- new_title() yields nil for an invalid title; treat that like
				-- a page without content instead of indexing nil.
				local comp_title = new_title(links.remove_links(component))
				local comp_content = comp_title and comp_title:getContent()
				-- Plain searches: as a pattern, "=-" is a lazy repetition of
				-- "=", which made "|gloss=-" match any "|gloss".
				if not comp_content
					or find(comp_content, "|gloss=-", 1, true)
					or not find(comp_content, "==Chinese==", 1, true) then
					evil = true
					break
				end
			end
		end
		if not evil then insert(annotation, '[[Category:Chinese reduplications]]') end
	end
end

if #script > 1 then
	insert(annotation, '[[Category:Chinese terms written in multiple scripts]]')
end

if decomposable and args['gloss'] ~= '-' and not args['note'] and not args['lit'] then
	insert(annotation, '[[Category:Chinese entries with potentially decomposable titles]]')
end

-- Only pages whose namespace text is empty get the uncreated-forms notice.
if #uncreated > 0 and current_title.nsText == "" then
	insert(annotation, '[[Category:Chinese terms with uncreated forms]]' ..
		'<small class="attentionseeking">(' .. (#uncreated == 1 and 'This form' or 'These forms') ..
		' in the hanzi box ' .. (#uncreated == 1 and 'is' or 'are') .. ' uncreated: ' ..
		concat(uncreated, ", ") .. '.)</small>')
end
-- Opening of a cell that spans the whole table: the cell opening produced by
-- char_gap() cut off just before its 'font-size' declaration, which also
-- drops the lang/class attributes that follow it.
-- The cut is located by searching for that declaration rather than by a fixed
-- character count; the fixed count of 44 was shorter than the current tail of
-- char_gap() (49 characters) and left a stray "font-" in the style attribute.
local function wide_cell_open(script)
	local cell = char_gap(t1_len, script, true)
	local cut = cell:find('font-size', 1, true)
	return cut and cell:sub(1, cut - 1) or cell
end

-- One labelled extra row: a header cell in `color` carrying `label`, followed
-- by a cell spanning all character columns that contains `body`.
local function extra_row(color, label, script, body)
	return var_fmt(t1_len, color) .. label .. wide_cell_open(script)
		.. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"'
		.. ' colspan="' .. #word_division .. '"|' .. body
end

-- |ss=: second-round simplified form.
local ss = ""
if args["ss"] then
	ss = extra_row('FFFFE0', '[[w:Second round of simplified Chinese characters|2nd round simp.]]', 'simp',
		zh_link(nil, nil, { "*" .. args["ss"], tr = "-" }, current_title.subpageText))
end

-- |ns=: nonstandard simplified form.
local ns = ""
if args["ns"] then
	ns = extra_row('FFFFE0', 'nonstandard simp.', 'simp',
		zh_link(nil, nil, { "*" .. args["ns"], tr = "-" }, current_title.subpageText))
end

-- |alt=: comma-separated items of the shape form[-note][:transliteration].
local altforms = ""
if args["alt"] then
	-- The template is expanded at most once, and only when a note actually
	-- contains "‡". It is handed to gsub as a function so that a '%' in the
	-- expansion is not read as a replacement escape.
	local dict_note
	local function historical_dict()
		if not dict_note then
			dict_note = frame:expandTemplate{
				title = "Template:zh-historical-dict",
				args = { type = "form", nocat = "1" }
			}
		end
		return dict_note
	end

	local altform_list = {}
	for altform in mw.text.gsplit(args["alt"], ",") do
		local altdecomp = mw.text.split(altform, ":")
		local altdecomp2 = mw.text.split(altdecomp[1], "-")
		local altdecomp3 = ''
		if altdecomp2[2] then
			altdecomp3 = ' <span style="font-size:80%"><i>' ..
				gsub(altdecomp2[2], "‡", historical_dict) .. '</i></span>'
		end
		insert(altform_list, '<span style="white-space:nowrap;">' ..
			zh_link(nil, nil, { altdecomp2[1], tr = (altdecomp[2] or "-") }, current_title.subpageText) ..
			altdecomp3 .. '</span>')
	end

	local body
	if #altform_list > 5 then
		-- More than five items: show the first five, the rest behind a toggle.
		body = '<div class="vsSwitcher" data-toggle-category="Chinese alternative forms">' ..
			'<div class="vsShow">' .. concat(altform_list, "<br>", 1, 5) ..
			'</div><div class="vsHide">' .. concat(altform_list, "<br>") .. '</div><span class="vsToggleElement" style="display:block;width:fit-content;margin:auto"> </span></div>'
	else
		body = concat(altform_list, "<br>")
	end
	altforms = extra_row('F0FFE0', 'alternative forms', 'trad', body)
end

-- Anagrams: other orderings of the characters of t1 whose pages have a
-- level-2 "Chinese" section. Only attempted for titles of 2 to 5 characters.
local anagrams = ""
if t1_len >= 2 and t1_len <= 5 then
	local found, checked = {}, { [t1] = true }

	-- Recursively permutes term[1..n] in place by swapping (the swap rule has
	-- the shape of Heap's algorithm); each complete ordering is tested once.
	local function permute(term, n)
		if n == 0 then
			local candidate = concat(term)
			if checked[candidate] then
				return
			end
			checked[candidate] = true
			local title = new_title(candidate)
			if not title then
				return
			end
			local page_text = title:getContent()
			if page_text and get_section(page_text, "Chinese", 2) then
				insert(found, candidate)
			end
			return
		end
		for i = 1, n do
			permute(term, n - 1)
			local j = (n % 2 == 0) and i or 1
			term[j], term[n] = term[n], term[j]
		end
	end

	local chars = explode(t1)
	permute(chars, #chars)

	if #found > 0 then
		local label = #found == 1 and "anagram" or "anagrams"
		for index, anagram in ipairs(found) do
			found[index] = "<span style=\"white-space:nowrap;\">" ..
				zh_link(nil, nil, {anagram, tr = "-"}, current_title.subpageText) ..
				"</span>"
		end
		anagrams = extra_row('F0FFE0', label, 'trad', concat(found, "<br>"))
	end
end
-- Optional closing row: the literal meaning from |lit= (quoted, with a full
-- stop added after the closing quote unless the text already ends in one),
-- or otherwise the free text of |note=.
local literal = ""
if args["lit"] or args["note"] then
	local caption
	if args["lit"] then
		local closing = find(args["lit"], "%.$") and "”" or "”."
		caption = '<i>Literally:</i> “' .. args["lit"] .. closing
	else
		caption = args["note"]
	end
	literal = '\n|-' .. gloss_fmt(t1_len, #word_division + 2, t1_len) .. caption
end
-- Gloss header cells, one per entry of `word_division`.
--   |gloss=-            : a single cell reading "phonetic";
--   one-character title : no gloss cells at all;
--   otherwise           : the positional argument, else a looked-up gloss.
local gloss = {}
if args['gloss'] == '-' then
	gloss = { gloss_fmt(t1_len * 1.6, #word_division, t1_len) .. '<i>phonetic</i>' }
elseif t1_len ~= 1 then
	local hani_only = "^[" .. Hani_chars .. "]+$"
	for i, position in ipairs(word_division) do
		local character = sub(t1, position[1], position[2])
		local gloss_text = args[1][i]

		-- Load glosses module if no gloss was supplied and the word is a single character.
		if not gloss_text and position[1] == position[2] then
			local glosses = mw.loadData("Module:zh/data/glosses")
			gloss_text = glosses.glosses[character] or ""
			-- Expand any {{taxlink|...}} call embedded in the stored gloss.
			gloss_text = gsub(gloss_text, "{{taxlink|([^{}]+)}}",
				function (taxlink_text)
					local taxlink_args, argi = {}, 1
					for piece in mw.text.gsplit(taxlink_text, "|") do
						local key_value = mw.text.split(piece, "=")
						if key_value[2] then
							taxlink_args[key_value[1]] = key_value[2]
						else
							taxlink_args[argi] = (piece ~= "" and piece or nil)
							argi = argi + 1
						end
					end
					return mw.getCurrentFrame():expandTemplate{
						title = 'taxlink',
						args = taxlink_args
					}
				end)
			if gloss_text == "" and find(character, hani_only) and not find(glosses.nonlemma, character) then
				require('Module:debug').track('zh-forms/no gloss found for Chinese character')
			end
		end

		if gloss_text == "-" then gloss_text = "''phonetic''" end
		if not gloss_text then
			gloss_text = ""
		end

		--[[
		To ensure that suffixes are not broken up between lines, like this:
		-
		ist
		]]
		if find(gloss_text, "-", nil, true) then
			gloss_text = gsub(gloss_text, "^%-", nonbreaking_hyphen)
			gloss_text = gsub(gloss_text, "(%s)%-", "%1" .. nonbreaking_hyphen)
		end

		-- Multi-character component without a gloss: take it from its entry.
		if gloss_text == "" and position[2] > position[1] then
			-- new_title() yields nil for an invalid title; treat that like a
			-- missing page instead of indexing nil.
			local comp_title = new_title(character)
			local comp_content = comp_title and comp_title:getContent()
			if comp_content then
				gloss_text = require("Module:zh/extract").extract_gloss(comp_content, false) or ""
				if gloss_text == "" and find(character, hani_only) then
					require('Module:debug').track('zh-forms/no gloss found but entry exists')
				end
				--[[
				if not string.match(content, character) then
				require('Module:debug').track('zh-forms/compounds not mentioned in derived terms on the component pages')
				end
				]]--
			elseif find(character, hani_only) then
				insert(annotation, '[[Category:Chinese redlinks/zh-forms]]')
			end
		end

		-- Drop ';'-separated segments without any ASCII letter or digit, a
		-- trailing segment of that kind, and runs of semicolons.
		gloss_text = gsub(gloss_text, ";[^a-zA-Z0-9]+;", ";")
		gloss_text = gsub(gloss_text, ";[^a-zA-Z0-9]*$", "")
		gloss_text = gsub(gloss_text, ";+", ";")

		-- More than three segments: show the first three, the rest behind a
		-- toggle. If the leading segments cannot be matched (e.g. the text
		-- starts with ';'), the gloss is left unwrapped rather than raising
		-- a concatenation error.
		if len((gsub(gloss_text, '[^;]', ''))) > 2 then
			local preview = match(gloss_text, '^[^;]+;[^;]+;[^;]+')
			if preview then
				gloss_text = '<div class="vsSwitcher" data-toggle-category="glosses"><div class="vsShow">' ..
					preview .. '</div><div class="vsHide">' .. gloss_text .. '</div><span class="vsToggleElement" style="display:block;width:fit-content;margin:auto"> </span></div>'
			end
		end

		-- Width factor: 0 for punctuation cells; otherwise the character count,
		-- multiplied (unless the cell is delinked) by one more than the number
		-- of '/' characters in the rendered traditional cell.
		local word_length
		if match(character, '[,…%-]') then
			word_length = 0
		elseif delink[i] then
			word_length = len(character)
		else
			word_length = len(character) * (len((gsub(char_set['trad'][i], '[^/]', ''))) + 1)
		end
		insert(gloss, gloss_fmt(word_length, 1, t1_len) .. gloss_text)
	end
end
-- Checks on the wikitext of the current page; skipped when the page has no
-- content. Only the "Definitions" check is active, the rest is kept disabled.
if content then
	--[[
	local applicable_pos = { ["Noun"] = 1, ["Verb"] = 1, ["Adjective"] = 1, ["Adverb"] = 1,
	["Definitions"] = 1, ["Pronunciation"] = 1 }
	local previous_level = 2
	local subheading_wanted
	]]--

	--[=[
	-- Used under headers for Chinese varieties, for instance in [[āu-piah]]
	local Chinese_section = string.match(content, "\n==Chinese==\n(.-)\n==[^=]")
	if not Chinese_section then
	error("No Chinese section found.")
	end
	--]=]

	-- Track single-character pages that lack a "Definitions" heading.
	local has_definitions = content:find("===Definitions===", 1, true)
	if t1_len == 1 and not has_definitions then
		require('Module:debug').track('zh-forms/no definitions section found')
	end

	--[[ disable, {{zh-der}} has been deprecated so this expression will always be false
	if t1_len == 1 and not string.match(content, "===Compounds===") and string.match(content, "zh-der") then
	require('Module:debug').track('zh-forms/derived terms probably needing renaming')
	end
	]]--

	--[[ comment out relatively expensive parsing
	for equals, heading_text in string.gmatch(content, "\n(%=%=+)([^%=]+)%=%=+") do
	local current_level = #equals
	if subheading_wanted and current_level ~= previous_level + 1 then
	if applicable_pos[heading_text] then
	error("The heading \"===Etymology ''n''===\" or \"===Pronunciation ''n''===\" must be followed by a subheading one level lower.")
	end
	end
	previous_level = current_level
	subheading_wanted = string.find(heading_text, " [0-9]+") and true or false
	end
	]]--
end
-- When only one form list exists and some character of t1 occurs in
-- m_data.chars_unified, the simplified row is still shown separately, marked
-- with '#', and reuses the traditional cells.
local simp_note = ""
--This is not a complete list!
if #identity == 1 then
	-- t1 is spliced into a character class, so the characters that are special
	-- inside a class (^ ] % -) are escaped first; a title containing one of
	-- them must not alter or break the pattern.
	local t1_class = gsub(t1, "[%^%]%%%-]", "%%%0")
	if find(m_data.chars_unified, '[' .. t1_class .. ']') then
		simp_note = '<sup><span class="explain" title="Using the same code point' .. (t1_len > 1 and 's' or '') .. ' as the traditional form due to Han unification. Without proper font support, it may be displayed as the same as the traditional form.">#</span></sup>'
		char_set['simp'] = char_set['trad']
	end
end
-- Assemble the table: header, gloss cells, the form rows, the optional extra
-- rows, the closing '|}' and finally the collected categories and notices.
local form_rows
if #identity == 1 and simp_note == '' then
	-- One combined row for simplified and traditional.
	form_rows = var_fmt(t1_len) .. '[[Simplified Chinese|simp.]] and [[Traditional Chinese|trad.]]<br>' ..
		form_fmt(t, t1_len, 'both') .. char_fmt(char_set['trad'], t1_len, 'both')
else
	-- Separate rows; without explicit simplified forms the traditional list is reused.
	local trad_row = var_fmt(t1_len) .. '[[Traditional Chinese|trad.]] ' ..
		form_fmt(t, t1_len, 'trad') .. char_fmt(char_set['trad'], t1_len, 'trad')
	local simp_row = var_fmt(t1_len) .. '[[Simplified Chinese|simp.]] ' .. simp_note ..
		form_fmt(#s == 0 and t or s, t1_len, 'simp') .. char_fmt(char_set['simp'], t1_len, 'simp')
	form_rows = trad_row .. simp_row
end

return concat{
	header(t1_len, math.max(#t, #s)),
	concat(gloss, ""),
	form_rows,
	ss, ns, altforms, anagrams, literal,
	'\n|}',
	concat(annotation)
}
end
return export