Module:zh-forms: difference between revisions

From Wiktionary, the free dictionary
Jump to navigation Jump to search
Content deleted Content added
comment out relatively expensive or pointless parsing for trivial error checking, see comments for further details
appearance tweaks
 
(11 intermediate revisions by 4 users not shown)
Line 4: Line 4:
local m_data = require("Module:zh-forms/data")
local m_data = require("Module:zh-forms/data")
local m_scripts = require("Module:scripts")
local m_scripts = require("Module:scripts")
local m_str_utils = require("Module:string utilities")
local find = mw.ustring.find

local gsub = mw.ustring.gsub
local match = mw.ustring.match
local concat = table.concat
local len = mw.ustring.len
local explode = m_str_utils.explode_utf8
local find = m_str_utils.find
local get_section = require("Module:utilities").get_section
local gsub = m_str_utils.gsub
local insert = table.insert
local len = m_str_utils.len
local match = m_str_utils.match
local new_title = mw.title.new
local sub = m_str_utils.sub
local u = m_str_utils.char

local Hani_chars = m_scripts.getByCode("Hani"):getCharacters()
local nonbreaking_hyphen = u(0x2011)


local sc = {
local sc = {
Line 19: Line 31:
function export.change_to_variant(text)
function export.change_to_variant(text)
local count = 0
local count = 0
local text = mw.ustring.gsub(text, ('([%s])'):format(m_data.chars_variant_both), function(char)
local text = gsub(text, ('([%s])'):format(m_data.chars_variant_both), function(char)
count = count + 1
count = count + 1
if (count > 1) then return '' end -- add |t2= manually, please
if (count > 1) then return '' end -- add |t2= manually, please
local this_char_index = mw.ustring.find(m_data.chars_variant_both, char)
local this_char_index = find(m_data.chars_variant_both, char)
local first_or_second = (this_char_index % 2)
local first_or_second = (this_char_index % 2)
local other_char_index = this_char_index + (first_or_second == 1 and 1 or -1)
local other_char_index = this_char_index + (first_or_second == 1 and 1 or -1)
return mw.ustring.sub(m_data.chars_variant_both, other_char_index, other_char_index)
return sub(m_data.chars_variant_both, other_char_index, other_char_index)
end)
end)
return (count == 1 and text or '')
return (count == 1 and text or '')
Line 49: Line 61:
local s, t = {}, {}
local s, t = {}, {}
local annotation = {}
local annotation = {}
local pagename = mw.loadData("Module:headword/data").pagename
local current_title = mw.title.getCurrentTitle()
local content = current_title:getContent()
if not frame:getParent().args["t"] then
if not frame:getParent().args["t"] then
table.insert(t, 1, mw.title.getCurrentTitle().subpageText)
insert(t, 1, current_title.subpageText)
end
local PAGENAME = mw.title.getCurrentTitle().text
local content = mw.title.new(PAGENAME):getContent()
local function insert_st(set, text)
table.insert(set, text)
end
end
for i = 1, #args.s do
for i = 1, #args.s do
if (#args.s == 1) and PAGENAME == args.s[i] then error('Redundant text in |s=.') end
if (#args.s == 1) and pagename == args.s[i] then error('Redundant text in |s=.') end
insert_st(s, args.s[i])
insert(s, args.s[i])
end
end
for i = 1, #args.t do
for i = 1, #args.t do
if (#args.t == 1) and PAGENAME == args.t[i] then error('Redundant text in |t=.') end
if (#args.t == 1) and pagename == args.t[i] then error('Redundant text in |t=.') end
insert_st(t, args.t[i])
insert(t, args.t[i])
end
end
local t1 = t[1]
local t1_len = len(t1)
-- temp tracking
-- temp tracking
if #s == 0 and require("Module:zh").ts(t[1]) ~= t[1] then
if #s == 0 and require("Module:zh").ts(t1) ~= t1 then
require('Module:debug').track('zh-forms/entry possibly missing a simplified form')
require('Module:debug').track('zh-forms/entry possibly missing a simplified form')
end
end
if #t == 1 then
if #t == 1 then
local to_variant = export.change_to_variant(t[1])
local to_variant = export.change_to_variant(t1)
if (to_variant ~= '') then
if (to_variant ~= '') then
-- automatically generate a |t2=
-- automatically generate a |t2=
insert_st(t, export.change_to_variant(t[1]))
insert(t, export.change_to_variant(t1))
elseif find(t[1], ('[%s]'):format(m_data.chars_variant_both .. m_data.chars_variant_one)) then
elseif find(t1, ('[%s]'):format(m_data.chars_variant_both .. m_data.chars_variant_one)) then
require('Module:debug').track('zh-forms/entry possibly missing a variant form')
require('Module:debug').track('zh-forms/entry possibly missing a variant form')
end
end
Line 86: Line 99:
if #t ~= 1 and #s == 0 then
if #t ~= 1 and #s == 0 then
table.insert(s, t[1])
insert(s, t1)
end
end


local function asterisk(term, iscomp)
local function asterisk(term, iscomp)
if iscomp and len(t[1]) > 1 then return "" end
if iscomp and t1_len > 1 then return "" end
if term == mw.title.getCurrentTitle().subpageText or not (mw.title.new(term) or {}).exists then return "" end
if term == current_title.subpageText or not (new_title(term) or {}).exists then return "" end
local content = mw.title.new(term):getContent()
local content = new_title(term):getContent()
content = gsub(content, "zh%-pron", "Ꙁ")
content = gsub(content, "zh%-pron", "Ꙁ")
content = gsub(content, "zh%-see", "Ꙁ")
content = gsub(content, "zh%-see", "Ꙁ")
content = gsub(content, "[^Ꙁ]", "")
content = gsub(content, "[^Ꙁ]", "")
return mw.ustring.len(content) > 1 and '<sup><span class="explain" title="This form has one or more other meanings.">*</span></sup>' or ''
return len(content) > 1 and '<sup><span class="explain" title="This form has one or more other meanings.">*</span></sup>' or ''
end
end


Line 114: Line 127:
local function header(length, var_count)
local function header(length, var_count)
return ((length > 3 or var_count * length > 5 or args.align == 'left') and '{|' or '{| align=right') ..
return ((length > 3 or var_count * length > 5 or args.align == 'left') and '{|' or '{| class="floatright"') ..
' style="clear: right;margin: 1em;border-collapse: collapse;text-align: center"' ..
' style="clear: right;margin: 1em 0 1em 1em;border-collapse: collapse;text-align: center"' ..
(length ~= 1 and '\n|-\n! colspan=2|' or '')
(length ~= 1 and '\n|-\n! colspan=2|' or '')
end
end
Line 122: Line 135:
return '\n! style="padding: 0.' .. (length > 8 and '3' or '5') ..
return '\n! style="padding: 0.' .. (length > 8 and '3' or '5') ..
'em;border: 1px solid #aaa;background:#F5F5DC;font-weight: normal;font-size: 85%; width:' ..
'em;border: 1px solid #aaa;background:#F5F5DC;font-weight: normal;font-size: 85%; width:' ..
(length <= 8 and (30 * word + 30) or (25 * word + 25)) .. 'px" colspan=' .. (colspan or 1) ..'|'
(length <= 8 and (40 * word + 40) or (25 * word + 25)) .. 'px" colspan=' .. (colspan or 1) ..'|'
end
end


Line 128: Line 141:
local fmtd_text = {}
local fmtd_text = {}
for i,value in ipairs(text) do
for i,value in ipairs(text) do
fmtd_text[i] = links.language_link({ lang = lang.getByCode("zh"), term = value }) .. asterisk(value, false)
fmtd_text[i] = links.language_link{ lang = lang.getByCode("zh"), term = value } .. asterisk(value, false)
end
end
return length ~= 1 and ((length > 8 and '' or '<span style="font-size:140%">') ..
return length ~= 1 and ((length > 8 and '' or '<span style="font-size:140%">') ..
'(<span lang="zh-' .. sc[script] .. '" class="' .. sc[script] .. '">' .. table.concat(fmtd_text, "/") .. '</span>)' ..
'(<span lang="zh-' .. sc[script] .. '" class="' .. sc[script] .. '">' .. concat(fmtd_text, "/") .. '</span>)' ..
(length > 8 and '' or '</span>')) or ''
(length > 8 and '' or '</span>')) or ''
end
end
local function char_fmt(text, length, script)
local function char_fmt(text, length, script)
return (#text ~= 1 and (char_gap(length, script) .. table.concat(text, char_gap(length, script), 1, #text-1)) or '') .. char_gap(length, script, true) .. text[#text]
return (#text ~= 1 and (char_gap(length, script) .. concat(text, char_gap(length, script), 1, #text-1)) or '') .. char_gap(length, script, true) .. text[#text]
end
end
local test_word = t[1]
local length = len(test_word)
local word_division = {}
local word_division = {}
local i = 1
local i = 1
Line 147: Line 158:
if comp_type then
if comp_type then
for index in mw.text.gsplit(comp_type, "", true) do
for index in mw.text.gsplit(comp_type, "", true) do
if find(mw.ustring.sub(test_word, i, i), '[,%-]') then
if find(sub(t1, i, i), '[,%-]') then
table.insert(word_division, { i, i } )
insert(word_division, { i, i } )
i = i + 1
i = i + 1
elseif mw.ustring.sub(test_word, i, i) == '…' then
elseif sub(t1, i, i) == '…' then
table.insert(word_division, { i, i + 1 } )
insert(word_division, { i, i + 1 } )
i = i + 2
i = i + 2
end
end
table.insert(word_division, { i, i + index - 1 } )
insert(word_division, { i, i + index - 1 } )
i = i + index
i = i + index
end
end
if i - 1 ~= len(gsub(test_word, '…+$', '')) and not find(table.concat(t) .. table.concat(s), "[⿰⿱⿲⿳⿴⿵⿶⿷⿸⿹⿺⿻]") then
if i - 1 ~= len(gsub(t1, '…+$', '')) and not find(concat(t) .. concat(s), "[⿰⿱⿲⿳⿴⿵⿶⿷⿸⿹⿺⿻]") then
error("'type' parameter does not match word length.")
error("'type' parameter does not match word length.")
end
end
else
else
for i = 1, length do
for i = 1, t1_len do
table.insert(word_division, { i, i } )
insert(word_division, { i, i } )
end
end
decomposable = len(gsub(test_word, '…+$', '')) > 2 and true or false
decomposable = len(gsub(t1, '…+$', '')) > 2 and true or false
end
end
Line 188: Line 199:
local char_string = ""
local char_string = ""
for j = 1, #id do
for j = 1, #id do
local word_form = mw.ustring.sub(id[j], position[1], position[2])
local word_form = sub(id[j], position[1], position[2])
if not find(char_string, word_form) then
if not find(char_string, word_form) then
char_string = (char_string ~= "" and (char_string .. '/') or "") .. word_form
char_string = (char_string ~= "" and (char_string .. '/') or "") .. word_form
Line 196: Line 207:
local hash = {}
local hash = {}
for thing in mw.text.gsplit(char_string, (delink[i] and "" or "/")) do
for thing in mw.text.gsplit(char_string, (delink[i] and "" or "/")) do
table.insert(hash, links.language_link({ lang = lang.getByCode("zh"), term = thing }) .. asterisk(thing, true))
insert(hash, links.language_link{ lang = lang.getByCode("zh"), term = thing } .. asterisk(thing, true))
end
end
char_string = table.concat(hash, (delink[i] and "" or "/"))
char_string = concat(hash, (delink[i] and "" or "/"))
end
end
table.insert(char_set[id.name], char_string)
insert(char_set[id.name], char_string)
end
end
for _, item in ipairs(id) do
for _, item in ipairs(id) do
if not (mw.title.new(item) or {}).exists and item ~= mw.title.getCurrentTitle().subpageText then
if not (new_title(item) or {}).exists and item ~= current_title.subpageText then
table.insert(uncreated, '"[[' .. item .. ']]"')
insert(uncreated, '"[[' .. item .. ']]"')
end
end
end
end
end
end
local scripts = { [m_scripts.getByCode("Hani"):getCharacters()] = "Hani" , [m_scripts.getByCode("Latnx"):getCharacters()] = "Latn", ["0-90-9"] = "Numb", [m_scripts.getByCode("Polyt"):getCharacters()] = "Grek", [m_scripts.getByCode("Bopo"):getCharacters()] = "Bopo" }
local scripts = { [m_scripts.getByCode("Hani"):getCharacters()] = "Hani" , [m_scripts.getByCode("Latn"):getCharacters()] = "Latn", ["0-90-9"] = "Numb", [m_scripts.getByCode("Polyt"):getCharacters()] = "Grek", [m_scripts.getByCode("Bopo"):getCharacters()] = "Bopo" }
local script = {}
local script = {}
for range, script_name in pairs(scripts) do
for range, script_name in pairs(scripts) do
if find(test_word, '[' .. range .. ']') then
if find(t1, '[' .. range .. ']') then
table.insert(script, script_name)
insert(script, script_name)
end
end
end
end


if find(t[1], "([^─…]+)%1") and args['gloss'] ~= '-' and len(t[1]) < 7 then
if find(t1, "([^─…]+)%1") and args['gloss'] ~= '-' and t1_len < 7 then
if gsub(comp_type or "", "1", "") == "" then
if gsub(comp_type or "", "1", "") == "" then
table.insert(annotation, '[[Category:Chinese reduplications]]')
insert(annotation, '[[Category:Chinese reduplications]]')
elseif find(t[1], "([^…][^…]+)%1") or find(table.concat(char_set['trad'], " "), "([^─…%[%]a-z]+)%1") then
elseif find(t1, "([^…][^…]+)%1") or find(concat(char_set['trad'], " "), "([^─…%[%]a-z]+)%1") then
local evil
local evil
for _, component in ipairs(char_set['trad']) do
for _, component in ipairs(char_set['trad']) do
if len(component) > 1 then
if len(component) > 1 then
local comp_content = mw.title.new(links.remove_links(component)):getContent() or false
local comp_content = new_title(links.remove_links(component)):getContent() or false
if not comp_content or find(comp_content, "|gloss=-") or not find(comp_content, "==Chinese==") then
if not comp_content or find(comp_content, "|gloss=-") or not find(comp_content, "==Chinese==") then
evil = true
evil = true
Line 230: Line 241:
end
end
end
end
if not evil then table.insert(annotation, '[[Category:Chinese reduplications]]') end
if not evil then insert(annotation, '[[Category:Chinese reduplications]]') end
end
end
end
end


table.insert(annotation, #script > 1 and '[[Category:Chinese terms written in multiple scripts]]' or nil)
insert(annotation, #script > 1 and '[[Category:Chinese terms written in multiple scripts]]' or nil)
table.insert(annotation, (decomposable and args['gloss'] ~= '-' and not args['note'] and not args['lit']) and '[[Category:Chinese entries with potentially decomposable titles]]' or nil)
insert(annotation, (decomposable and args['gloss'] ~= '-' and not args['note'] and not args['lit']) and '[[Category:Chinese entries with potentially decomposable titles]]' or nil)
table.insert(annotation, (#uncreated > 0 and mw.title.getCurrentTitle().nsText == "") and '[[Category:Chinese terms with uncreated forms]]' ..
insert(annotation, (#uncreated > 0 and current_title.nsText == "") and '[[Category:Chinese terms with uncreated forms]]' ..
'<small class="attentionseeking">(' .. (#uncreated == 1 and 'This form' or 'These forms') ..
'<small class="attentionseeking">(' .. (#uncreated == 1 and 'This form' or 'These forms') ..
' in the hanzi box ' .. (#uncreated == 1 and 'is' or 'are') .. ' uncreated: ' ..
' in the hanzi box ' .. (#uncreated == 1 and 'is' or 'are') .. ' uncreated: ' ..
table.concat(uncreated, ", ") .. '.)</small>' or nil)
concat(uncreated, ", ") .. '.)</small>' or nil)
local ss = ""
local ss = ""
if args["ss"] then
if args["ss"] then
ss = var_fmt(length, 'FFFFE0') .. '[[w:Second round of simplified Chinese characters|2nd round simp.]]' .. mw.ustring.sub(char_gap(length, 'simp', true), 1, -45)
ss = var_fmt(t1_len, 'FFFFE0') .. '[[w:Second round of simplified Chinese characters|2nd round simp.]]' .. sub(char_gap(t1_len, 'simp', true), 1, -45)
.. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"'
.. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"'
.. ' colspan="' .. #word_division .. '"|' .. zh_link(nil, nil, { args["ss"], tr = "-" }, mw.title.getCurrentTitle().subpageText)
.. ' colspan="' .. #word_division .. '"|' .. zh_link(nil, nil, { "*" .. args["ss"], tr = "-" }, current_title.subpageText)
end
end
local ns = ""
local ns = ""
if args["ns"] then
if args["ns"] then
ns = var_fmt(length, 'FFFFE0') .. 'nonstandard simp.' .. mw.ustring.sub(char_gap(length, 'simp', true), 1, -45)
ns = var_fmt(t1_len, 'FFFFE0') .. 'nonstandard simp.' .. sub(char_gap(t1_len, 'simp', true), 1, -45)
.. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"'
.. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"'
.. ' colspan="' .. #word_division .. '"|' .. zh_link(nil, nil, { args["ns"], tr = "-" }, mw.title.getCurrentTitle().subpageText)
.. ' colspan="' .. #word_division .. '"|' .. zh_link(nil, nil, { "*" .. args["ns"], tr = "-" }, current_title.subpageText)
end
end
Line 261: Line 272:
local altdecomp = mw.text.split(altform, ":")
local altdecomp = mw.text.split(altform, ":")
local altdecomp2 = mw.text.split(altdecomp[1], "-")
local altdecomp2 = mw.text.split(altdecomp[1], "-")
altdecomp3 = altdecomp2[2] and ' <span style="font-size:70%"><i>' .. gsub(altdecomp2[2], "‡", frame:expandTemplate{
local altdecomp3 = altdecomp2[2] and ' <span style="font-size:80%"><i>' .. gsub(altdecomp2[2], "‡", frame:expandTemplate{
title = "Template:zh-historical-dict",
title = "Template:zh-historical-dict",
args = { type = "form", nocat = "1" }
args = { type = "form", nocat = "1" }
}) .. '</i></span>' or ''
}) .. '</i></span>' or ''
table.insert(altform_list, '<span style="white-space:nowrap;">' ..
insert(altform_list, '<span style="white-space:nowrap;">' ..
zh_link(nil, nil, { altdecomp2[1], tr = (altdecomp[2] or "-") }, mw.title.getCurrentTitle().subpageText) ..
zh_link(nil, nil, { altdecomp2[1], tr = (altdecomp[2] or "-") }, current_title.subpageText) ..
altdecomp3 .. '</span>')
altdecomp3 .. '</span>')
end
end
if #altform_list > 5 then
if #altform_list > 5 then
altforms = '<div class="vsSwitcher" data-toggle-category="Chinese alternative forms"><span class="vsToggleElement">&nbsp;</span>' ..
altforms = '<div class="vsSwitcher" data-toggle-category="Chinese alternative forms">' ..
'<div class="vsShow">' .. table.concat(altform_list, "<br>", 1, 5) ..
'<div class="vsShow">' .. concat(altform_list, "<br>", 1, 5) ..
'</div><div class="vsHide">' .. table.concat(altform_list, "<br>") .. '</div></div>'
'</div><div class="vsHide">' .. concat(altform_list, "<br>") .. '</div><span class="vsToggleElement" style="display:block;width:fit-content;margin:auto">&nbsp;</span></div>'
else
else
altforms = table.concat(altform_list, "<br>")
altforms = concat(altform_list, "<br>")
end
end
altforms = var_fmt(length, 'F0FFE0') .. 'alternative forms' .. mw.ustring.sub(char_gap(length, "trad", true), 1, -45)
altforms = var_fmt(t1_len, 'F0FFE0') .. 'alternative forms' .. sub(char_gap(t1_len, "trad", true), 1, -45)
.. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"'
.. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"'
.. ' colspan="' .. #word_division .. '"|' .. altforms
.. ' colspan="' .. #word_division .. '"|' .. altforms
end
end
local anagram = ""
local anagrams = ""
if len(t[1]) == 2 and not find(t[1], "(.)%1") then
if t1_len >= 2 and t1_len <= 5 then
local anagram_link = mw.ustring.sub(t[1], 2, 2) .. mw.ustring.sub(t[1], 1, 1)
local function generate_anagrams(term, n, anagrams, checked)
local anagram_content = mw.title.new(anagram_link):getContent() or false
if n == 0 then
if anagram_content and find(anagram_content, "==Chinese==") then
local anagram = concat(term)
anagram = var_fmt(length, 'F0FFE0') .. 'anagram' .. mw.ustring.sub(char_gap(length, 'trad', true), 1, -45)
if checked[anagram] then
.. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"'
return
.. ' colspan="' .. #word_division .. '"|' .. zh_link(nil, nil, { anagram_link, tr = "-" }, mw.title.getCurrentTitle().subpageText)
end
local title = new_title(anagram)
if not title then
return
end
title = title:getContent()
if title and get_section(title, "Chinese", 2) then
insert(anagrams, anagram)
end
checked[anagram] = true
else
for i = 1, n do
generate_anagrams(term, n - 1, anagrams, checked)
local i = n % 2 == 0 and i or 1
term[i], term[n] = term[n], term[i]
end
end
return anagrams
end
local term, checked = explode(t1), {[t1] = true}
anagrams = generate_anagrams(term, #term, {}, checked)
local anagrams_num = #anagrams
for i = 1, anagrams_num do
anagrams[i] = "<span style=\"white-space:nowrap;\">" ..
zh_link(nil, nil, {anagrams[i], tr = "-"}, current_title.subpageText) ..
"</span>"
end
if anagrams_num == 0 then
anagrams = ""
else
anagrams = concat(anagrams, "<br>")
local label = anagrams_num == 1 and "anagram" or "anagrams"
anagrams = var_fmt(t1_len, 'F0FFE0') .. label .. sub(char_gap(t1_len, "trad", true), 1, -45)
.. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"'
.. ' colspan="' .. #word_division .. '"|' .. anagrams
end
end
end
end
local literal = (args["lit"] or args["note"]) and '\n|-' ..
local literal = (args["lit"] or args["note"]) and '\n|-' ..
gloss_fmt(length, #word_division + 2, length) ..
gloss_fmt(t1_len, #word_division + 2, t1_len) ..
(args["lit"] and '<i>Literally:</i> “' .. args["lit"] ..
(args["lit"] and '<i>Literally:</i> “' .. args["lit"] ..
(find(args["lit"], "%.$") and "”" or "”.") or args["note"]) or ""
(find(args["lit"], "%.$") and "”" or "”.") or args["note"]) or ""
Line 299: Line 348:
local gloss = {}
local gloss = {}
if args['gloss'] == '-' then
if args['gloss'] == '-' then
gloss = { gloss_fmt(length * 1.6, #word_division, length) .. '<i>phonetic</i>' }
gloss = { gloss_fmt(t1_len * 1.6, #word_division, t1_len) .. '<i>phonetic</i>' }
elseif length == 1 then
elseif t1_len == 1 then
gloss = {}--{ gloss_fmt(1.6, #word_division) .. '-' }
gloss = {}--{ gloss_fmt(1.6, #word_division) .. '-' }
else
else
for i, position in ipairs(word_division) do
for i, position in ipairs(word_division) do
local character = mw.ustring.sub(t[1], position[1], position[2])
local character = sub(t1, position[1], position[2])
local gloss_text = args[1][i]
local gloss_text = args[1][i]
Line 329: Line 378:
}
}
end)
end)
if gloss_text == "" and find(character, "^[いち-鿿㐀-䶿﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧-﨩]+$") and not find(glosses.nonlemma, character) then
if gloss_text == "" and find(character, "^[" .. Hani_chars .. "]+$") and not find(glosses.nonlemma, character) then
require('Module:debug').track('zh-forms/no gloss found for Chinese character')
require('Module:debug').track('zh-forms/no gloss found for Chinese character')
end
end
Line 344: Line 393:
]]
]]
if find(gloss_text, "-", nil, true) then
if find(gloss_text, "-", nil, true) then
gloss_text = gsub(gloss_text, "^%-", nonbreaking_hyphen)
local nonbreaking_hyphen = mw.ustring.char(0x2011)
gloss_text = mw.ustring.gsub(gloss_text, "^%-", nonbreaking_hyphen)
gloss_text = gsub(gloss_text, "(%s)%-", "%1" .. nonbreaking_hyphen)
gloss_text = mw.ustring.gsub(gloss_text, "(%s)%-", "%1" .. nonbreaking_hyphen)
end
end
if gloss_text == "" and position[2] > position[1] then
if gloss_text == "" and position[2] > position[1] then
local senses = {}
local content = new_title(character):getContent() or false
local content = mw.title.new(character):getContent() or false
if content then
if content then
gloss_text = require("Module:zh/extract").extract_gloss(content, false)
gloss_text = require("Module:zh/extract").extract_gloss(content, false)
if gloss_text == "" and find(character, "^[いち-鿿㐀-䶿﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧-﨩]+$") then
if gloss_text == "" and find(character, "^[" .. Hani_chars .. "]+$") then
require('Module:debug').track('zh-forms/no gloss found but entry exists')
require('Module:debug').track('zh-forms/no gloss found but entry exists')
end
end
Line 363: Line 410:
]]--
]]--
else
else
if gloss_text == "" and find(character, "^[いち-鿿㐀-䶿﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧-﨩]+$") then
if gloss_text == "" and find(character, "^[" .. Hani_chars .. "]+$") then
table.insert(annotation, '[[Category:Chinese redlinks/zh-forms]]')
insert(annotation, '[[Category:Chinese redlinks/zh-forms]]')
end
end
end
end
Line 372: Line 419:
gloss_text = gsub(gloss_text, ";+", ";")
gloss_text = gsub(gloss_text, ";+", ";")
if len(gsub(gloss_text, '[^;]', '')) > 2 then
if len(gsub(gloss_text, '[^;]', '')) > 2 then
gloss_text = '<div class="vsSwitcher" data-toggle-category="glosses"><span class="vsToggleElement">&nbsp;</span><div class="vsShow">' ..
gloss_text = '<div class="vsSwitcher" data-toggle-category="glosses"><div class="vsShow">' ..
match(gloss_text, '^[^;]+;[^;]+;[^;]+') .. '</div><div class="vsHide">' .. gloss_text .. '</div></div>'
match(gloss_text, '^[^;]+;[^;]+;[^;]+') .. '</div><div class="vsHide">' .. gloss_text .. '</div><span class="vsToggleElement" style="display:block;width:fit-content;margin:auto">&nbsp;</span></div>'
end
end
local word_length = match(character, '[,…%-]') and 0 or
local word_length = match(character, '[,…%-]') and 0 or
(delink[i] and len(character) or len(character) * (len(gsub(char_set['trad'][i], '[^/]', '')) + 1))
(delink[i] and len(character) or len(character) * (len(gsub(char_set['trad'][i], '[^/]', '')) + 1))
table.insert(gloss, gloss_fmt(word_length, 1, length) .. gloss_text)
insert(gloss, gloss_fmt(word_length, 1, t1_len) .. gloss_text)
end
end
end
end
Line 395: Line 442:
end
end
--]=]
--]=]
if length == 1 and not string.match(content, "===Definitions===") then
if t1_len == 1 and not string.match(content, "===Definitions===") then
require('Module:debug').track('zh-forms/no definitions section found')
require('Module:debug').track('zh-forms/no definitions section found')
end
end
--[[ disable, {{zh-der}} has been deprecated so this expression will always be false
--[[ disable, {{zh-der}} has been deprecated so this expression will always be false
if length == 1 and not string.match(content, "===Compounds===") and string.match(content, "zh-der") then
if t1_len == 1 and not string.match(content, "===Compounds===") and string.match(content, "zh-der") then
require('Module:debug').track('zh-forms/derived terms probably needing renaming')
require('Module:debug').track('zh-forms/derived terms probably needing renaming')
end
end
Line 418: Line 465:
local simp_note = ""
local simp_note = ""
--This is not a complete list!
--This is not a complete list!
if #identity == 1 and find(m_data.chars_unified, '[' .. test_word .. ']') then
if #identity == 1 and find(m_data.chars_unified, '[' .. t1 .. ']') then
simp_note = '<sup><span class="explain" title="Using the same code point' .. (length>1 and 's' or '') .. ' as the traditional form due to Han unification. Without proper font support, it may be displayed as the same as the traditional form.">#</span></sup>'
simp_note = '<sup><span class="explain" title="Using the same code point' .. (t1_len > 1 and 's' or '') .. ' as the traditional form due to Han unification. Without proper font support, it may be displayed as the same as the traditional form.">#</span></sup>'
char_set['simp'] = char_set['trad']
char_set['simp'] = char_set['trad']
end
end
return table.concat{
return concat{
header(length, math.max(#t, #s)), table.concat(gloss, ""),
header(t1_len, math.max(#t, #s)), concat(gloss, ""),
((#identity == 1 and simp_note == '') and
((#identity == 1 and simp_note == '') and
(var_fmt(length) .. '[[Simplified Chinese|simp.]] and [[Traditional Chinese|trad.]]<br>' ..
(var_fmt(t1_len) .. '[[Simplified Chinese|simp.]] and [[Traditional Chinese|trad.]]<br>' ..
form_fmt(t, length, 'both') .. char_fmt(char_set['trad'], length, 'both'))
form_fmt(t, t1_len, 'both') .. char_fmt(char_set['trad'], t1_len, 'both'))
or
or
var_fmt(length) .. '[[Traditional Chinese|trad.]] ' ..
var_fmt(t1_len) .. '[[Traditional Chinese|trad.]] ' ..
form_fmt(t, length, 'trad') .. char_fmt(char_set['trad'], length, 'trad') ..
form_fmt(t, t1_len, 'trad') .. char_fmt(char_set['trad'], t1_len, 'trad') ..


(var_fmt(length) .. '[[Simplified Chinese|simp.]] ' .. simp_note ..
(var_fmt(t1_len) .. '[[Simplified Chinese|simp.]] ' .. simp_note ..
form_fmt(#s == 0 and t or s, length, 'simp') .. char_fmt(char_set['simp'], length, 'simp'))
form_fmt(#s == 0 and t or s, t1_len, 'simp') .. char_fmt(char_set['simp'], t1_len, 'simp'))


), ss, ns, altforms, anagram, literal, '\n|}', table.concat(annotation)
), ss, ns, altforms, anagrams, literal, '\n|}', concat(annotation)
}
}
end
end

Latest revision as of 21:03, 19 June 2024

New Chinese Character box, used by {{zh-forms}}.


local export = {}
local links = require("Module:links")
local lang = require("Module:languages")
local m_data = require("Module:zh-forms/data")
local m_scripts = require("Module:scripts")
local m_str_utils = require("Module:string utilities")

local concat = table.concat
local explode = m_str_utils.explode_utf8
local find = m_str_utils.find
local get_section = require("Module:utilities").get_section
local gsub = m_str_utils.gsub
local insert = table.insert
local len = m_str_utils.len
local match = m_str_utils.match
local new_title = mw.title.new
local sub = m_str_utils.sub
local u = m_str_utils.char

-- Character set string of the "Hani" script object from Module:scripts, fetched once at
-- module load. Elsewhere in this module it is spliced into a pattern character class
-- ("^[" .. Hani_chars .. "]+$") to test whether a string is written in Han characters.
local Hani_chars = m_scripts.getByCode("Hani"):getCharacters()
-- The character U+2011 (NON-BREAKING HYPHEN), built once at module load via the
-- string-utilities `char` helper.
local nonbreaking_hyphen = u(0x2011)

-- Script code for each kind of form: traditional, simplified, or shared by both.
local sc = {
	trad = "Hant",
	simp = "Hans",
	both = "Hani",
}

-- Swap a single variant character in `text` for its counterpart.
-- `m_data.chars_variant_both` is treated as a run of adjacent pairs: a character found at
-- an odd position is replaced by the one after it, and a character found at an even
-- position by the one before it.
-- Only one substitution is supported. Returns the converted string when exactly one
-- character of `text` belongs to that set; returns the empty string '' when there is
-- nothing to change or when more than one character would have to change.
function export.change_to_variant(text)
	local variant_chars = m_data.chars_variant_both
	local hits = 0

	local function swap(char)
		hits = hits + 1
		if hits > 1 then
			-- add |t2= manually, please
			return ''
		end
		local position = find(variant_chars, char)
		local partner
		if position % 2 == 1 then
			partner = position + 1
		else
			partner = position - 1
		end
		return sub(variant_chars, partner, partner)
	end

	-- Only the first return value of gsub (the resulting string) is kept.
	local converted = gsub(text, ('([%s])'):format(variant_chars), swap)
	if hits == 1 then
		return converted
	end
	return ''
end

-- Cache for Module:zh/link; stays nil until zh_link is called for the first time.
local zh_link_impl = nil

-- Lazily loads Module:zh/link on first use, then forwards every argument unchanged to
-- its `link` function and returns whatever that function returns.
-- The arguments are passed on with `...` rather than `unpack(arg)`: `arg` is the
-- deprecated implicit vararg table, and unpacking it depends on the `#` operator, whose
-- result is unspecified for a table with nil holes. Callers in this module pass nil for
-- the first two arguments, so forwarding `...` is the only form that is guaranteed to
-- preserve every argument, including nils.
local function zh_link(...)
	if zh_link_impl == nil then
		zh_link_impl = require("Module:zh/link")
	end
	return zh_link_impl.link(...)
end

-- Build the wikitext of the Chinese character box: a wikitable followed by any
-- tracking categories / notices collected in `annotation`.
--
-- Arguments are read from the parent frame:
--   1, 2, ...  gloss for each component (holes and empty values allowed; "-" means phonetic)
--   s, t       lists of simplified / traditional forms
--   ss, ns     extra rows labelled "2nd round simp." and "nonstandard simp."
--   alt        comma-separated alternative forms, each "form[-label][:transliteration]"
--   type       digits giving the length of each component of the first traditional form
--   delink     "y" (every component) or comma-separated component positions; those
--              components are split into single characters that are linked one by one
--   lit, note  text for a bottom row ("Literally: ..." or a free note)
--   gloss      "-" replaces the gloss row with a single "phonetic" cell
--   align      "left" disables the floatright class
-- Raises an error for a redundant single |s= / |t= equal to the page name, and when
-- |type= does not add up to the length of the first traditional form.
function export.make(frame)
	local params = {
		[1] = { list = true, allow_holes = true, allow_empty = true },
		["s"] = { list = true },
		["t"] = { list = true },
		["ss"] = {}, ["ns"] = {}, ["alt"] = {}, ["type"] = {}, ["delink"] = {}, ["lit"] = {}, ["note"] = {}, ["gloss"] = {}, ["align"] = {}
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)
	local comp_type = args["type"]
	local s, t = {}, {}
	local annotation = {}

	local pagename = mw.loadData("Module:headword/data").pagename
	local current_title = mw.title.getCurrentTitle()
	local content = current_title:getContent()

	-- Without an explicit |t=, the page title itself is the first traditional form.
	if not frame:getParent().args["t"] then
		insert(t, 1, current_title.subpageText)
	end

	for i = 1, #args.s do
		if (#args.s == 1) and pagename == args.s[i] then error('Redundant text in |s=.') end
		insert(s, args.s[i])
	end
	for i = 1, #args.t do
		if (#args.t == 1) and pagename == args.t[i] then error('Redundant text in |t=.') end
		insert(t, args.t[i])
	end

	local t1 = t[1]
	local t1_len = len(t1)

	-- temp tracking
	if #s == 0 and require("Module:zh").ts(t1) ~= t1 then
		require('Module:debug').track('zh-forms/entry possibly missing a simplified form')
	end
	if #t == 1 then
		local to_variant = export.change_to_variant(t1)
		if (to_variant ~= '') then
			-- automatically generate a |t2= (reuse the result computed above)
			insert(t, to_variant)
		elseif find(t1, ('[%s]'):format(m_data.chars_variant_both .. m_data.chars_variant_one)) then
			require('Module:debug').track('zh-forms/entry possibly missing a variant form')
		end
	end
	-- `name` selects the matching slot of `char_set` further down.
	s.name = "simp"
	t.name = "trad"

	-- Several traditional forms but no simplified one: show the first traditional form in the simplified row.
	if #t ~= 1 and #s == 0 then
		insert(s, t1)
	end

	-- Returns a superscript "*" marker when the existing page `term` (other than the current
	-- page) contains more than one occurrence of "zh-pron"/"zh-see"; otherwise "".
	-- For component links (`iscomp`) the marker is only considered on single-character entries.
	local function asterisk(term, iscomp)
		if iscomp and t1_len > 1 then return "" end
		if term == current_title.subpageText or not (new_title(term) or {}).exists then return "" end
		local content = new_title(term):getContent()
		-- Collapse each template name to one placeholder character, drop everything else, then count.
		content = gsub(content, "zh%-pron", "Ꙁ")
		content = gsub(content, "zh%-see", "Ꙁ")
		content = gsub(content, "[^Ꙁ]", "")
		return len(content) > 1 and '<sup><span class="explain" title="This form has one or more other meanings.">*</span></sup>' or ''
	end

	-- Opens a new table row with a two-column label cell; padding shrinks for terms longer
	-- than 8 characters. `color` is a hex background colour (default E0FFFF).
	local function var_fmt(length, color)
		return '\n|-\n! style="padding: 0.' .. (length > 8 and '3' or '5') ..
			'em;border: 1px solid #aaa;background: #' .. (color or 'E0FFFF') ..
			';font-weight: normal;font-size: smaller;" colspan="2" |'
	end

	-- Opens a data cell for one component; `last` marks the final cell of the row, which
	-- also gets a right border.
	local function char_gap(length, script, last)
		return '\n| style="padding: 0.' .. (length > 8 and '3' or '5') ..
			'em; background-color:white;' ..
			(last and 'border-right: 1px solid #aaa;border' .. (length ~= 1 and '-bottom' or '') .. ': 1px solid #aaa; '
				or 'border-bottom: 1px solid #aaa; ') ..
			'font-size:x-large" lang="zh-' .. sc[script] .. '" class="' .. sc[script] .. '" | '
	end

	-- Opens the table. It floats right unless the term is long, has many variants, or
	-- |align=left is given. Multi-character terms get a leading two-column header cell.
	local function header(length, var_count)
		return ((length > 3 or var_count * length > 5 or args.align == 'left') and '{|' or '{| class="floatright"') ..
			' style="clear: right;margin: 1em 0 1em 1em;border-collapse: collapse;text-align: center"' ..
			(length ~= 1 and '\n|-\n! colspan=2|' or '')
	end

	-- Opens a gloss header cell whose pixel width grows with `word`; cells are narrower
	-- per unit when the whole term (`length`) is longer than 8 characters.
	local function gloss_fmt(word, colspan, length)
		return '\n! style="padding: 0.' .. (length > 8 and '3' or '5') ..
			'em;border: 1px solid #aaa;background:#F5F5DC;font-weight: normal;font-size: 85%; width:' ..
			(length <= 8 and (40 * word + 40) or (25 * word + 25)) .. 'px" colspan=' .. (colspan or 1) .. '|'
	end

	-- Returns the parenthesised, "/"-separated list of linked whole forms shown in the row
	-- label; empty for single-character terms.
	local function form_fmt(text, length, script)
		local fmtd_text = {}
		for i,value in ipairs(text) do
			fmtd_text[i] = links.language_link{ lang = lang.getByCode("zh"), term = value } .. asterisk(value, false)
		end
		return length ~= 1 and ((length > 8 and '' or '<span style="font-size:140%">') ..
			'(<span lang="zh-' .. sc[script] .. '" class="' .. sc[script] .. '">' .. concat(fmtd_text, "/") .. '</span>)' ..
			(length > 8 and '' or '</span>')) or ''
	end

	-- Lays out the component strings in `text` as consecutive cells; the last one uses the
	-- closing-cell style.
	local function char_fmt(text, length, script)
		return (#text ~= 1 and (char_gap(length, script) .. concat(text, char_gap(length, script), 1, #text-1)) or '') .. char_gap(length, script, true) .. text[#text]
	end

	-- word_division: list of { first, last } character positions of each component of t1.
	local word_division = {}
	local i = 1
	local decomposable

	if comp_type then
		-- Each character of |type= is a digit: the length of the next component.
		for index in mw.text.gsplit(comp_type, "", true) do
			-- Punctuation inside the term gets a cell of its own and is not counted in |type=.
			if find(sub(t1, i, i), '[,%-]') then
				insert(word_division, { i, i } )
				i = i + 1
			elseif sub(t1, i, i) == '…' then
				insert(word_division, { i, i + 1 } )
				i = i + 2
			end
			insert(word_division, { i, i + index - 1 } )
			i = i + index
		end
		-- The check is skipped when any form contains an ideographic description character.
		if i - 1 ~= len(gsub(t1, '…+$', '')) and not find(concat(t) .. concat(s), "[⿰⿱⿲⿳⿴⿵⿶⿷⿸⿹⿺⿻]") then
			error("'type' parameter does not match word length.")
		end
	else
		-- No |type=: every character is its own component.
		for i = 1, t1_len do
			insert(word_division, { i, i } )
		end
		decomposable = len(gsub(t1, '…+$', '')) > 2 and true or false
	end

	local delink = {}
	if args["delink"] then
		if args["delink"] == "y" then
			for del_index, _ in ipairs(word_division) do
				delink[del_index] = "yes"
			end
		else
			for position in mw.text.gsplit(args["delink"], ",") do
				delink[tonumber(position)] = "yes"
			end
		end
	end

	local char_set = { ['simp'] = {}, ['trad'] = {} }
	-- Form lists to lay out: only traditional when there are no simplified forms.
	local identity = #s == 0 and {t} or {s,t}
	local uncreated = {}

	for _, id in ipairs(identity) do
		for i, position in ipairs(word_division) do
			-- Collect the distinct spellings of this component across all forms, "/"-separated.
			local char_string = ""
			for j = 1, #id do
				local word_form = sub(id[j], position[1], position[2])
				-- Plain search: the component is literal text, not a Lua pattern.
				if not find(char_string, word_form, nil, true) then
					char_string = (char_string ~= "" and (char_string .. '/') or "") .. word_form
				end
			end
			if not find(char_string, '[,%-]') then
				local hash = {}
				for thing in mw.text.gsplit(char_string, (delink[i] and "" or "/")) do
					insert(hash, links.language_link{ lang = lang.getByCode("zh"), term = thing } .. asterisk(thing, true))
				end
				char_string = concat(hash, (delink[i] and "" or "/"))
			end
			insert(char_set[id.name], char_string)
		end
		for _, item in ipairs(id) do
			if not (new_title(item) or {}).exists and item ~= current_title.subpageText then
				insert(uncreated, '"[[' .. item .. ']]"')
			end
		end
	end

	-- Detect which scripts occur in t1 (keys are character-class bodies).
	-- NOTE(review): "0-90-9" lists the same range twice; presumably one of them was meant
	-- to be the fullwidth digits - confirm against the live module.
	local scripts = { [m_scripts.getByCode("Hani"):getCharacters()] = "Hani" , [m_scripts.getByCode("Latn"):getCharacters()] = "Latn", ["0-90-9"] = "Numb", [m_scripts.getByCode("Polyt"):getCharacters()] = "Grek", [m_scripts.getByCode("Bopo"):getCharacters()] = "Bopo" }
	local script = {}
	for range, script_name in pairs(scripts) do
		if find(t1, '[' .. range .. ']') then
			insert(script, script_name)
		end
	end

	-- Reduplication category: t1 contains an immediately repeated substring.
	if find(t1, "([^─…]+)%1") and args['gloss'] ~= '-' and t1_len < 7 then
		if gsub(comp_type or "", "1", "") == "" then
			insert(annotation, '[[Category:Chinese reduplications]]')
		elseif find(t1, "([^…][^…]+)%1") or find(concat(char_set['trad'], " "), "([^─…%[%]a-z]+)%1") then
			-- Only categorise when every multi-character component has a Chinese entry
			-- that is not itself marked |gloss=-.
			local evil
			for _, component in ipairs(char_set['trad']) do
				if len(component) > 1 then
					-- new_title returns nil for an invalid title; treat that like a missing page.
					local comp_title = new_title(links.remove_links(component))
					local comp_content = comp_title and comp_title:getContent() or false
					if not comp_content or find(comp_content, "|gloss=-") or not find(comp_content, "==Chinese==") then
						evil = true
					end
				end
			end
			if not evil then insert(annotation, '[[Category:Chinese reduplications]]') end
		end
	end

	-- Each insert below passes nil (and so adds nothing) when its condition is false.
	insert(annotation, #script > 1 and '[[Category:Chinese terms written in multiple scripts]]' or nil)
	insert(annotation, (decomposable and args['gloss'] ~= '-' and not args['note'] and not args['lit']) and '[[Category:Chinese entries with potentially decomposable titles]]' or nil)
	insert(annotation, (#uncreated > 0 and current_title.nsText == "") and '[[Category:Chinese terms with uncreated forms]]' ..
		'<small class="attentionseeking">(' .. (#uncreated == 1 and 'This form' or 'These forms') ..
		' in the hanzi box ' .. (#uncreated == 1 and 'is' or 'are') .. ' uncreated: ' ..
		concat(uncreated, ", ") .. '.)</small>' or nil)

	-- The extra rows below reuse char_gap's cell opening: sub(..., 1, -45) cuts off its
	-- tail so that different style/colspan attributes can be appended instead.
	-- NOTE(review): the offset depends on the exact length of that tail; with 4-letter
	-- script codes it appears to leave a stray "font-" fragment in the style - confirm.
	local ss = ""
	if args["ss"] then
		ss = var_fmt(t1_len, 'FFFFE0') .. '[[w:Second round of simplified Chinese characters|2nd round simp.]]' .. sub(char_gap(t1_len, 'simp', true), 1, -45)
			.. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"'
			.. ' colspan="' .. #word_division .. '"|' .. zh_link(nil, nil, { "*" .. args["ss"], tr = "-" }, current_title.subpageText)
	end

	local ns = ""
	if args["ns"] then
		ns = var_fmt(t1_len, 'FFFFE0') .. 'nonstandard simp.' .. sub(char_gap(t1_len, 'simp', true), 1, -45)
			.. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"'
			.. ' colspan="' .. #word_division .. '"|' .. zh_link(nil, nil, { "*" .. args["ns"], tr = "-" }, current_title.subpageText)
	end

	local altforms = ""
	if args["alt"] then
		local altform_list = {}
		for altform in mw.text.gsplit(args["alt"], ",") do
			-- "form-label:tr": altdecomp[2] is the transliteration, altdecomp2 = { form, label }.
			local altdecomp = mw.text.split(altform, ":")
			local altdecomp2 = mw.text.split(altdecomp[1], "-")
			-- "‡" in the label is replaced by the expansion of {{zh-historical-dict}}.
			local altdecomp3 = altdecomp2[2] and  ' <span style="font-size:80%"><i>' .. gsub(altdecomp2[2], "‡", frame:expandTemplate{
					title = "Template:zh-historical-dict",
					args = { type = "form", nocat = "1" }
				}) .. '</i></span>' or ''
			insert(altform_list, '<span style="white-space:nowrap;">' ..
				zh_link(nil, nil, { altdecomp2[1], tr = (altdecomp[2] or "-") }, current_title.subpageText) ..
				altdecomp3 .. '</span>')
		end
		-- More than five alternative forms: show the first five, with the full list behind a toggle.
		if #altform_list > 5 then
			altforms = '<div class="vsSwitcher" data-toggle-category="Chinese alternative forms">' ..
				'<div class="vsShow">' .. concat(altform_list, "<br>", 1, 5) ..
				'</div><div class="vsHide">' .. concat(altform_list, "<br>") .. '</div><span class="vsToggleElement" style="display:block;width:fit-content;margin:auto">&nbsp;</span></div>'
		else
			altforms = concat(altform_list, "<br>")
		end
		altforms = var_fmt(t1_len, 'F0FFE0') .. 'alternative forms' .. sub(char_gap(t1_len, "trad", true), 1, -45)
			.. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"'
			.. ' colspan="' .. #word_division .. '"|' .. altforms
	end

	local anagrams = ""
	-- Anagrams are only searched for terms of 2 to 5 characters.
	if t1_len >= 2 and t1_len <= 5 then

		-- Recursively permutes the first `n` entries of `term` by swapping in place.
		-- At n == 0 the current arrangement is looked up: if a page with that title has a
		-- level-2 "Chinese" section it is appended to `anagrams`. `checked` holds the
		-- arrangements already looked up, so duplicates are visited once.
		local function generate_anagrams(term, n, anagrams, checked)
			if n == 0 then
				local anagram = concat(term)
				if checked[anagram] then
					return
				end
				local title = new_title(anagram)
				if not title then
					return
				end
				title = title:getContent()
				if title and get_section(title, "Chinese", 2) then
					insert(anagrams, anagram)
				end
				checked[anagram] = true
			else
				for i = 1, n do
					generate_anagrams(term, n - 1, anagrams, checked)
					local i = n % 2 == 0 and i or 1
					term[i], term[n] = term[n], term[i]
				end
			end
			return anagrams
		end

		-- t1 itself is pre-marked as checked so the term is never listed as its own anagram.
		local term, checked = explode(t1), {[t1] = true}
		anagrams = generate_anagrams(term, #term, {}, checked)
		local anagrams_num = #anagrams

		for i = 1, anagrams_num do
			anagrams[i] = "<span style=\"white-space:nowrap;\">" ..
				zh_link(nil, nil, {anagrams[i], tr = "-"}, current_title.subpageText) ..
				"</span>"
		end

		if anagrams_num == 0 then
			anagrams = ""
		else
			anagrams = concat(anagrams, "<br>")
			local label = anagrams_num == 1 and "anagram" or "anagrams"
			anagrams = var_fmt(t1_len, 'F0FFE0') .. label .. sub(char_gap(t1_len, "trad", true), 1, -45)
				.. '; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"'
				.. ' colspan="' .. #word_division .. '"|' .. anagrams
		end
	end

	-- Bottom row: |lit= (rendered as "Literally: “...”.") takes precedence over |note=.
	local literal = (args["lit"] or args["note"]) and '\n|-' ..
		gloss_fmt(t1_len, #word_division + 2, t1_len) ..
		(args["lit"] and '<i>Literally:</i> “' .. args["lit"] ..
		(find(args["lit"], "%.$") and "”" or "”.") or args["note"]) or ""

	local gloss = {}
	if args['gloss'] == '-' then
		gloss = { gloss_fmt(t1_len * 1.6, #word_division, t1_len) .. '<i>phonetic</i>' }
	elseif t1_len == 1 then
		gloss = {}--{ gloss_fmt(1.6, #word_division) .. '-' }
	else
		for i, position in ipairs(word_division) do
			local character = sub(t1, position[1], position[2])

			local gloss_text = args[1][i]
			-- Load glosses module if no gloss was supplied and the word is a single character.
			if not gloss_text and position[1] == position[2] then
				local glosses = mw.loadData("Module:zh/data/glosses")
				gloss_text = glosses.glosses[character] or ""
				-- Expand any {{taxlink|...}} call stored in the gloss data.
				gloss_text = gsub(gloss_text, "{{taxlink|([^{}]+)}}",
					function (taxlink_text)
						local taxlink_args, argi = {}, 1
						for arg in mw.text.gsplit(taxlink_text, "|") do
							local arg_split = mw.text.split(arg, "=")
							if arg_split[2] then
								taxlink_args[arg_split[1]] = arg_split[2]
							else
								taxlink_args[argi] = (arg ~= "" and arg or nil)
								argi = argi + 1
							end
						end
						local frame = mw.getCurrentFrame()
						return frame:expandTemplate{
							title = 'taxlink',
							args = taxlink_args
						}
					end)
				if gloss_text == "" and find(character, "^[" .. Hani_chars .. "]+$") and not find(glosses.nonlemma, character) then
					require('Module:debug').track('zh-forms/no gloss found for Chinese character')
				end
			end
			if gloss_text == "-" then gloss_text = "''phonetic''" end
			if not gloss_text then
				gloss_text = ""
			end

			--[[
				To ensure that suffixes are not broken up between lines, like this:
				-
				ist
			]]
			if find(gloss_text, "-", nil, true) then
				gloss_text = gsub(gloss_text, "^%-", nonbreaking_hyphen)
				gloss_text = gsub(gloss_text, "(%s)%-", "%1" .. nonbreaking_hyphen)
			end

			-- Multi-character component without a gloss: try to extract one from its entry.
			if gloss_text == "" and position[2] > position[1] then
				-- new_title returns nil for an invalid title; treat that like a missing page.
				local component_title = new_title(character)
				local content = component_title and component_title:getContent() or false
				if content then
					gloss_text = require("Module:zh/extract").extract_gloss(content, false)
					if gloss_text == "" and find(character, "^[" .. Hani_chars .. "]+$") then
						require('Module:debug').track('zh-forms/no gloss found but entry exists')
					end
					--[[
					if not string.match(content, character) then
						require('Module:debug').track('zh-forms/compounds not mentioned in derived terms on the component pages')
					end
					]]--
				else
					if gloss_text == "" and find(character, "^[" .. Hani_chars .. "]+$") then
						insert(annotation, '[[Category:Chinese redlinks/zh-forms]]')
					end
				end
			end
			-- Tidy the ";"-separated senses: drop segments without ASCII letters or digits,
			-- a trailing such segment, and repeated separators.
			gloss_text = gsub(gloss_text, ";[^a-zA-Z0-9]+;", ";")
			gloss_text = gsub(gloss_text, ";[^a-zA-Z0-9]*$", "")
			gloss_text = gsub(gloss_text, ";+", ";")
			-- More than three senses: show the first three, with the full text behind a toggle.
			if len(gsub(gloss_text, '[^;]', '')) > 2 then
				gloss_text = '<div class="vsSwitcher" data-toggle-category="glosses"><div class="vsShow">' ..
					match(gloss_text, '^[^;]+;[^;]+;[^;]+') .. '</div><div class="vsHide">' .. gloss_text .. '</div><span class="vsToggleElement" style="display:block;width:fit-content;margin:auto">&nbsp;</span></div>'
			end
			-- Cell width unit: 0 for punctuation, otherwise the component length, multiplied
			-- by the number of "/"-separated variants unless the component is delinked.
			local word_length = match(character, '[,…%-]') and 0 or
				(delink[i] and len(character) or len(character) * (len(gsub(char_set['trad'][i], '[^/]', '')) + 1))
			insert(gloss, gloss_fmt(word_length, 1, t1_len) .. gloss_text)
		end
	end

	-- Tracking based on the current page's own wikitext (nil when the page has no content).
	if content then
		--[[
		local applicable_pos = { ["Noun"] = 1, ["Verb"] = 1, ["Adjective"] = 1, ["Adverb"] = 1, 
			["Definitions"] = 1, ["Pronunciation"] = 1 }
		local previous_level = 2
		local subheading_wanted
		]]--
		--[=[
		-- Used under headers for Chinese varieties, for instance in [[āu-piah]]
		local Chinese_section = string.match(content, "\n==Chinese==\n(.-)\n==[^=]")
		if not Chinese_section then
			error("No Chinese section found.")
		end
		--]=]
		if t1_len == 1 and not string.match(content, "===Definitions===") then
			require('Module:debug').track('zh-forms/no definitions section found')
		end
		--[[ disable, {{zh-der}} has been deprecated so this expression will always be false
		if t1_len == 1 and not string.match(content, "===Compounds===") and string.match(content, "zh-der") then
			require('Module:debug').track('zh-forms/derived terms probably needing renaming')
		end
		]]--
		--[[ comment out relatively expensive parsing
		for equals, heading_text in string.gmatch(content, "\n(%=%=+)([^%=]+)%=%=+") do
			local current_level = #equals
			if subheading_wanted and current_level ~= previous_level + 1 then
				if applicable_pos[heading_text] then
					error("The heading \"===Etymology ''n''===\" or \"===Pronunciation ''n''===\" must be followed by a subheading one level lower.")
				end
			end
			previous_level = current_level
			subheading_wanted = string.find(heading_text, " [0-9]+") and true or false
		end
		]]--
	end

	local simp_note = ""
	--This is not a complete list!

	-- No separate simplified form, but t1 has a character listed in m_data.chars_unified:
	-- show separate trad./simp. rows with identical content and a "#" explanation.
	if #identity == 1 and find(m_data.chars_unified, '[' .. t1 .. ']') then
		simp_note = '<sup><span class="explain" title="Using the same code point' .. (t1_len > 1 and 's' or '') .. ' as the traditional form due to Han unification. Without proper font support, it may be displayed as the same as the traditional form.">#</span></sup>'
		char_set['simp'] = char_set['trad']
	end

	-- Assemble: header, gloss row, then either one combined "simp. and trad." row or
	-- separate trad. and simp. rows, then the optional rows, table end, and annotations.
	return concat{
		header(t1_len, math.max(#t, #s)), concat(gloss, ""),
		((#identity == 1 and simp_note == '') and
			(var_fmt(t1_len) .. '[[Simplified Chinese|simp.]] and [[Traditional Chinese|trad.]]<br>' ..
				form_fmt(t, t1_len, 'both') .. char_fmt(char_set['trad'], t1_len, 'both'))
		or
			 var_fmt(t1_len) .. '[[Traditional Chinese|trad.]] ' ..
			 	form_fmt(t, t1_len, 'trad') .. char_fmt(char_set['trad'], t1_len, 'trad') ..

			(var_fmt(t1_len) .. '[[Simplified Chinese|simp.]] ' .. simp_note ..
				form_fmt(#s == 0 and t or s, t1_len, 'simp') .. char_fmt(char_set['simp'], t1_len, 'simp'))

		), ss, ns, altforms, anagrams, literal, '\n|}', concat(annotation)
	}
end

return export