-- Module:links/data
--
-- From Wiktionary
-- Shorthand for the Scribunto library functions used while building the tables below.
local encode = mw.text.encode -- used to escape the ":" in URI scheme prefixes
local u = mw.ustring.char -- builds a string from Unicode code points
-- Table returned at the end of this module; each section below fills in one field.
local data = {}

-- Set of language codes, stored as code -> true.
-- NOTE(review): presumably the languages for which the linking code ignores a
-- capitalisation marker; confirm against the module that reads this table.
data.ignore_cap = {
	ko = true,
}

-- Language code -> name of a language-specific module.
-- NOTE(review): presumably the module used to extract a phonetic form for that
-- language; confirm against the module that reads this table.
data.phonetic_extraction = {
	th = "Module:th",
	km = "Module:km",
}

-- Part-of-speech abbreviation -> spelled-out part-of-speech name.
data.pos_tags = {
	a = "adjective",
	adv = "adverb",
	int = "interjection",
	n = "noun",
	pron = "pronoun",
	v = "verb",
	vi = "intransitive verb",
	vt = "transitive verb",
	vti = "transitive and intransitive verb",
}

-- Titles that cannot be used directly as page names.
--
-- Each entry has the form
--     ["displayed_title"] = "actual title"
-- where the "actual title" is the page name with the prefix
-- "Unsupported titles/" removed.
--
-- Keys that cannot be typed reliably (unusual space characters, the replacement
-- character) are built from their code points with u().
data.unsupported_titles = {
	[" "] = "Space",
	["# #"] = "Enclosing number signs",
	["#"] = "Number sign",
	["#MeToo"] = "MeToo",
	["#MeTooed"] = "MeTooed",
	["#MeTooing"] = "MeTooing",
	["#MeToos"] = "MeToos",
	["&"] = "Amp",
	-- The backslash must be written as "\\" inside a Lua string literal.
	["¯\\_(ツ)_/¯"] = "¯\\ (ツ) /¯",
	["¯_(ツ)_/¯"] = "¯ (ツ) /¯",
	["(^_^)"] = "(^ ^)",
	["*_*"] = "* *",
	["."] = "Full stop",
	[".."] = "Double period",
	["./."] = "Period slash period",
	[": :"] = ": :",
	[":"] = ":",
	[":-{"] = "Colon hyphen left curly bracket",
	[":{"] = "Colon left curly bracket",
	[":|"] = "Colon vertical line",
	["=_="] = "= =",
	["[ ]"] = "Square brackets",
	["["] = "Left square bracket",
	["[…]"] = "Square bracketed ellipsis",
	["[...]"] = "Left square bracket ... right square bracket",
	["[-0-]"] = "Australian Aboriginal Flag emoticon alternative",
	["[citation needed]"] = "Square bracketed citation needed",
	["[-o-]"] = "Australian Aboriginal Flag emoticon",
	["]"] = "Right square bracket",
	["^_^"] = "^ ^",
	["_ _"] = "Underscore space underscore",
	["-_-"] = "- -",
	["_"] = "Underscore",
	["{ }"] = "Curly brackets",
	["{"] = "Left curly bracket",
	["| |"] = "Enclosing vertical lines",
	["|"] = "Vertical line",
	["-||-"] = "Hyphen vertical line vertical line hyphen",
	["||"] = "Vertical line vertical line",
	["}"] = "Right curly bracket",
	["</s>"] = "End s tag",
	["< />"] = "Less than trailing slash greater than",
	["< > </ >"] = "HTML start tag end tag",
	["< >"] = "Enclosing less than greater than",
	["<!-- -->"] = "HTML comment",
	["<-"] = "Less than hyphen",
	["<"] = "Less than",
	["</3"] = "Less than slash three",
	["<\\3"] = "Less than backslash three",
	["<<"] = "Double less than",
	["<<<"] = "Triple less than",
	["<="] = "Less than equal",
	["<>"] = "Less than greater than",
	["<3"] = "Less than three",
	["<g>"] = "g tag",
	["=<"] = "Equal less than",
	["=>"] = "Equal greater than",
	[">"] = "Greater than",
	["->"] = "Hyphen greater than",
	[">_<"] = "Greater than low line less than",
	[">="] = "Greater than equal",
	[">>"] = "Double greater than",
	[">>>"] = "Triple greater than",
	["×_×"] = "× ×",
	["9_9"] = "9 9",
	["C#"] = "C sharp",
	["C|N>K"] = "C through N to K",
	["eq #"] = "eq number sign",
	["f##k"] = "f double number sign k",
	["f##ked"] = "f double number sign ked",
	["f##king"] = "f double number sign king",
	["f##ks"] = "f double number sign ks",
	["hr #"] = "hr number sign",
	["n_n"] = "n n",
	["O_O"] = "O O",
	["O_o"] = "O o",
	["o_O"] = "o O",
	["o_o"] = "o o",
	["snake_case"] = "snake case",
	["T_T"] = "T T",
	["u_u"] = "u u",
	["X_X"] = "X X",
	["x_x"] = "x x",
	["x86_64"] = "x86 64",
	["λοπαδοτεμαχοσελαχογαλεοκρανιολειψανοδριμυποτριμματοσιλφιοκαραβομελιτοκατακεχυμενοκιχλεπικοσσυφοφαττοπεριστεραλεκτρυονοπτοκεφαλλιοκιγκλοπελειολαγῳοσιραιοβαφητραγανοπτερύγων"] = "Ancient Greek dish",
	["о/."] = "о slash dot",
	["ಠ_ಠ"] = "ಠ ಠ",
	["ಥ_ಥ"] = "ಥ ಥ",
	["┬─┬ノ( º _ ºノ)"] = "┬─┬ノ( º ºノ)",
	["กรุงเทพมหานคร อมรรัตนโกสินทร์ มหินทรายุธยา มหาดิลกภพ นพรัตนราชธานีบูรีรมย์ อุดมราชนิเวศน์มหาสถาน อมรพิมานอวตารสถิต สักกะทัตติยวิษณุกรรมประสิทธิ์"] = "Thai name of Bangkok",
	[u(0x1680)] = "Ogham space",
	[u(0x3000)] = "Ideographic space",
	[u(0xFFFD)] = "Replacement character",
}

-- Whitespace character -> the same character wrapped as "]" .. char .. "[".
-- NOTE(review): presumably substituted into link text so that the whitespace
-- ends up outside the surrounding brackets; confirm against the consumer.
do
	local wrapped = {
		[" "] = "] [", -- Space
	}

	-- Every remaining entry follows one pattern, so only the code points are listed.
	local space_code_points = {
		0x00A0, -- No-break space
		0x180E, -- Mongolian vowel separator
		0x2000, -- En quad
		0x2001, -- Em quad
		0x2002, -- En space
		0x2003, -- Em space
		0x2004, -- Three-per-em space
		0x2005, -- Four-per-em space
		0x2006, -- Six-per-em space
		0x2007, -- Figure space
		0x2008, -- Punctuation space
		0x2009, -- Thin space
		0x200A, -- Hair space
		0x202F, -- Narrow no-break space
		0x205F, -- Medium mathematical space
		0x3000, -- Ideographic space
	}

	for i = 1, #space_code_points do
		local ch = u(space_code_points[i])
		wrapped[ch] = "]" .. ch .. "["
	end

	data.display_change = wrapped
end

-- URI scheme prefixes that are valid in external links. An entry name starting
-- with one of them (e.g. [[sms:a]]) therefore has to be escaped.
-- data.uri_schemes maps each prefix to the result of encode(prefix, ":").
do
	local scheme_prefixes = {
		"bitcoin:",
		"ftp://",
		"ftps://",
		"geo:",
		"git://",
		"gopher://",
		"http://",
		"https://",
		"irc:",
		"ircs:",
		"magnet:",
		"mailto:",
		"mms://",
		"news:",
		"nntp://",
		"redis://",
		"sftp://",
		"sip:",
		"sips:",
		"sms:",
		"ssh://",
		"svn://",
		"tel:",
		"telnet://",
		"urn:",
		"worldwind://",
		"xmpp:",
	}

	-- Turn the list into a lookup table keyed by the raw prefix.
	local escaped_by_prefix = {}
	for i = 1, #scheme_prefixes do
		local prefix = scheme_prefixes[i]
		escaped_by_prefix[prefix] = encode(prefix, ":")
	end

	data.uri_schemes = escaped_by_prefix
end

return data