Module:Interwiki from P460

Lua
Code · Discussion · Links · Link count · Subpages · Documentation · Tests · Results · Sandbox · Live code · All modules

Documentation for this module may be created at Module:Interwiki from P460/doc

Code

-- Original at https://www.wikidata.org/wiki/Module:Interwiki_from_P460
-- To update, please edit there first, then copy to your wiki.

-- Adapted from https://pl.wikipedia.org/wiki/Modu%C5%82:%C5%81atki by [[pl:User:Paweł Ziemian]]

-- Use with:
--	{{#invoke:Interwiki from P460|InterwikiP1420}}
--	{{#invoke:Interwiki from P460|InterwikiP1889}} -- uses sitelinks on items used as values with P1889 (different from)
--	{{#invoke:Interwiki from P460|InterwikiP1889fn}} -- uses sitelinks on items used as values with P1889 (different from) if qualified with 'criterion used' and Q27924673
--	{{#invoke:Interwiki from P460|Interwiki}} -- uses sitelinks on items used as values with P460 (same as)
--	{{#invoke:Interwiki from P460|InterwikiName}} -- to be used on talk pages of Wikidata items (Q-elements, properties,  lexemes): generates an inline comma-separated list of links (visible in the page content), rather than metadata for interwiki links (visible only on the navigation sidebar)

-- Local aliases for the library functions used throughout this module.
local insert = table.insert
local concat = table.concat
local sort = table.sort
local getEntityObject = mw.wikibase.getEntityObject
-- Title object of the current page; evaluated once, when the module is loaded.
local currentTitle = mw.title.getCurrentTitle()
-- Applied to the page wikitext before it is scanned for interwiki links
-- (presumably decodes HTML entities — see Module:HTMLEntities).
local htmldecode = require('Module:HTMLEntities').htmldecode

-- Export table: every public function is attached to it and it is returned at the end of the module.
local m = {}

-- Prefixes accepted as interwiki targets; list from [[Module:Lang/data]].
-- It is turned into the `knownLanguages` lookup set further down.
-- Besides language codes it also contains the one-letter prefixes 'c', 'd' and 'w'.
local langlist = {'aa', 'ab', 'ace', 'af', 'ak', 'als', 'am', 'an', 'ang', 'ar', 'arc', 'ary', 'arz', 'as', 'ast', 'av', 'ay', 'az', 'ba', 'bar', 'bat-ltg', 'bat-smg', 'bcl', 'be', 'be-tarask', 'be-x-old', 'bej', 'bg', 'bh', 'bi', 'bjn', 'bm', 'bms', 'bn', 'bo', 'bpy', 'br', 'bs', 'bug', 'bxr', 'c', 'ca', 'cbk-zam', 'cdo', 'ce', 'ceb', 'ch', 'cho', 'chr', 'chy', 'ckb', 'co', 'cr', 'crh', 'crs', 'cs', 'csb', 'cu', 'cv', 'cy', 'd', 'da', 'dag', 'de', 'de-at', 'de-ch', 'diq', 'dsb', 'dv', 'dz', 'ee', 'egl', 'el', 'eml', 'en', 'en-ca', 'en-gb', 'en-us', 'enm', 'eo', 'es', 'ess', 'esu', 'et', 'eu', 'ext', 'fa', 'ff', 'fi', 'fiu-vro', 'fj', 'fo', 'fon', 'fr', 'frp', 'frr', 'fur', 'fy', 'ga', 'gaa', 'gag', 'gan', 'gcr', 'gd', 'gil', 'gl', 'glk', 'gn', 'got', 'grc', 'grc-koi', 'gsw', 'gu', 'gv', 'ha', 'hak', 'haw', 'he', 'hi', 'hif', 'hil', 'ho', 'hr', 'hsb', 'ht', 'hu', 'hy', 'hyw', 'hz', 'ia', 'id', 'ie', 'ig', 'ii', 'ik', 'ilo', 'inh', 'io', 'is', 'it', 'iu', 'ja', 'jam', 'jbo', 'jv', 'ka', 'kaa', 'kab', 'kbd', 'kg', 'khw', 'ki', 'kj', 'kk', 'kl', 'km', 'kn', 'ko', 'koi', 'kr', 'krc', 'krj', 'ks', 'ksh', 'ku', 'kv', 'kw', 'ky', 'la', 'lad', 'lb', 'lbe', 'lez', 'lfn', 'lg', 'li', 'lij', 'lld', 'lmo', 'ln', 'lo', 'lt', 'ltg', 'lv', 'lzh', 'lzz', 'map-bms', 'mdf', 'mg', 'mh', 'mhr', 'mi', 'min', 'mk', 'ml', 'mn', 'mo', 'mr', 'mrj', 'ms', 'ms-arab', 'mt', 'mus', 'mwl', 'my', 'myv', 'mzn', 'na', 'nah', 'nan', 'nap', 'nb', 'nds', 'nds-nl', 'ne', 'new', 'ng', 'nl', 'nn', 'no', 'non', 'nov', 'nrf', 'nqo', 'nrm', 'nso', 'nv', 'ny', 'oc', 'om', 'or', 'os', 'pa', 'pag', 'pam', 'pap', 'paw', 'pcd', 'pdc', 'pdt', 'peo', 'pfl', 'pi', 'pih', 'pl', 'pms', 'pnb', 'pnt', 'ps', 'pt', 'pt-br', 'qu', 'qya', 'rgn', 'rki', 'rm', 'rmy', 'rn', 'ro', 'roa-rup', 'roa-tara', 'ru', 'rue', 'rup', 'rw', 'sa', 'sah', 'sat', 'sc', 'scn', 'sco', 'sd', 'se', 'sg', 'sgs', 'sh', 'shi', 'si', 'sid', 'simple', 'sk', 'sl', 'sla', 'sli', 'sm', 'smn', 'sms', 'sn', 'so', 'sq', 'sr', 'sr-cyrl', 'sr-ec', 'sr-el', 
'sr-latn', 'srn', 'ss', 'st', 'stq', 'su', 'sv', 'sw', 'syc', 'szl', 'ta', 'te', 'tet', 'tg', 'th', 'ti', 'tk', 'tl', 'tn', 'to', 'tok', 'tokipona', 'tpi', 'tr', 'ts', 'tt', 'tt-cyrl', 'tt-latn', 'tum', 'tvl', 'tw', 'ty', 'udm', 'ug', 'uk', 'ur', 'uz', 've', 'vec', 'vep', 'vi', 'vls', 'vo', 'w', 'wa', 'war', 'wbl', 'wo', 'wuu', 'wym', 'xal', 'xh', 'xmf', 'yai', 'yi', 'yo', 'yue', 'za', 'zea', 'zh', 'zh-classical', 'zh-cn', 'zh-hans', 'zh-hant', 'zh-hk', 'zh-min-nan', 'zh-mo', 'zh-simple', 'zh-sg', 'zh-tw', 'zh-wuu', 'zh-yue', 'zu', 'zun'}

-- Maps an interwiki prefix to the prefix that should be emitted instead.
-- The lookup key is the prefix after lower-casing and replacing '_' with '-';
-- a prefix that has no entry here is used unchanged.
local iwprefers = { -- remaps preferred codes for interwiki links
	['bat-smg'] = 'sgs', -- standard BCP47 code is working now, and preferred
	['be-x-old'] = 'be-tarask', -- standard BCP47 code is working now, and preferred
	['bho'] = 'bh', -- both are correct, but Wikimedia assumes that 'bh' refers to just 'bho'
	['bms'] = 'map-bms', -- incorrect/conflicting code 'map-bms' according to ISO 639 and BCP47, but still required by Wikimedia
	['commons'] = 'c', -- short prefix for Commons
	['de-at'] = 'de', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['de-ch'] = 'de', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['en-ca'] = 'en', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['en-gb'] = 'en', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['en-us'] = 'en', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['egl'] = 'eml', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['fiu-vro'] = 'vro', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['gsw'] = 'als', -- incorrect/conflicting code 'als' according to ISO 639 and BCP47, but still required by Wikimedia
	['ms-arab'] = 'ms', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['nb'] = 'no', -- both are correct, but Wikimedia assumes that 'no' refers to just 'nb', separating 'nn' (there are fallbacks between 'nb' and 'nn')
	['nrf'] = 'nrm', -- incorrect/conflicting code 'nrm' according to ISO 639 and BCP47, but still required by Wikimedia
	['pt-br'] = 'pt', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['rgn'] = 'eml', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['rki'] = 'my', -- both are correct, but only 'my' is working for now (merged Wikis)
	['roa-rup'] = 'rup', -- standard BCP47 code is working now, and preferred
	['tokipona'] = 'tok', -- standard BCP47 code is working now, and preferred
	['sr-cyrl'] = 'sr', -- both are standard (merged Wikis)
	['sr-ec'] = 'sr', -- legacy code of Wikimedia, 'sr-cyrl' is standard (merged Wikis)
	['sr-el'] = 'sr', -- legacy code of Wikimedia (merged Wikis); NOTE(review): the standard equivalent is presumably 'sr-latn', not 'sr-cyrl' — confirm
	['sr-latn'] = 'sr', -- both are standard (merged Wikis)
	['tt-cyrl'] = 'tt', -- both are standard (merged Wikis)
	['tt-latn'] = 'tt', -- both are standard (merged Wikis)
	['w'] = 'en', -- project prefix treated as the 'en' wiki
	['wikipedia'] = 'en', -- project prefix treated as the 'en' wiki
	['zh-classical'] = 'lzh', -- standard BCP47 code is working now, and preferred
	['zh-cn'] = 'zh', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['zh-hans'] = 'zh', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['zh-hant'] = 'zh', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['zh-hk'] = 'zh', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['zh-min-nan'] = 'nan', -- standard BCP47 code is working now, and preferred
	['zh-mo'] = 'zh', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['zh-simple'] = 'zh', -- only the standard BCP47 code is working
	['zh-sg'] = 'zh', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['zh-tw'] = 'zh', -- standard BCP47 code is working now, and preferred (merged Wikis)
	['zh-wuu'] = 'wuu', -- standard BCP47 code is working now, and preferred
	['zh-yue'] = 'yue', -- standard BCP47 code is working now, and preferred
}

-- Sort-key overrides: a prefix listed here is compared by its replacement key
-- instead of by its own text, so that these prefixes sort ahead of the others.
local iwsort = {
	['d'] = '',
	['c'] = ' ',
	['en'] = '0',
	['simple'] = '1',
}

-- Lookup set of the prefixes that pass the `filterknown` check:
-- knownLanguages[prefix] is true for an accepted prefix, nil otherwise.
local knownLanguages = {}
for _, code in ipairs(langlist) do
	knownLanguages[code] = true
end
-- A prefix is remapped through `iwprefers` before it is checked against this set,
-- so every remap target has to be accepted as well. Without this, a target that is
-- missing from `langlist` (e.g. 'fiu-vro' -> 'vro') is silently filtered out.
for _, preferred in pairs(iwprefers) do
	knownLanguages[preferred] = true
end
knownLanguages['d'] = nil -- disable the local project

-- Builds the wikitext of the interwiki links for an entity and for the items it refers to.
--
-- Sitelinks are collected from the entity itself first, then from the items used as
-- values of the statements selected by `prop`:
--   'P1420', 'P1889'        every item value of that property
--   'P1889P1013Q27924673'   P1889 values having a P1013 qualifier equal to Q27924673
--   'P460'                  every item value of P460
--   'P1889P1013Q23765057'   P1889 values having a P1013 qualifier equal to Q23765057
--   'P460P1889B'            the 'P460' selection followed by the 'P1889P1013Q23765057' one
-- Any other value of `prop` uses the sitelinks of the entity alone.
-- When one prefix is found several times, the first title collected is the one emitted.
--
-- @param qid          entity id handed to getEntityObject
-- @param prop         statement selector (see the list above)
-- @param linking      truthy: emit visible links `[[:iw:Title|iw:<bdi>Title</bdi>]]` joined by ', ';
--                     falsy: emit `[[iw:Title]]` links joined without separator
-- @param filterknown  truthy: keep only the prefixes found in `knownLanguages`, and also scan
--                     the wikitext of the current page for `[[prefix:Title]]` links
-- @return the generated wikitext, or '' when getEntityObject returns no entity
function m.completeiw(qid, prop, linking, filterknown)
	local entity = getEntityObject(qid)
	if not entity then
		return '' -- no data -> category?
	end

	local iws = {} -- prefixes in order of discovery, sorted before output
	local titles = {} -- titles[prefix] = list of the titles found for that prefix

	-- Records one (prefix, title) pair after normalising the prefix.
	local function addiw(iw, title)
		iw = iw:lower():gsub('_', '-') -- only the first result of gsub is kept
		iw = iwprefers[iw] or iw
		if filterknown and not knownLanguages[iw] then
			return
		end
		local found = titles[iw]
		if found then
			insert(found, title)
		else
			insert(iws, iw)
			titles[iw] = { title }
		end
	end

	-- Records the sitelinks of `item`: the 'commons' key and every key ending in 'wiki'.
	local function extractLinks(item)
		if not (item and item.sitelinks) then
			return
		end
		for site, link in pairs(item.sitelinks) do
			if site == 'commons' then
				addiw('c', link.title)
			elseif site:sub(-4) == 'wiki' then
				addiw(site:sub(1, -5), link.title)
			end
		end
	end

	-- Tells whether `statement` has a P1013 ('criterion used') qualifier whose value is `criterion`.
	local function hasCriterion(statement, criterion)
		local snaks = statement.qualifiers and statement.qualifiers.P1013
		if not snaks then
			return false
		end
		for _, snak in ipairs(snaks) do
			if snak.snaktype == 'value' and snak.datavalue.value.id == criterion then
				return true
			end
		end
		return false
	end

	-- Records the sitelinks of every item used as a value of property `pid` on the entity.
	-- When `criterion` is given, only statements qualified with that criterion are followed.
	local function followClaims(pid, criterion)
		local statements = entity.claims and entity.claims[pid]
		if not statements then
			return
		end
		for _, statement in ipairs(statements) do
			local mainsnak = statement.mainsnak
			if mainsnak.snaktype == 'value' then
				local seeid = mainsnak.datavalue.value.id
				if seeid and (not criterion or hasCriterion(statement, criterion)) then
					extractLinks(getEntityObject(seeid))
				end
			end
		end
	end

	extractLinks(entity)

	if prop == 'P1420' or prop == 'P1889' then
		followClaims(prop)
	elseif prop == 'P1889P1013Q27924673' then
		followClaims('P1889', 'Q27924673')
	else
		if prop == 'P460' or prop == 'P460P1889B' then
			followClaims('P460')
		end
		if prop == 'P1889P1013Q23765057' or prop == 'P460P1889B' then
			followClaims('P1889', 'Q23765057')
		end
	end

	if filterknown then
		-- Parse (partially) the page content to detect interwiki links that may still not be in Wikidata
		-- FIXME: may still detect false positives (incomplete parsing) and omit transcluded/generated interwikis
		-- getContent() gives nil when there is no page content to read; use an empty
		-- string then, so that the method calls below do not fail on nil.
		local wikitext = currentTitle:getContent() or '' -- may be costly
		-- NOTE(review): the final gsub also passes its substitution count to htmldecode
		-- as a second argument — confirm that htmldecode ignores it.
		local content = htmldecode(
				wikitext
				:gsub('<!%-%-.-%-%->', ''):gsub('<!%-%-.*$', '') -- strip HTML comments
				:gsub('<includeonly%s*>.-</includeonly%s*>', ''):gsub('<includeonly%s*>.*$', '') -- strip <includeonly> sections
		)
		for iw, title in content:gmatch('%[%[([%-0-9A-Za-z]+):([^%|%]]+)%|?[^%]]*%]%]') do
			addiw(iw, title)
		end
	end

	-- Prefixes listed in iwsort are compared by their replacement key.
	sort(iws, function(a, b)
		return (iwsort[a] or a) < (iwsort[b] or b)
	end)

	-- A leading colon turns the interwiki link into a visible inline link.
	local colon = linking and ':' or ''
	for i = 1, #iws do
		local iw = iws[i]
		local title = titles[iw][1]
		local label = linking and ('|' .. iw .. ':<bdi>' .. title .. '</bdi>') or ''
		iws[i] = '[[' .. colon .. iw .. ':' .. title .. label .. ']]'
	end
	return concat(iws, linking and ', ' or '')
end

-- Entry point for {{#invoke:Interwiki from P460|InterwikiP1420}}.
-- Reads the `id` argument of the invocation and returns interwiki links
-- (not inline, known prefixes only) using the 'P1420' selection.
function m.InterwikiP1420(frame)
	local qid = frame.args.id
	return m.completeiw(qid, 'P1420', false, true)
end

-- Entry point for {{#invoke:Interwiki from P460|InterwikiP1889}}.
-- Reads the `id` argument of the invocation and returns interwiki links
-- (not inline, known prefixes only) using the 'P1889' selection.
function m.InterwikiP1889(frame)
	local qid = frame.args.id
	return m.completeiw(qid, 'P1889', false, true)
end

-- Entry point for {{#invoke:Interwiki from P460|InterwikiP1889fn}}.
-- Reads the `id` argument of the invocation and returns interwiki links
-- (not inline, known prefixes only) using the 'P1889P1013Q27924673' selection.
function m.InterwikiP1889fn(frame)
	local qid = frame.args.id
	return m.completeiw(qid, 'P1889P1013Q27924673', false, true)
end

-- Entry point for {{#invoke:Interwiki from P460|Interwiki}}.
-- Reads the `id` argument of the invocation and returns interwiki links
-- (not inline, known prefixes only) using the 'P460' selection.
function m.Interwiki(frame)
	local qid = frame.args.id
	return m.completeiw(qid, 'P460', false, true)
end

-- Entry point for {{#invoke:Interwiki from P460|InterwikiName}}.
-- Reads the `id` argument of the invocation and returns an inline, comma-separated
-- list of links (no prefix filtering) using the 'P460P1889B' selection.
function m.InterwikiName(frame)
	local qid = frame.args.id
	return m.completeiw(qid, 'P460P1889B', true, false)
end

-- Same output as m.InterwikiName, but takes the entity id directly
-- instead of reading it from a frame.
function m.InterwikiNameTalk(qid)
	local selection = 'P460P1889B'
	return m.completeiw(qid, selection, true, false)
end

return m