-- Module:ISO 639 name/ISO 639 name to code/make

require('strict');																-- Scribunto 'strict' library: access to undeclared globals raises an error (see Scribunto Lua reference)
local temp = {};																-- shared accumulator: filled by lang_add(), read by iso_639_name_to_code(); temp[<lowercase language name>] = sequence of '[<index>]="<code>"' strings


--[[--------------------------< L A N G _ A D D >--------------------------------------------------------------

temp table is a table of tables where the key is the lowercase language name and the value is a sequence of
preformatted '[<index>]="<code>"' strings, one for each ISO 639 code associated with that language name.

This function adds language name (as index) and its code (as a table element) using an appropriate index number.

<lang> is the language name from the source data
<code> is the associated ISO 639 code from the source data
<part> is one of the strings '1', '2', '2B', '3', '5' for the base ISO 639 language names and codes; the same
	strings prefixed with 'O' for the override data or with 'D' for the deprecated data

Raises an error when <part> is not one of the strings listed above.

]]

local part_index = {															-- convert string <part> to a numeric index; built once, not once per call
	['1']=1, ['2']=2, ['2B']=3, ['3']=4, ['5']=5,								-- for the base ISO 639 parts
	['O1']=6, ['O2']=7, ['O2B']=8, ['O3']=9, ['O5']=10,							-- for the override tables
	['D1']=11, ['D2']=12, ['D2B']=13, ['D3']=14, ['D5']=15,						-- for the deprecated tables
	};

local function lang_add (lang, code, part)
	local index = part_index[part];
	if not index then															-- fail with a clear message instead of passing nil on to string.format()
		error ('lang_add: unknown ISO 639 part: ' .. tostring (part), 2);
	end

	lang = mw.ustring.lower (lang);												-- convert to lowercase for use as table index

	local codes = temp[lang];
	if not codes then															-- when no entry for this language
		codes = {};																-- make a blank entry
		temp[lang] = codes;
	end

	codes[#codes + 1] = string.format ('[%d]="%s"', index, code);				-- append the code; entries are kept in call order and sorted later by the caller
end


-- TODO: better name
-- Walk one code-to-names table and hand every name / code pair to lang_add().
--   <part_data> is indexed by code; each value is a sequence of language names for that code
--   <part_number> is the part string passed unchanged to lang_add()
local function iterate_table(part_data, part_number)
	for iso_code, names in pairs (part_data) do
		for _, name in ipairs (names) do										-- a code may carry several names; register each of them
			lang_add (name, iso_code, part_number);
		end
	end
end

--[[--------------------------< I S O 6 3 9 _ N A M E _ T O _ C O D E >----------------------------------------

Read the code-to-name source tables and convert them to a name-to-code table.

The source tables are read with mw.loadData() in this order: ISO 639-3, -5, -2, -2B, -1, then the five override
tables, then the five deprecated tables.  Every name / code pair is collected in the module-level temp table.

For each language name the collected codes are sorted by their numeric index (ties are broken by comparing the
whole '[<index>]="<code>"' string so that the rendering does not depend on pairs() traversal order); the lines
are then sorted by language name.

Returns a string: the text of a Lua 'return {...}' table constructor, preceded by a comment that explains the
numeric indexes, wrapped in <pre>...</pre> with <br /> line breaks and &#9; tabs.

]]

local function iso_639_name_to_code ()
	temp = {};																	-- start from an empty accumulator so that a repeated call does not list every code twice

	for _, part in ipairs ({'3', '5', '2', '2B', '1'}) do						-- base parts; start with part 3 because it has the most codes
		iterate_table (mw.loadData ('Module:ISO 639 name/ISO 639-' .. part), part);
	end

	local extra_sources = {														-- data modules that hold one sub-table per part
		{page = 'Module:ISO 639 name/ISO 639 override', key = 'override_', prefix = 'O'},
		{page = 'Module:ISO 639 name/ISO 639 deprecated', key = 'deprecated_', prefix = 'D'},
		};

	for _, source in ipairs (extra_sources) do
		local source_data = mw.loadData (source.page);							-- has data for all parts
		for _, part in ipairs ({'1', '2', '2B', '3', '5'}) do					-- for each of the per-part tables
			iterate_table (source_data[source.key .. part], source.prefix .. part);	-- <part> gets the 'O' or 'D' prefix
		end
	end

	local function comp (a, b)													-- order '[<index>]="<code>"' strings by <index>
		local a_index = tonumber (a:match ('^%[(%d+)%]'));
		local b_index = tonumber (b:match ('^%[(%d+)%]'));
		if a_index ~= b_index then
			return a_index < b_index;
		end
		return a < b;															-- same index: fall back to string order so the result is deterministic
	end

	local out = {};
	for lang, codes in pairs (temp) do
		table.sort (codes, comp);												-- codes were added in the order that lang_add() was called above; sort to make pretty
		table.insert (out, table.concat ({'["', lang, '"] = {', table.concat (codes, ', '), '}'}));	-- reformat
	end

	table.sort (out);															-- sort in language name order

	local key_str = table.concat ({
		'--[[--------------------------< I S O _ 6 3 9 _ N A M E _ T O _ C O D E >--------------------------------------<br /><br />',
		'Key:<br />&#9;',
		'[1]=ISO 639-1&#9;&#9;[6]=ISO 639-1 override&#9;&#9;[11]=ISO 639-1 deprecated<br />&#9;',
		'[2]=ISO 639-2&#9;&#9;[7]=ISO 639-2 override&#9;&#9;[12]=ISO 639-2 deprecated<br />&#9;',
		'[3]=ISO 639-2B&#9;&#9;[8]=ISO 639-2B override&#9;&#9;[13]=ISO 639-2B deprecated<br />&#9;',
		'[4]=ISO 639-3&#9;&#9;[9]=ISO 639-3 override&#9;&#9;[14]=ISO 639-3 deprecated<br />&#9;',
		'[5]=ISO 639-5&#9;&#9;[10]=ISO 639-5 override&#9;&#9;[15]=ISO 639-5 deprecated',
		'<br />]]<br /><br />'
		})

	return table.concat ({'<pre>', key_str, 'return {<br />&#9;', table.concat (out, ',<br />&#9;'), '<br />&#9;}<br /></pre>'});	-- render
end

--[[--------------------------< E X P O R T E D _ F U N C T I O N S >------------------------------------------

Only iso_639_name_to_code() is exported; lang_add() and iterate_table() are local to this module.

]]

return {iso_639_name_to_code = iso_639_name_to_code}