-- Module:ISBN RangeMessage xlate

require ('strict');

local title_object = mw.title.getCurrentTitle ();								-- title object of the page that is currently being rendered
if nil == title_object.fullText:find ('/doc$') then								-- page name does not end with '/doc'?
	title_object = mw.title.new (title_object.fullText .. '/doc');				-- then switch to the title object of that page's ~/doc subpage
end
local range_message_raw = title_object:getContent();							-- unparsed content of the doc page; this is where the RangeMessage xml is kept

-- Lua patterns used by range_message_xlate() to pick the RangeMessage xml apart.  '.-' is the shortest-match
-- repetition so the <Group>, <Agency>, and <Rule> captures each stop at the first matching closing tag.
local timestamp_pattern = '<MessageDate>%D+(%d%d? %w%w%w %d%d%d%d %d%d:%d%d:%d%d)( %w%w%w)</MessageDate>' -- %D+ skips the non-digit text ahead of the day number (presumably the day name); captures are timestamp and three-character time zone with its leading space (BST or GMT)
local group_pattern = '<Group>(.-)</Group>';									-- capture holds the <Prefix>, <Agency>, and <Rules> content of one group
local prefix_pattern = '<Prefix>(%d+)%-(%d+)</Prefix>';							-- captures are GS1 prefix (978 or 979) and registration group; both are digit strings
local agency_pattern = '<Agency>(.-)</Agency>';									-- capture holds language, country, territory; used only as a comment in the output
local rule_pattern = '<Rule>(.-)</Rule>';										-- capture holds registrant <Range> and <Length> groups
local range_pattern = '<Range>(%d+)%-(%d+)</Range>';							-- registrant; captures are min and max ranges right-padded to 7 digits; max keys both output tables, min is used only for inverse_range_t
local length_pattern = '<Length>(%d)</Length>';									-- registrant; capture holds number of (left) digits to use from <Range> (a single digit); for len=2 in max=1299999 -> 12


--[[--------------------------< R E N D E R _ O U T P U T >----------------------------------------------------

common function to render either of the two tables that this module creates.  Yeah, requires invoking the module
twice to get both tables (they are both created each time range_message_xlate() is called), but who cares; it
only needs to run occasionally.

]]

-- Wrap the rows in <out_t> with a <syntaxhighlight> opener and closer and return the preprocessed result.
--	frame		- frame object; supplies callParserFunction() and preprocess()
--	table_name	- name given to the rendered lua table
--	out_t		- sequence of prettified rows; modified in place (header inserted at [1], closer appended)
--	timestamp	- RangeMessage timestamp text handed to {{#time:}}; may be nil
--	timezone	- time zone text (with its leading space) appended verbatim after the formatted timestamp; may be nil
--	rep			- number of tabs placed between the table opener and the timestamp comment (15 for hyphen_pos_t, 14 for inverse_range_t)
-- returns the whole table as a single string
local function render_output (frame, table_name, out_t, timestamp, timezone, rep)
	local stamp;
	if timestamp then
		-- text inside double quotes is literal for {{#time:}} so its letters are not read as format codes.
		-- 'H' is the 24-hour hour; the 12-hour 'h' would render 15:12:13 as 03:12:13 with nothing to tell am from pm
		stamp = frame:callParserFunction ('#time', {'-- "RangeMessage timestamp": Y-m-d"T"H:i:s', timestamp});
	else
		stamp = '-- "RangeMessage timestamp": unknown';							-- without a time argument {{#time:}} would format the current time; say so instead
	end

	local header = table.concat ({
		'<syntaxhighlight lang="lua">local ' .. table_name .. ' = {',			-- opening stuff
		string.rep ('\t', rep),													-- tabs to place the timestamp
		stamp,																	-- the timestamp
		timezone or '',															-- and timezone when there is one
		});

	table.insert (out_t, 1, header);											-- header goes ahead of all of the rows
	table.insert (out_t, '\t}</syntaxhighlight>');								-- to close the table
	return frame:preprocess (table.concat (out_t, '\n'));						-- make a big string and done
end


--[[--------------------------< R A N G E _ M E S S A G E _ X L A T E >----------------------------------------

{{#invoke:Sandbox/trappist the monk/ISBN RangeMessage xlate|range_message_xlate}}

This function translates a local copy of https://www.isbn-international.org/export_rangemessage.xml for use by
Module:Format ISBN by way of Module:Format ISBN/data.

To update <hyphen_pos_t> in Module:Format ISBN/data:
	1. copy the new RangeMessage.xml text from the ISBN international export_rangemessage.xml page (url above) to
		your clipboard and paste it over the existing xml data inside the html comments in this module's doc page.
	2. save.  The new translation is presented on this module's doc page.
	3. copy the new translation from the doc page to your clipboard
	4. edit Module:Format ISBN/data and paste the new <hyphen_pos_t> table over the old
	5. save

]]

-- Translate the RangeMessage xml held in <range_message_raw> into the text of a lua table.
--	frame	- frame object from {{#invoke:}}; |range=yes selects inverse_range_t, anything else selects hyphen_pos_t
-- returns the selected table as rendered by render_output().  Rows have the form
--	hyphen_pos_t:		[<max isbn>] = {<registration group length>, <registrant length>, <publication length>},
--	inverse_range_t:	[<max isbn>] = <min isbn>,
-- raises an error with a descriptive message when the xml is missing or when a <Group> / <Rule> lacks a piece
-- that the translation needs
local function range_message_xlate (frame)
	if not range_message_raw then												-- nothing was read from the doc page
		error ('ISBN RangeMessage xlate: the doc page has no content to translate', 0);
	end

	local out_t = {};															-- rows for hyphen_pos_t
	local out_range_t = {};														-- rows for inverse_range_t
	local timestamp, timezone = range_message_raw:match (timestamp_pattern);	-- get the RangeMessage timestamp; both nil when there is no match

	for group in range_message_raw:gmatch (group_pattern) do					-- get a <Group>...</Group> block
		local prefix, registration_group = group:match (prefix_pattern);		-- get the prefix and registration group; these apply to every <Rule> in the <Group>
		local agency = group:match (agency_pattern) or 'unknown agency';		-- get the 'agency'; only ever used as a comment so a missing one is not fatal
		local agency_used = false;												-- flag for the avoidance of repeats

		for rule in group:gmatch (rule_pattern) do								-- get a <Rule>...</Rule> block
			if not prefix then
				error ('ISBN RangeMessage xlate: <Group> has a <Rule> but no usable <Prefix>', 0);
			end

			local range_min, range_max = rule:match (range_pattern);			-- get range limits
			local registrant_len = tonumber (rule:match (length_pattern));		-- get the registrant length as a number
			if not registrant_len then
				error ('ISBN RangeMessage xlate: <Rule> without a usable <Length> in group ' .. prefix .. '-' .. registration_group, 0);
			end

			if 0 ~= registrant_len then											-- rules with a zero length produce no output
				if not range_max then
					error ('ISBN RangeMessage xlate: <Rule> without a usable <Range> in group ' .. prefix .. '-' .. registration_group, 0);
				end

				local registrant_max = range_max:sub (1, registrant_len);		-- <registrant_len> digits from left side of <range_max>
				local registrant_min = range_min:sub (1, registrant_len);		-- <registrant_len> digits from left side of <range_min>

				local fill_length = 13 - (#prefix + #registration_group + #registrant_max);	-- how many '9's (or '0's) are needed to right-fill to thirteen digits
				local isbn_max = prefix .. registration_group .. registrant_max .. string.rep ('9', fill_length);
				local isbn_min = prefix .. registration_group .. registrant_min .. string.rep ('0', fill_length);

				local element_lengths = table.concat ({
					#registration_group,										-- length of isbn registration group (1)
					registrant_len,												-- length of isbn registrant (2)
					9 - #registration_group - registrant_len,					-- length available for publication element (3)
					}, ', ');

				local pretty_string = '\t[' .. isbn_max .. '] = {' .. element_lengths .. '},';	-- prettify
				local pretty_string_range = '\t[' .. isbn_max .. '] = ' .. isbn_min .. ',';		-- prettify
				if not agency_used then
					pretty_string = pretty_string .. string.rep ('\t', 12) .. '-- ' .. agency;	-- add agency to output
					pretty_string_range = pretty_string_range .. string.rep ('\t', 11) .. '-- ' .. agency;	-- add agency to output
					agency_used = true;											-- this 'agency' added to output; don't repeat it
				end
				table.insert (out_t, pretty_string);							-- add the prettified strings to the output sequences
				table.insert (out_range_t, pretty_string_range);
			end
		end
	end

	if 'yes' == frame.args.range then											-- select the output
		return render_output (frame, 'inverse_range_t', out_range_t, timestamp, timezone, 14);	-- inverse_range_t table
	end
	return render_output (frame, 'hyphen_pos_t', out_t, timestamp, timezone, 15);				-- hyphen_pos_t table
end


--[[--------------------------< E X P O R T S >----------------------------------------------------------------
]]

return {['range_message_xlate'] = range_message_xlate};						-- the only function made available to {{#invoke:}}