--[[
   padfixer.lua -- read "Poker After Dark" wikipedia source, produce summary tables
   2009-06-22  WF  Note no '#!' line since not on usual box


   This program allows a quick update of the Wikipedia "Poker After Dark" (PAD) "Stats"
   section after additional information about a tournament has been added to the page.


   To update the "Stats" section:

   1) Have Lua available; see lua.org  If you have a C or C++ compiler you can download
      and install Lua on your computer in less than five minutes.
   2) Copy/paste this page (or the source for it) into a text file named "padfixer.lua"
      on your computer.
   3) Edit the full page for "Poker After Dark" and copy/paste its full text into a text
      file named "padinput" on your computer.  Be sure to use the "Back" button to avoid
      changing the PAD page.
   4) Note the number of "seasons" of PAD that have been added to the page.  At this
      writing the number is 5.  This number will be designated "nSeasons", below.
      Likewise note the last week of play covered by the page; it will be designated
      "nWeek".  (Having to supply these two numbers by hand should be fixed.)
   5) From a command line, run:
        lua padfixer.lua nSeasons nWeek <padinput >padoutput
      This reads the source of the PAD page from "padinput" and produces a new "Stats"
      section in "padoutput".
   6) It is probable that one or more errors/inconsistencies in player names will be
      discovered by the program.  The program requires that all occurrences of a player's
      name in the tables be identical.  Fix the "padinput" file accordingly.  You might
      also want to fix the tables on the PAD page itself to limit this problem in the
      future.
   7) When the program has run correctly, edit the PAD "Stats" section, delete all its
      content, then cut/paste the "padoutput" file into it, completely replacing it.
   8) "Show preview" the changes and check them before saving them.


   Potential problems:

   - The PAD source has some unicode characters so you have to select UTF-8 encoding
     in order to save it.  This does not affect the "Stats" tables now but may if we
     get, for example, a player with an umlaut in his/her name.  Plain Lua is rather
     dumb about this.  Maybe someone will be moved to translate this program into,
     say, Python?
   - The complete "Stats" section is reproduced.  When accepted edits are made to it,
     even if the table contents remain the same, the "forms" in the below source code
     will have to be changed.

   I plan to additionally sort on the other finishes columns, sometime or other.
--]]

-------------------------------------------------------------------------------------
-- "Stats" forms sans table contents ([[ and ]] for multi-line strings)

-- Section heading and introduction.  Unlike the other forms this one is used
-- as a string.format template: its two %d slots are filled with the season and
-- week numbers given on the command line.
local form1 = [[
==Stats==

These statistics are meant to help readers judge the players' relative performances in the main type of event featured on "Poker After Dark": The six-player winner-take-all tournament. So, with the exception of the "Total appearances" column, the statistics omit all "Cash Games" (which do not even have declared winners) and the one "Heads Up" event played so far (Season 4, Week 3, won by Phil Hellmuth). The notations (x2) and (x3) mean the player achieved that placement two or three times, respectively.

:''Stats updated to reflect results through Season %d, Week %d.''

]]

-----

-- Column-layout opening plus heading and header row of the "Most wins" table.
-- Written out verbatim; it holds a literal '%' and so must not be passed
-- through string.format.
local form2 = [[
{{col-begin}}
{{col-2}}

===Most wins===
The following players have won more than once. (Aired episodes of regular-format tournaments only.)
{| class="wikitable"
|-
! Player !! Wins !! Appearances !! % Won !! Other finishes
]]

-----

-- Closes the "Most wins" table, then heading and header row of "Never won".
local form3 = [[
|-
|}

===Never won===
The following players have appeared more than twice, but have yet to win. (Aired episodes of regular-format tournaments only.)
{| class="wikitable"
|-
! Player !! Appearances !! Highest finish
]]

-----

-- Closes the "Never won" table, emits the second {{col-2}}, then heading and
-- header row of "Most appearances".
local form4 = [[
|-
|}
{{col-2}}

===Most appearances===
The following players have been invited to play on ''Poker After Dark'' more than twice. ("Total appearances" column includes all filmed events; "Aired" omits non-regular format tournaments as well as yet-to-air episodes.)
{| class="wikitable"
|-
! Player !! Total<br />appearances !! Aired !! Highest<br />aired finish
]]

-----

-- Closes the "Most appearances" table and the column layout.
local form5 = [[
|-
|}
{{col-end}}
]]

-------------------------------------------------------------------------------------
-- input parsing

-- Blank out every double-quoted substring (for example a nickname) in x.
-- Only the plain '"' character counts as a quote.  Each quoted run, quotes
-- included, is replaced by a single space.
-- Returns the resulting string; raises an error if a lone '"' is left over.
function removeQuotes(x)
  local unquoted = string.gsub(x, '".-"', ' ')
  if not string.find(unquoted, '"') then
    return unquoted
  end
  error('unmatched quote in string |' .. x .. '|')
end

-- Reduce one table cell to a bare player name.
-- In order: quoted nicknames are dropped, '[' and ']' link brackets are
-- stripped, everything from the first '(' onward is cut (parenthetical
-- disambiguation), both ends are trimmed and runs of whitespace are squeezed
-- to one space.
-- Returns the cleaned name, or nil when nothing is left or the text contains
-- an '='.
function cleanName(name)
  local s = string.gsub(removeQuotes(name), '[%[%]]', '')
  local paren = string.find(s, '%(')
  if paren then
    s = string.sub(s, 1, paren - 1)
  end
  s = string.gsub(s, '^%s*(.-)%s*$', '%1')
  s = string.gsub(s, '%s+', ' ')
  if s ~= '' and not string.find(s, '=') then
    return s
  end
  return nil
end

-- Rearrange "First [Middle ...] Last [Suffix]" into "Last [Suffix], First ..."
-- so that names can be sorted by surname.
--
-- The rules are deliberately primitive:
--   * a trailing generational suffix (Jr, Jr., II, 2nd, III, 3rd, IV, 4th)
--     stays attached after the surname;
--   * a 'de'/'De' word found after the first word and before the surname
--     starts the surname, and is spelled 'De' so both forms sort together.
--
-- name: player name containing at least one word.
-- Returns the re-ordered string; a single-word name yields "Word,".
-- Raises an error when name contains no words.
function makeLastFirst(name)
  -- string.gfind is the Lua 5.0 name of string.gmatch and is absent from
  -- Lua 5.2 and later; use whichever this interpreter provides.
  local gmatch = string.gmatch or string.gfind

  local words, n = {}, 0
  for w in gmatch(name, '%S+') do
    n = n + 1
    words[n] = w
  end
  if n == 0 then
    error('makeLastFirst: no words in name |'..name..'|')
  end

  -- index of the word that starts the surname
  local last = n
  local tail = words[n]
  if last > 1 and
    (tail == 'Jr'  or tail == 'Jr.' or
     tail == 'II'  or tail == '2nd' or
     tail == 'III' or tail == '3rd' or
     tail == 'IV'  or tail == '4th')
  then
    last = last - 1
  end

  -- scan backwards from just before the surname; never look at word 1
  for i = last-1, 2, -1 do
    if words[i] == 'de' or words[i] == 'De' then
      last = i
      words[i] = 'De'   -- sort 'de' names correctly
      break
    end
  end

  local result = table.concat(words, ' ', last, n)..','
  if last > 1 then
    result = result..' '..table.concat(words, ' ', 1, last-1)
  end
  return result
end

-- Read the PAD page source from standard input and tally every player.
--
-- The first nSeasons wikitables encountered are treated as "Episode Guide"
-- tables and the next nSeasons as "Results and Notes" tables; reading stops
-- at the table after those.  Rows containing 'N/A' or 'Applicable' are
-- skipped.
--
-- nSeasons: number of seasons on the page.
-- Returns a table keyed by cleaned player name.  Each record holds
--   [1]..[6]   number of 1st..6th place finishes,
--   name       the cleaned name,
--   lastFirst  makeLastFirst(name), used later as a sort key,
--   appears    number of Episode Guide rows listing the player,
--   aired      number of Results rows listing the player.
-- Raises an error on end of input inside a table, on fewer than 2*nSeasons
-- tables, on a Results player who is in no Episode Guide table, and on a
-- Results row with more than six player columns.
function parse(nSeasons)
  -- string.gfind is the Lua 5.0 name of string.gmatch and is absent from
  -- Lua 5.2 and later; use whichever this interpreter provides.
  local gmatch = string.gmatch or string.gfind
  local players, nFound = {}, 0

  while true do
    local line = io.read()   -- not io.lines() since rows are read in the inner loop
    if not line then break end
    if string.find(line, 'wikitable') then
      nFound = nFound + 1
      if nFound > 2*nSeasons then break end

      local isGuide = nFound <= nSeasons
      -- number of leading cells that are not players:
      --   Episode Guide: | 1 || 1-6 || date || title || [[player1]] || p2 || ... || [[p6]]
      --   Results:       | 5 || title ||player1 ||p2 ||p3 ||p4 ||p5 || p6
      local leading = isGuide and 4 or 2

      while true do
        local row = io.read()
        assert(row, 'EOF inside wikitable')
        if string.find(row, '|}') then break end
        if not string.find(row, 'N/A') and not string.find(row, 'Applicable') then
          local nField = -leading
          -- the appended '|' terminates the final cell so the pattern sees it
          for cell in gmatch(row..'|', '|([^|]+)|') do
            nField = nField + 1
            if nField > 0 then
              local name = cleanName(cell)
              if name then
                local rec = players[name]
                if isGuide then
                  if rec then
                    rec.appears = rec.appears + 1
                  else
                    players[name] = {0,0,0,0,0,0, name=name,
                      lastFirst=makeLastFirst(name), appears=1, aired=0}
                  end
                else
                  if not rec then
                    error('Player "'..name..
                      '" is in a Results table but not in an Episode Guide table')
                  end
                  -- player column k is scored as place 7-k, so the sixth
                  -- player column is the winner
                  local place = 7 - nField
                  if not rec[place] then
                    error('More than six player columns in Results row |'..row..'|')
                  end
                  rec[place] = rec[place] + 1
                  rec.aired = rec.aired + 1
                end
              end
            end
          end
        end
      end
    end
  end

  assert(nFound >= 2*nSeasons, 'Too few wikitables')
  return players
end

-------------------------------------------------------------------------------------
-- table generation

-- Ordinal labels for finishing places 1 through 6
local pos = {'1st', '2nd', '3rd', '4th', '5th', '6th'}

-- Describe how often a player finished in place n.
-- t: record whose entries [1]..[6] are finish counts; n: the place (1..6).
-- Returns nil for a count of zero, the bare ordinal for a single finish and
-- "ordinal (xCOUNT)" otherwise.
function posEng(t, n)
  local count = t[n]
  if count == 0 then
    return nil
  elseif count == 1 then
    return pos[n]
  else
    return string.format('%s (x%d)', pos[n], count)
  end
end

-- Print the rows of the "Most wins" table: players with two or more wins,
-- ordered by wins (descending), then winning percentage (descending), then
-- "last, first" name (ascending).
-- t: array of player records.  It is sorted in place and every record gains
-- a 'percent' field (wins as a percentage of aired appearances).
-- The "Other finishes" cell lists the player's 2nd..6th place results.
function mostWins(t)
  for _, player in pairs(t) do
    player.percent = 100*player[1]/player.aired
  end

  local function ranksHigher(a, b)
    if a[1] > b[1] then
      return true
    elseif a[1] < b[1] then
      return false
    elseif a.percent > b.percent then
      return true
    elseif a.percent < b.percent then
      return false
    end
    return a.lastFirst < b.lastFirst
  end
  table.sort(t, ranksHigher)

  for _, player in ipairs(t) do
    -- sorted by wins, so the first player below two wins ends the table
    if player[1] < 2 then break end

    local others, count = {}, 0
    for place = 2, 6 do
      if player[place] > 0 then
        count = count + 1
        others[count] = posEng(player, place)
      end
    end

    local row = string.format('|-\n| %s || %d || %d || %.0f%% || ',
      player.name, player[1], player.aired, player.percent)
    if count > 0 then
      row = row..' '..table.concat(others, ', ')
    end
    print(row)
  end
end

-- Print the rows of the "Never won" table: players with no win and at least
-- three aired appearances, ordered by aired appearances (descending), then
-- "last, first" name.
-- t: array of player records; it is sorted in place.  Winless players sort
-- to the front, so output stops at the first record that does not qualify.
-- The last cell shows the best finish among places 2..6.
function neverWon(t)
  local function comesFirst(a, b)
    if a[1] ~= b[1] then return a[1] < b[1] end              -- fewest wins first
    if a.aired ~= b.aired then return a.aired > b.aired end  -- then most aired
    return a.lastFirst < b.lastFirst                         -- then by name
  end
  table.sort(t, comesFirst)

  for _, player in ipairs(t) do
    if player[1] > 0 or player.aired < 3 then break end

    local row = string.format('|-\n| %s || %d || ', player.name, player.aired)
    local place = 2
    while place <= 6 and not (player[place] > 0) do
      place = place + 1
    end
    if place <= 6 then
      row = row..posEng(player, place)
    end
    print(row)
  end
end

-- Print the rows of the "Most appearances" table: players with three or more
-- total appearances, ordered by total appearances (descending), then aired
-- appearances (descending), then "last, first" name.
-- t: array of player records; it is sorted in place.
-- The last cell shows the best finish among places 1..6 and stays empty when
-- the player has no recorded finish.
function mostAppearances(t)
  local function comesFirst(a, b)
    if a.appears ~= b.appears then return a.appears > b.appears end
    if a.aired ~= b.aired then return a.aired > b.aired end
    return a.lastFirst < b.lastFirst
  end
  table.sort(t, comesFirst)

  for _, player in ipairs(t) do
    if player.appears < 3 then break end

    local row = string.format('|-\n| %s || %d || %d || ',
      player.name, player.appears, player.aired)
    local place = 1
    while place <= 6 and not (player[place] > 0) do
      place = place + 1
    end
    if place <= 6 then
      row = row..posEng(player, place)
    end
    print(row)
  end
end

-------------------------------------------------------------------------------------
-- main pgm

-- Debugging aid: write every player record to stderr, ordered by
-- "last, first" name so that misspelled variants of a name end up adjacent.
-- t: array of player records; it is sorted in place.
function dump(t)
  table.sort(t, function(a, b) return a.lastFirst < b.lastFirst end)

  local err = io.stderr
  err:write('dumping...\n')
  for _, p in ipairs(t) do
    -- keys shorter than 14 characters get a second tab (presumably to keep
    -- the following column aligned)
    local fmt
    if string.len(p.lastFirst) >= 14 then
      fmt = '  %s\tname="%s", appears=%d, aired=%d, results=%d,%d,%d,%d,%d,%d\n'
    else
      fmt = '  %s\t\tname="%s", appears=%d, aired=%d, results=%d,%d,%d,%d,%d,%d\n'
    end
    err:write(string.format(fmt,
      p.lastFirst, p.name, p.appears, p.aired, p[1], p[2], p[3], p[4], p[5], p[6]))
  end
  err:write('...end dump\n')
end

-- Collect the values of a keyed table into a plain array (the keys are
-- dropped) so that the result can be handed to table.sort.  Elements arrive
-- in pairs() order, which is unspecified.
function toarray(t)
  local arr = {}
  for _, value in pairs(t) do
    table.insert(arr, value)
  end
  return arr
end

-- Both numeric arguments are mandatory (the message used to omit nWeek).
local usage = 'usage: lua padfixer.lua nSeasons nWeek <padinput >padoutput'

-- Program driver: validate the command line, parse the page source from
-- standard input and write the complete replacement "Stats" section to
-- standard output.
-- arg: array of command-line strings; exactly two are expected, nSeasons and
--      nWeek, each a positive whole number.
-- Raises an error carrying the usage message when the arguments are missing,
-- surplus, non-numeric, non-positive or not whole numbers.
function padfixer(arg)
  assert(arg[1] and arg[2] and not arg[3], usage)
  local nSeasons,nWeek = tonumber(arg[1]),tonumber(arg[2])
  assert(nSeasons and nSeasons > 0 and nWeek and nWeek > 0, usage)
  -- both values are printed with %d and nSeasons counts tables, so fractions
  -- are rejected here instead of failing later
  assert(nSeasons == math.floor(nSeasons) and nWeek == math.floor(nWeek), usage)
  local t = toarray(parse(nSeasons))   -- array for sorting
  --dump(t)
  io.write(string.format(form1,nSeasons,nWeek))
  io.write(form2)
  mostWins(t)
  io.write(form3)
  neverWon(t)
  io.write(form4)
  mostAppearances(t)
  io.write(form5)
end

-------------------------------------------------------------------------------------

-- Script entry point.  `arg` is the global table in which the standalone lua
-- interpreter stores the script's command-line arguments.
padfixer(arg)   -- command-line arguments

-- padfixer.lua end