User:Allens/GOCE/GOCE.barnstars1.pl

#!/usr/bin/perl

use Carp qw(cluck confess);
use Memoize;
use warnings FATAL => qw(uninitialized);
use warnings;
use strict qw(subs refs);

# does not do "most articles first day"!

if (defined($ARGV[0]) && ($ARGV[0] ne '-')) {
  (open(INPUT,$ARGV[0]) || (die "Can't open $ARGV[0] for input: $!; stopped"));
} else {
  (open(INPUT,"-") || (die "Can't dup STDIN for input: $!; stopped"));
}

if (defined($ARGV[1]) && ($ARGV[1] ne '-')) {
  (open(OUTPUT,">>" . $ARGV[1]) || (die "Can't open $ARGV[1] for output: $!; stopped"));
} else {
  (open(OUTPUT,">-") || (die "Can't dup STDOUT for output: $!; stopped"));
}

$largest_article_size = 0;
$largest_article_user = "";
$largest_article_name = "";

%size_poss_ties = ();

%user_header = (); # without =
%user_first_letter = ();
%user_article_size = ();
%user_total_articles = ();
%user_total_words = ();
%user_total_5k = ();
%user_10k_articles = ();
%user_rollover_words = ();
%user_alias = ();
%user_0k_articles = ();

#^=====???=====
#^{{GOCE Article list
# |articles =
# # {{[Cc]ompleted}} [[???]] (###)
# # {{[Ww]orking}} [[???]] (###) <- optional
# |total-articles = ##
# |total-words = ###
# |rollover-words = ###
# }}
#^----

#^====?====

$curr_first_letter = "";

$user = "";
$user_num = 0;

$expecting = "dashes";

$in_comment = 0;

LINE: while (defined($line = <INPUT>)) {
  chomp($line);
  if ($line =~ m/^====([0-9a-zA-Z])====$/) {
    $curr_first_letter = uc($1);
  } elsif (($line =~ m/^\s*$/) || ($line =~ m/^===Totals===\s*$/) ||
	   ($line =~ m/^\s*\{\{CompactTOC8\b/i)) {
    next LINE;
  } elsif ($line =~ m/^\s*<!--/) {
    unless ($line =~ m/-->/) {
      $in_comment = 1;
    }
    next LINE;
  } elsif ($in_comment) {
    if ($line =~ m/-->/) {
      $in_comment = 0;
    }
    next LINE;
  } elsif ($line =~ m/^\s*\#\s*\{\{\s*[Ww]orking\b/) {
    next LINE;
  } elsif ($line =~ m/^=====((?:[^=]+|.)+?)=====\s*$/) {
    my $header = $1;
    $header =~ s/^\s+//;
    $header =~ s/\s+$//;
    unless (($expecting eq "header") || ($expecting eq "dashes")) {
      warn "Unexpected header '$header' (expected $expecting)\n";
    }
    $user = $header;
    my $alias = $user;
    my $found = 0;
    if ($header =~ m/\[\[\s*User:([^\]\|]+)/i) {
      $user = $1;
      $found = 1;
      $alias = $user;
      my $qm_user = quotemeta($user);
      if ($header =~ m/\[\[\s*User:$qm_user\|\s*([\s\w\.]+)\]\]/) {
	$alias = $1;
	$alias =~ s/\s+$//;
      }
    } elsif ($header =~ m/\[\[\s*User[\s_][Tt]alk:([^\]\|]+)/i) {
      $user = $1;
      $found = 1;
      $alias = $user;
    } elsif ($header =~ m/^([0-9A-Za-z])$/) {
      $curr_first_letter = uc($1);
      next LINE;
    } else {
      warn "Can't distinguish user in '$header'\n";
    }
    if (exists($user_header{$user})) {
      die "Duplicate user '$user' (line '$line'); stopped";
    }
    $user_alias{$user} = $alias;
    if ($found) {
      $user_header{$user} = "[[User:$user|$alias]] ("
	. "[[User talk:$user|talk]])";
      if ($user =~ m/^[^0-9A-Za-z]*([0-9A-Za-z])/) {
	$curr_first_letter = uc($1);
      }
    } else {
      $user_header{$user} = $header;
    }
    $user_first_letter{$user} = $curr_first_letter;
    $user_total_articles{$user} = 0;
    $user_total_words{$user} = 0;
    $user_total_5k{$user} = 0;
    $user_0k_articles{$user} = 0;
    $user_rollover_words{$user} = 0;
    $expecting = "GOCE";
    $user_num{$user} = $user_num;
    $user_num++;
  } elsif ($line =~ m/^----\s*$/) {
    unless (($expecting eq "dashes") || ($expecting eq "rollover")) {
      warn "$user; Unexpected dashes '$line' (expecting $expecting)\n";
    }
    $expecting = "header";
  } elsif (! length($user)) {
    warn "Unexpected line '$line' - no current user\n";
  } elsif ($line =~ m/^\{\{\s*GOCE\s+Article\s+list\s*$/) {
    unless ($expecting eq "GOCE") {
      warn "$user: Unexpected GOCE: '$line' (expecting $expecting)\n";
    }
    $expecting = "articles";
  } elsif ($line =~ m/^\s*\|\s*articles\s*=\s*$/) {
    unless ($expecting eq "articles") {
      warn "$user: Unexpected articles: '$line'\n";
    }
    $expecting = "Completed";
  } elsif ($line =~
	   m/^\s*\#\s*(?:<[Ss]>\s*)?\{\{\s*[Cc]ompleted\s*\}\}\s*
	     \'*\[\[((?:[^]]+|.)*?)\]\]\'*\s*
             (?:\(section\)\s*)?(\([^)]*\)|[\d,]+)\s*(.*)$/x) {
    my $article = $1;
    my $words = $2;
    my $rest = $3;
    unless ($expecting eq "Completed") {
      warn "$user: Unexpected Completed: '$line' (expecting $expecting)\n";
    }
    if (defined($rest) && length($rest) &&
	($rest =~ m/(?:\{\{rejected\}\}|decline|denied|disagree|invalid|
		      \bnot\b|redflag|\bX\b|Cross|\bN[ao]?\b|
		      nay|negative)/xi)) {
      warn "$user: Skipping '$line' and subtracting 1200 due to '$rest'\n";
      $user_total_words{$user} -= 1200;
      next LINE;
    }
    unless (defined($article) && length($article) && defined($words) &&
	    length($words)) {
      $expecting = "Completed";
      next LINE;
    }
    $words =~ s/^\s*[\\(]+\s*//;
    $words =~ s/\s*[\\)]+\s*$//;
    $words =~ s/,+//g;
    if ($words =~ m/^\s*(?:app\w*?x\.?\s*)?(\d+)(?:\s*words)?\s*$/i) {
      $words = ($1+0);
    } elsif (! length($words)) {
      $words = 0;
    } elsif ($words =~ m/^\s*zero\s*$/i) {
      $words = 0;
    } else {
      warn "$user: Can't interpret words '$words' from '" . $line
	. "'; treating as 0\n";
      $words = 0;
    }
    if ($words > $largest_article_size) {
      $largest_article_size = $words;
      $largest_article_user = $user;
      $largest_article_name = $article;
    } elsif ($words && ($words == $largest_article_size)) {
      $size_poss_ties{$words} = 1;
    }
    $user_article_size{$user}{$article} = $words;
    $user_total_articles{$user}++;
    $user_total_words{$user} += $words;
    if ($words == 0) {
      $user_0k_articles{$user}++;
    }
    if ($words >= 5000) {
      $user_total_5k{$user} += int($words/5000);
    }
    if ($words >= 10000) {
      $user_10k_articles{$user}{$article} = $words;
    }
    $expecting = "Completed";
  } elsif ($line =~ m/^\s*\#\s*\{\{\s*[Cc]ompleted\s*\}\}\s*<!--/) {
    unless ($expecting eq "Completed") {
      warn "$user: Unexpected Completed: '$line' (expecting $expecting)\n";
    }
    $expecting = "Completed";
  } elsif ($line =~ m/^\s*\#\s*\{\{\s*[Cc]ompleted\s*\}\}\s*\[\[\]\]/) {
    unless ($expecting eq "Completed") {
      warn "$user: Unexpected Completed: '$line' (expecting $expecting)\n";
    }
    $expecting = "Completed";
  } elsif ($line =~ m/^\s*\|\s*total-(?:articles|words)\b/i) {
    unless (($expecting eq "Completed") || ($expecting eq "rollover")) {
      warn "$user: Unexpected total-articles/words '$line' (expecting "
	. $expecting . ")\n";
    }
    $expecting = "rollover";
  } elsif ($line =~ m/^\s*\|\s*rollo?ver-words\s*=\s*([\d,]+)\s*$/i) {
    my $words = $1;
    unless ($expecting eq "rollover") {
      warn "$user: Unexpected rollover-words '$line' (expecting $expecting)\n";
    }
    # note for future improvement: Read old file to figure out
    $words =~ s/,+//g;
    if ($words =~ m/^(\d+)$/) {
      my $rollover = ($1+0);
      if ($user_total_articles{$user} > 0) {
	$user_rollover_words{$user} = $rollover;
      } elsif ($rollover > 0) {
	warn "$user: Not doing rollover ($rollover): no articles\n";
      }
    } else {
      warn "$user: Can't interpret rollover-words '$words' (from '" . $line
	. "'); treating as 0\n";
    }
    $expecting = "close";
  } elsif ($line =~ m/^\s*\|\s*rollo?ver-words\s*=\s*([\d,]+)\s*\}\}\s*$/i) {
    my $words = $1;
    unless ($expecting eq "rollover") {
      warn "$user: Unexpected rollover-words '$line' (expecting $expecting)\n";
    }
    # note for future improvement: Read old file to figure out
    $words =~ s/,+//g;
    if ($words =~ m/^(\d+)$/) {
      my $rollover = ($1+0);
      if ($user_total_articles{$user} > 0) {
	$user_rollover_words{$user} = $rollover;
      } elsif ($rollover > 0) {
	warn "$user: Not doing rollover: no articles\n";
      }
    } else {
      warn "$user: Can't interpret rollover-words '$words' (from '" . $line
	. "'); treating as 0\n";
    }
    $expecting = "dashes";
  } elsif ($line =~ m/^\s*\}\}\s*$/) {
    unless ($expecting eq "close") {
      warn "$user: Unexpected close '$line' (expecting $expecting)\n";
    }
    $expecting = "dashes";
  } else {
    warn "$user: Can't interpret line '$line' (expecting $expecting)\n";
  }
}
close(INPUT);
warn "Finished reading input; have " . scalar(keys %user_header)
  . " users; largest article was '$largest_article_name' ("
  . "$largest_article_size) by $largest_article_user; "
  . scalar(keys %user_10k_articles) . " users had 10k+ articles\n";

%awards_total_count =
  (4000 => "Modest",
   8000 => "Working Wikipedian",
   12000 => "Cleanup",
   20000 => "Tireless Contributor",
   30000 => "(old school) League of Copy Editors",
   40000 => "(modern) GOCE",
   60000 => "Diligence",
   80000 => "Order of the Superior Scribe",
   100000 => "Most Excellent Order of the Caretaker");

#$largest_article_size = 0;
#$largest_article_user = "";
#$largest_article_name = "";

#%user_header = (); # without =
#%user_article_size = ();
#%user_total_articles = ();
#%user_total_words = ();
#%user_total_5k = ();
#%user_10k_articles = ();
#%user_rollover_words = ();

#=====$user_header=====
#* total-articles = $user_total_articles
#* total-words = $user_total_words
#* rollover-words = $user_rollover_words
#* grand total = $user_total_words+$user_rollover_words
#* new rollover words = ###
#* barnstars =
#**[for total words]
#**[for total article rank?]
#**[for total words rank?]
#**[for total 5k+ rank?]
#**[for largest article?]
#**[for articles >= 10k?]

if (exists($size_poss_ties{$largest_article_size})) {
  warn "Have tie for largest article; check manually!\n";
}

# figure out ranks here

@users_by_num_articles =
  sort {$user_total_articles{$b} <=> $user_total_articles{$a}}
  (keys %user_total_articles);
@users_by_num_articles = grep {$user_total_articles{$_} > 0}
  (@users_by_num_articles);
%user_rank_articles = ();
%rank_articles_users = ();
$curr_num = $user_total_articles{$users_by_num_articles[0]};
$curr_rank = 1;
$user_rank_articles{$users_by_num_articles[0]} = $curr_rank;
$rank_articles_users{$curr_rank}{$users_by_num_articles[0]} = 1;
RANK_ARTICLES: for (my $i = 1; $i <= $#users_by_num_articles; $i++) {
  if ($user_total_articles{$users_by_num_articles[$i]} < $curr_num) {
    $curr_rank++;
    if ($curr_rank > 5) {
      last RANK_ARTICLES;
    }
  }
  $user_rank_articles{$users_by_num_articles[$i]} = $curr_rank;
  $curr_num = $user_total_articles{$users_by_num_articles[$i]};
  $rank_articles_users{$curr_rank}{$users_by_num_articles[$i]} = 1;
}

@users_by_num_words =
  sort {$user_total_words{$b} <=> $user_total_words{$a}}
  (keys %user_total_words);
@users_by_num_words = grep {$user_total_words{$_} > 0}
  (@users_by_num_words);
%user_rank_words = ();
%rank_words_users = ();
$curr_num = $user_total_words{$users_by_num_words[0]};
$curr_rank = 1;
$user_rank_words{$users_by_num_words[0]} = $curr_rank;
$rank_words_users{$curr_rank}{$users_by_num_words[0]} = 1;
RANK_WORDS: for (my $i = 1; $i <= $#users_by_num_words; $i++) {
  if ($user_total_words{$users_by_num_words[$i]} < $curr_num) {
    $curr_rank++;
    if ($curr_rank > 5) {
      last RANK_WORDS;
    }
  }
  $user_rank_words{$users_by_num_words[$i]} = $curr_rank;
  $curr_num = $user_total_words{$users_by_num_words[$i]};
  $rank_words_users{$curr_rank}{$users_by_num_words[$i]} = 1;
}

@users_by_num_5k =
  sort {$user_total_5k{$b} <=> $user_total_5k{$a}}
  (keys %user_total_5k);
@users_by_num_5k = grep {$user_total_5k{$_} > 0}
  (@users_by_num_5k);
%user_rank_5k = ();
%rank_5k_users = ();
$curr_num = $user_total_5k{$users_by_num_5k[0]};
$curr_rank = 1;
$user_rank_5k{$users_by_num_5k[0]} = $curr_rank;
$rank_5k_users{$curr_rank}{$users_by_num_5k[0]} = 1;
RANK_5K: for (my $i = 1; $i <= $#users_by_num_5k; $i++) {
  if ($user_total_5k{$users_by_num_5k[$i]} < $curr_num) {
    $curr_rank++;
    if ($curr_rank > 5) {
      last RANK_5K;
    }
  }
  $user_rank_5k{$users_by_num_5k[$i]} = $curr_rank;
  $curr_num = $user_total_5k{$users_by_num_5k[$i]};
  $rank_5k_users{$curr_rank}{$users_by_num_5k[$i]} = 1;
}

sub num_format { # for Number-Format-1.73
  my $num = $_[0];
  unless (defined($num)) {
    confess "Undefined input to num_format; stopped";
  }
  unless ($num == int($num)) {
    confess "$num is not an integer; stopped";
  }
  $num = '0'x(3 - (length($num) % 3)) . $num;
  $num = join(",", grep {$_ ne ''} (split(/(...)/, $num)));
  $num =~ s/^0*,?//;
  if ($num eq '') {
    $num = 0;
  }
  return $num;
}
memoize('num_format');

# print out overall chart here

print OUTPUT "\{| class=\"wikitable\"\n";
print OUTPUT "|+ '''Gold Star Award Leaderboard'''\n";
print OUTPUT "!\n";
print OUTPUT "! Articles\n";
print OUTPUT "! Words\n";
print OUTPUT "! 5k+ Articles\n";
for $rank (1..5) {
  print OUTPUT "|-\n";
  print OUTPUT "| $rank.\n";
  print OUTPUT "| "
    . join(", ",map {$user_alias{$_}
		       . " (" . num_format($user_total_articles{$_}) . ")"}
	   (sort(keys %{ $rank_articles_users{$rank} }))) . "\n";
  print OUTPUT "| "
    . join(", ",map {$user_alias{$_}
		       . " (" . num_format($user_total_words{$_}) . ")"}
	   (sort(keys %{ $rank_words_users{$rank} }))) . "\n";
  print OUTPUT "| "
    . join(", ",map {$user_alias{$_} . " (" . $user_total_5k{$_} . ")"}
	   (sort(keys %{ $rank_5k_users{$rank} }))) . "\n";
}


#{| class="wikitable"
#|+ '''Gold Star Award Leaderboard'''
#!
#! Articles
#! Words
#! 5k+ Articles
#|-
#| 1.
#| ??? (###)
#| ??? (###)
#| ??? (###)

#|}

print OUTPUT "|}\n\n";

print OUTPUT "* Most articles, first day: FILL THIS IN!!\n";
print OUTPUT "* Largest single article: \[\[$largest_article_name\]\] ("
  . num_format($largest_article_size) . ") - " 
  . $user_alias{$largest_article_user} . "\n";

if (scalar(keys %user_10k_articles)) {
  $to_print_10k = "* 10K article(s):";
  
  foreach $user (sort(keys %user_10k_articles)) {
    $to_print_10k .= ", " . $user_alias{$user};
    if (scalar(keys %{ $user_10k_articles{$user} }) > 1) {
      $to_print_10k .= " (" . scalar(keys %{ $user_10k_articles{$user} })
	. ")";
    }
  }
  $to_print_10k =~ s/:,/:/;
  print OUTPUT $to_print_10k . ".\n";
}
print OUTPUT "----\n";
print OUTPUT "\n";
print OUTPUT "{{CompactTOC8|num=yes|side=yes}}\n";
print OUTPUT "\n";

$num_users_with_barnstars = 0;
$num_users_with_multiple_barnstars = 0;
$curr_first_letter = "";

%num_to_th = (1 => '1st',
	      2 => '2nd',
	      3 => '3rd',
	      4 => '4th',
	      5 => '5th');

@users = sort {($user_first_letter{$a} cmp $user_first_letter{$b}) ||
		 ($user_num{$a} <=> $user_num{$b})} (keys %user_num);

foreach $user (@users) {
  unless ($user_total_articles{$user} > 0) {
    next;
  }
  if ($user_first_letter{$user} ne $curr_first_letter) {
    print OUTPUT "====" . $user_first_letter{$user} . "====\n";
    $curr_first_letter = $user_first_letter{$user};
  }
  print OUTPUT "=====" . $user_header{$user} . "=====\n";
  if ($user_0k_articles{$user} > 0) {
   print OUTPUT "* total-articles = "
      . num_format($user_total_articles{$user}) . " ("
	. num_format($user_0k_articles{$user}) . " 0-words)\n";
  } else {
    print OUTPUT "* total-articles = "
      . num_format($user_total_articles{$user}) . "\n";
  }
  print OUTPUT "* total-words = "
    . num_format($user_total_words{$user}) . "\n";
  print OUTPUT "* rollover-words = "
    . num_format($user_rollover_words{$user}) . "\n";
  my $grand_total = $user_total_words{$user} + $user_rollover_words{$user};
  print OUTPUT "* grand total = " . num_format($grand_total) . "\n";
  my $curr_barnstar_total = "";
  my $curr_barnstar_rollover = $grand_total;
  foreach $barnstar_level (keys %awards_total_count) {
    if ($barnstar_level < $grand_total) {
      my $diff = $grand_total - $barnstar_level;
      if ($diff < $curr_barnstar_rollover) {
	$curr_barnstar_total = $awards_total_count{$barnstar_level};
	$curr_barnstar_rollover = $diff;
      }
    }
  }
  print OUTPUT "* new rollover words = "
    . num_format($curr_barnstar_rollover) . "\n";
  my $num_barnstars = ((length($curr_barnstar_total) > 0) +
		       ($user eq $largest_article_user) +
		       exists($user_10k_articles{$user}) +
		       exists($user_rank_articles{$user}) +
		       exists($user_rank_words{$user}) +
		       exists($user_rank_5k{$user}));
  if ($num_barnstars > 0) {
    #my(@barnstars_to_print) = ();
    $num_users_with_barnstars++;
    my $list_stars = "**";
    if ($num_barnstars > 1) {
      print OUTPUT "* barnstars =\n";
      $num_users_with_multiple_barnstars++;
    } else {
      print OUTPUT "* barnstar =";
      $list_stars = "";
    }
    if (length($curr_barnstar_total)) {
      print OUTPUT "$list_stars $curr_barnstar_total\n";
      # add to $barnstars_to_print
    }
    # do rank barnstars
    if (exists($user_rank_articles{$user})) {
      my $rank = $user_rank_articles{$user};
      if (scalar(keys %{ $rank_articles_users{$rank} }) > 1) {
	print OUTPUT "$list_stars equal " . $num_to_th{$rank}
	  . " place, number of articles ("
	    . num_format($user_total_articles{$user}) . ")\n";
      } else {
	print OUTPUT "$list_stars " . $num_to_th{$rank}
	  . " place, number of articles ("
	    . num_format($user_total_articles{$user}) . ")\n";
      }
      # add to $barnstars_to_print
    }
    if (exists($user_rank_words{$user})) {
      my $rank = $user_rank_words{$user};
      if (scalar(keys %{ $rank_words_users{$rank} }) > 1) {
	print OUTPUT "$list_stars equal " . $num_to_th{$rank}
	  . " place, word count ("
	    . num_format($user_total_words{$user}) . ")\n";
      } else {
	print OUTPUT "$list_stars " . $num_to_th{$rank}
	  . " place, word count ("
	    . num_format($user_total_words{$user}) . ")\n";
      }
      # add to $barnstars_to_print
    }
    if (exists($user_rank_5k{$user})) {
      my $rank = $user_rank_5k{$user};
      if (scalar(keys %{ $rank_5k_users{$rank} }) > 1) {
	print OUTPUT "$list_stars equal " . $num_to_th{$rank}
	  . " place, 5k+ (" . $user_total_5k{$user} . ")\n";
      } else {
	print OUTPUT "$list_stars " . $num_to_th{$rank} . " place, 5k+ ("
	  . $user_total_5k{$user} . ")\n";
      }
      # add to $barnstars_to_print
    }
    if ($user eq $largest_article_user) {
      print OUTPUT "$list_stars Largest single article: [["
	. $largest_article_name
	  . "]] (" . num_format($largest_article_size) . ")\n";
      # add to $barnstars_to_print
    }
    if (exists($user_10k_articles{$user})) {
      print OUTPUT "$list_stars article >= 10k ("
	. scalar(keys %{ $user_10k_articles{$user} }) . ")\n";
      # add to $barnstars_to_print
    }

    # print <nowiki>\n$barnstars_to_print\n</nowiki>
  } else {
    print OUTPUT "* barnstar = (none)\n";
  }
  print OUTPUT "\n";
}
close(OUTPUT);
warn "Finished; had $num_users_with_barnstars users with barnstars, "
  . $num_users_with_multiple_barnstars . " with multiple barnstars\n";