Wikipedia:Featured articles nominated in 2006/scripts

The monthly tables at Wikipedia:Featured articles nominated in 2006 can be generated using the following scripts. The process I've used (on Mac OS X) is:

  1. edit script1 to change the month and run it, producing a file featured.noms.YEAR.MONTH
  2. carefully check the output (the script makes a guess about who the nominator is, and is sometimes fooled)
  3. run script2 featured.noms.YEAR.MONTH
  4. copy and paste the output into the by-year list

Please feel free to use and/or improve these scripts.


Script1:

#!/bin/bash
#
# Script1, download stage: collect the nomination pages for one month.
#
# Usage: script1 [ANY_NON_EMPTY_ARGUMENT]
#   No argument:  fetch the Featured log page for $MONTH $YEAR and rebuild
#                 ./transcludes from the "Wikipedia:Featured..." pages that
#                 it transcludes.
#   Any argument: skip that step and reuse the existing ./transcludes.
#
# Writes featured.$YEAR.$MONTH: the raw wikitext of every page listed in
# ./transcludes, each preceded by an empty line.

# get the log files

# Month to process; edit these before running.
YEAR=2006
MONTH=October
# Disabled bulk download of the 2003-2005 logs, kept for reference:
# for i in January February March April May June July August September October November December ; do
  # for j in 2003 2004 2005; do
    # /usr/bin/curl "http://en.wikipedia.org/w/index.php?title=Wikipedia:Featured_article_candidates/Featured_log/${i}_$j&action=raw" 
  # done
# done >featured.logs

# Print the raw wikitext of the wiki page titled $1 on stdout.
# The title goes through --data-urlencode so that characters such as "&",
# "?", "+" or "%" inside a page name cannot corrupt the query string.
# --location makes curl follow a redirect if the server answers with one.
fetch_raw() {
  /usr/bin/curl --location --get \
    --data-urlencode "title=$1" \
    --data-urlencode "action=raw" \
    "http://en.wikipedia.org/w/index.php"
}

# they have transclusions (not more than one per line), so fetch the transclusions as well
if [[ -z "${1:-}" ]]; then
  # Keep what follows the second "{" up to the first "}", keep only
  # Wikipedia:Featured... targets, drop any "|parameter" part and turn
  # spaces into underscores.
  fetch_raw "Wikipedia:Featured_article_candidates/Featured_log/${MONTH}_${YEAR}" \
    | grep '{{' \
    | cut -d '{' -f 3- \
    | cut -d '}' -f 1 \
    | grep "Wikipedia:Featured" \
    | cut -d '|' -f 1 \
    | tr " " "_" >transcludes
fi

if [[ ! -r transcludes ]]; then
  printf 'script1: cannot read ./transcludes\n' >&2
  exit 1
fi

# Download every transcluded page, pausing 5 seconds after each batch of 10.
fetched=0
while read -r title; do
  fetched=$((fetched + 1))
  # make sure there's a line feed
  echo
  fetch_raw "$title"
  if ((fetched >= 10)); then
    sleep 5
    fetched=0
  fi
done <transcludes >"featured.$YEAR.$MONTH"

# now, have all the log files

#######################################
# Guess the nominator(s) of each article in a dump of nomination pages.
# Arguments: $1 - year, $2 - month name (both are only echoed in the output)
# Input:     wikitext on stdin
# Outputs:   one line per article on stdout, in the form
#   [[August]] [[2005]] ||  || [[Gray Wolf]] || [[user:Sango123|Sango123]]
#######################################
extract_nominations() {
  awk -v YEAR="$1" -v MONTH="$2" '
# Runs of four "=" are deleted first, so "====Sub====" headings are not
# mistaken for article headings below.
{
  gsub(/[=][=][=][=]/, "", $0)
}

# A line starting with "===" names the article under discussion.
$0 ~ "^===" {
  sub(/^[=][=][=]/, "", $0)
  article = $0
  sub(/[=][=][=].*/, "", article)
}

# The first line after an article heading that contains a [[User:...]] link
# is taken as the nomination; later lines are ignored until the next heading.
/[[][[][uU][sS][eE][rR]:/ {
  if (article != "") {
    user = $0
    # Chop text off the front until the line starts with the user link.
    while (user !~ /^[[][[][uU][sS][eE][rR]:/) {
      sub(/^.[^[]*/, "", user)
    }
    sub(/^[^[]*[[][[][uU][sS][eE][rR]:/, "", user)
    rest = user
    # Keep only the user name: cut at the first "]" and at any "|".
    sub(/].*/, "", user)
    sub(/[|].*/, "", user)

    # A second user link counts as a co-nominator only when no other "["
    # comes before it in the remainder of the line.
    user2 = ""
    if (sub(/^[^[]*[[][[][uU][sS][eE][rR]:/, "", rest) == 1) {
      sub(/].*/, "", rest)
      sub(/[|].*/, "", rest)
      # A signature that links the same user twice is still one nominator.
      if (rest != user) {
        user2 = rest
      }
    }

    line = "[[" MONTH "]] [[" YEAR "]] ||  || " article " || [[user:" user "|" user "]]"
    if (user2 != "") {
      line = line " & [[user:" user2 "|" user2 "]]"
    }
    print line
    article = ""
  }
}'
}

extract_nominations "$YEAR" "$MONTH" \
  <"featured.$YEAR.$MONTH" >"featured.noms.$YEAR.$MONTH"

Script2:

#!/bin/bash
#
# Script2: turn a nominations list into wiki tables, one table per month.
#
# Usage: script2 featured.noms.YEAR.MONTH
#   With no file argument the list is read from standard input.
#
# Each input line holds four fields separated by " || ":
#   date || main page date || article || nominator(s)

# make a by-month table
# Reads the list on stdin and writes the wikitext on stdout.
make_month_tables() {
  # ITALIC carries the wiki italics marker (two apostrophes) into awk, which
  # cannot contain apostrophes inside a single-quoted shell string.
  awk -v ITALIC="''" '
BEGIN {
  FS = " \\|\\| "
  table_open = 0
}

{
  # A change in the date field starts a new section and table.
  if ($1 != lastdate) {
    # Close the previous table, if any; nothing is open before the first one.
    if (table_open) {
      print "|}"
    }
    monthyear = $1
    gsub("[[]*", "", monthyear)
    gsub("]]", "", monthyear)
    print "== Nominated in " monthyear "  =="
    print ":" ITALIC "See logs at [[Wikipedia:Featured article candidates/Featured log/" monthyear "]]" ITALIC
    print "{| class=\"wikitable\" style=\"margin:auto;\" width=\"90%\""
    print "|-"
    print "!Article!!Main page date!!Nominator"
    table_open = 1
  }
  lastdate = $1
  # Row order is article, main page date, nominator.
  print "|-"
  print "|| " $3 " || " $2 " || " $4
}

END {
  if (table_open) {
    print "|}"
  }
}'
}

main() {
  if [[ -n "${1:-}" ]]; then
    make_month_tables <"$1"
  elif [[ -t 0 ]]; then
    # No file and nothing piped in: explain instead of waiting on the terminal.
    printf 'usage: %s featured.noms.YEAR.MONTH\n' "${0##*/}" >&2
    return 2
  else
    make_month_tables
  fi
}

main "$@"