# User:AnomieBOT/source/tasks/AltLinkTemplateSubster.pm

package tasks::AltLinkTemplateSubster;

=pod

=begin metadata

Bot:      AnomieBOT
Task:     AltLinkTemplateSubster
BRFA:     Wikipedia:Bots/Requests for approval/AnomieBOT 33
Status:   Approved 2009-12-13
Created:  2009-10-19

Replace all transclusions of {{tl|cbb link}} or {{tl|cfb link}} where the most
specific alternative exists with the specific link, to reduce the #ifexist
parserfunction load. Similar templates may be added in the future as needed.

=end metadata

=cut

use utf8;
use strict;

use Data::Dumper;
use AnomieBOT::Task qw/onlylist/;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

# Pages the bot never edits: each handled link template itself plus its
# /doc, /sandbox and /testcases subpages.
my %skip=map {
    my $base="Template:$_ link";
    map { ($base.$_ => 1) } ('', '/doc', '/sandbox', '/testcases');
} qw/Cbb CBSB Cfb Cih Csb/;

# Last known-good revision ID (rev_id) of each template handled by %repl.
# run() compares each value against the 'lastrevid' currently reported for
# that page. On a mismatch it files a whine and removes the template's
# replacement function from %repl, so the bot never rewrites transclusions
# based on template code that nobody has re-verified. An undef value means
# no revision is pinned: the comparison is skipped as long as the page shows
# up in the query result.
my %goodrevisions=(
    'Template:Cbb link'     => 963569499,
    'Template:Cfb link'     => 631665143,
    'Template:CBSB link'    => 1051188360,
    'Template:Cih link'     => 930168340,
    'Template:Csb link'     => 1046925326,
    'Template:NFLDraft-row' => undef, # bot just adds a parameter, no subst
);

# Replacement functions, keyed by template title. Each one is called with a
# hashref of the transclusion's parameters (name => value), the raw parameter
# list, and the transclusion's original wikitext. It returns a two-element
# list: the title of the page whose existence has to be checked, and the
# wikitext that should replace the transclusion if that page exists. A lone
# undef is returned when the transclusion needs no change.
my %repl=do {
    # Collapse whitespace/underscore runs in the target title and wrap it in
    # a wikilink, piped when a label was supplied.
    my $finish=sub {
        my ($target, $label)=@_;
        $target=~s/[\s_]+/ /g;
        my $link=defined($label) ? "[[$target|$label]]" : "[[$target]]";
        return ($target, $link);
    };

    # Builder for "<year> <team><suffix>" links (football, baseball, softball).
    my $single_year=sub {
        my ($suffix)=@_;
        return sub {
            my %p=%{$_[0]};
            my $year=$p{'year'} // '{{{year}}}';
            my $team=$p{'team'} // '{{{team}}}';
            return $finish->($year.' '.$team.$suffix, $p{'title'});
        };
    };

    # Values of the "sex" parameter that select the women's team.
    my %is_female=map { ($_ => 1) } ('f', 'female', 'w', 'women', 'womens', 'women\'s');

    # Builder for "<year>–<yy> <team> [men's|women's]<suffix>" links
    # (basketball, ice hockey). $allow_none enables the sex=none form, which
    # omits the men's/women's qualifier entirely.
    my $season=sub {
        my ($suffix, $allow_none)=@_;
        return sub {
            my %p=%{$_[0]};

            my $start=$p{'1'} // substr($p{'year'} // '{{{year}}}', 0, 4);
            # Only the 1999 season spells out the full end year (1999–2000).
            my $end=$start eq '1999' ? ($start+1) : sprintf('%02d',($start+1)%100);
            my $team=$p{'team'} // '{{{team}}}';
            my $sex=lc($p{'sex'} // '{{{sex}}}');

            my $target=$start.'–'.$end.' '.$team.' ';
            if($is_female{$sex}){
                $target.='women\'s';
            }
            elsif($allow_none && $sex eq 'none'){
                $target=~s/ $//;
            }
            else {
                $target.='men\'s';
            }
            return $finish->($target.$suffix, $p{'title'});
        };
    };

    (
        'Template:Cbb link'  => $season->(' basketball team', 1),
        'Template:CBSB link' => $single_year->(' baseball team'),
        'Template:Cfb link'  => $single_year->(' football team'),
        'Template:Cih link'  => $season->(' ice hockey season', 0),
        'Template:Csb link'  => $single_year->(' softball team'),
        'Template:NFLDraft-row' => sub {
            my %p=%{$_[0]};
            my $wikitext=$_[2];

            my $target=join(' ',
                $p{'collegeyear'} // '{{{collegeyear}}}',
                $p{'collegeteam'} // '{{{collegeteam}}}',
                'football team',
            );
            $target=~s/[\s_]+/ /g;

            # Nothing to do when the row is already flagged.
            my $flag=lc($p{'cfb page exists'} // '');
            return undef if grep { $flag eq $_ } ('yes', 'y', 1);

            # Overwrite an existing "cfb page exists" value, or append the
            # parameter just before the closing braces when it is absent.
            my $had_param=($wikitext=~s/(\|\s*cfb page exists\s*=\s*)[^|}]*?(\s*[|}])/${1}yes$2/);
            $wikitext=~s/}}$/|cfb page exists=yes}}/ unless $had_param;
            return ($target,$wikitext);
        },
    );
};

# Iterator definitions: argument sets for the category-member queries that
# run() walks in order. Both passes cover the same tracking categories.
my @iterators=do {
    my @categories=map { "Category:Excessive uses of $_ link" } qw/cbb cfb cbsb csb/;
    (
        {
            list        => 'categorymembers',
            cmnamespace => 10, # Do the template namespace first, to (hopefully) eliminate pages using those templates too
            cmtype      => 'page',
            cmtitle     => [@categories],
            cmlimit     => '100',
        },
        {
            list    => 'categorymembers',
            cmtitle => [@categories],
            cmlimit => '100',
        },
    );
};

# Constructor. Builds the object via the parent class, then adds the state
# that run() keeps between invocations:
#   iter               - iterator currently in progress (undef = none yet)
#   iterators          - remaining iterator definitions, copied from @iterators
#   check templates    - titles queued for a follow-up check
#   checked templates  - titles that have already been queued once
sub new {
    my ($class)=@_;
    my $self=$class->SUPER::new();

    my %state=(
        'iter'              => undef,
        'iterators'         => [@iterators],
        'check templates'   => {},
        'checked templates' => {},
    );
    @{$self}{keys %state}=values %state;

    return bless $self, $class;
}

=pod

=for info
Approved 2009-12-13<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 33]]

=cut

# Returns this task's approval value, the constant 3.
# NOTE(review): what the specific number means is not visible in this file;
# presumably it is interpreted by the AnomieBOT framework - confirm there
# before changing it.
sub approved {
    my $approval=3;
    return $approval;
}

# Perform one pass of the task.
#
# Parameters:
#   $self - the task object; carries the iteration state between calls
#           ('iter', 'iterators', 'check templates', 'checked templates').
#   $api  - the bot's API object, used for queries, logging and edits.
#
# Outline:
#   1. Verify each handled template is still at its known-good revision; a
#      changed template is reported via whine and dropped from %repl.
#   2. Resolve all redirects to the remaining templates.
#   3. Walk the tracking categories (then any queued follow-up pages). For
#      each page, find the handled transclusions, check which link targets
#      exist, and replace the transclusions whose target exists.
#
# Returns 60 after an API failure, 300 when the task has been shut off, 0
# once the five-minute time budget is used up, 7200 after a complete pass,
# and undef when no replacement functions are left.
# NOTE(review): these look like "seconds until the next run" for the
# framework's scheduler, with undef meaning "do not run again" - confirm
# against AnomieBOT::Task.
sub run {
    my ($self, $api)=@_;

    $api->task('AltLinkTemplateSubster',0,10,qw/d::Redirects d::Templates d::Talk/);

    # Check if the templates are still known-good versions
    my $res=$api->query(titles=>join('|',keys %goodrevisions), prop=>'info');
    if($res->{'code'} ne 'success'){
        $api->warn("Failed to check template revisions: ".$res->{'error'}."\n");
        return 60;
    }
    my %info=map { $_->{'title'},$_ } values %{$res->{'query'}{'pages'}};

    # Iterate over a snapshot of the keys rather than using each(): %repl is a
    # file-level hash, so returning from the middle of an each() loop would
    # leave its iterator half-advanced, and the next call would resume after
    # the failed key without ever re-checking or removing that template.
    # Deleting entries while looping over the key snapshot is safe.
    foreach my $k (keys %repl){
        next if(defined($info{$k}) && !defined($goodrevisions{$k}));
        next if(defined($info{$k}) && $info{$k}{'lastrevid'}==$goodrevisions{$k});
        my $whine=$api->whine("[[$k]] was modified!", "The template [[$k]] was modified from the last known good version ({{diff|page=$k|diff=cur|oldid=".$goodrevisions{$k}."|label=diff}}). Please check that my replacement function for that template is still correct, and then update the good revision id to the current revision.");
        if($whine->{'code'} ne 'success'){
            $api->warn("Whine failed: ".$whine->{'error'}."\n");
            return 60;
        }
        delete $repl{$k};
    }

    return undef unless %repl;

    # Get a list of templates redirecting to our targets
    my %templates=$api->redirects_to_resolved(keys %repl);
    if(exists($templates{''})){
        $api->warn("Failed to get redirects to target templates: ".$templates{''}{'error'}."\n");
        return 60;
    }
    # Map every template name (redirects included) straight to the
    # replacement function of the template it resolves to.
    $templates{$_}=$repl{$templates{$_}} foreach keys %templates;

    # Shared by the scan and replace passes: run the replacement function for
    # one transclusion. Returns an empty list for templates we do not handle,
    # otherwise whatever the replacement function returns.
    my $expand=sub {
        my ($name, $params, $wikitext)=@_;

        return unless exists($templates{"Template:$name"});

        my %p=();
        foreach my $param ($api->process_paramlist(@$params)){
            $p{$param->{'name'}}=$param->{'value'};
        }
        return $templates{"Template:$name"}(\%p, $params, $wikitext);
    };

    # Spend a max of 5 minutes on this task before restarting
    my $endtime=time()+300;

    my %check_templates=%{$self->{'check templates'}};
    my %checked_templates=%{$self->{'checked templates'}};

    while(1){
        my $iter=$self->{'iter'};
        if(!defined($iter)){
            my $i=shift @{$self->{'iterators'}};
            unless($i){
                # Category iterators are exhausted; process the queued
                # follow-up pages, if any, in batches of 500 titles.
                last unless %check_templates;
                $i={ titles=>[] };
                my @x=keys %check_templates;
                while(@x){
                    push @{$i->{'titles'}}, join('|',splice(@x,0,500));
                }
                %checked_templates=(%checked_templates, %check_templates);
                %check_templates=();
                $self->{'check templates'}={};
                $self->{'checked templates'}=\%checked_templates;
            }
            $iter=$api->iterator(%$i);
            $self->{'iter'}=$iter;
        }
        while(my $page=$iter->next){
            if(!$page->{'_ok_'}){
                $api->warn("Failed to retrieve category members: ".$page->{'error'}."\n");
                return 60;
            }

            next if exists($skip{$page->{'title'}});

            my $title=$page->{'title'};
            $api->log("Checking for templates in $title");

            # WTF?
            if(exists($page->{'missing'})){
                $api->warn("$title is missing? WTF?\n");
                next;
            }

            # Ok, check the page
            my $tok=$api->edittoken($title, EditRedir=>1);
            if($tok->{'code'} eq 'shutoff'){
                $api->warn("Task disabled: ".$tok->{'content'}."\n");
                return 300;
            }
            if($tok->{'code'} ne 'success'){
                $api->warn("Failed to get edit token for $title: ".$tok->{'error'}."\n");
                next;
            }
            next if exists($tok->{'missing'});

            # Get page text
            my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'};

            # First, scan the page for template uses
            my %pages=();
            $api->process_templates($intxt, sub {
                my ($pg)=$expand->(@_);
                $pages{$pg}=0 if defined($pg);
                return undef;
            });

            # Next, query all those linked pages
            my @pages=keys %pages;
            while(@pages){
                my $exist=$api->query(titles=>join('|', splice(@pages,0,500)));
                if($exist->{'code'} ne 'success'){
                    $api->warn("Failed to get existence for links in $title: ".$exist->{'error'}."\n");
                    return 60;
                }
                $pages{$_->{'title'}}=(!exists($_->{'invalid'}) && !exists($_->{'missing'})) foreach (values %{$exist->{'query'}{'pages'}});
            }

            # Then perform the actual replacements
            my %used=();
            my $outtxt=$api->process_templates($intxt, sub {
                my $name=$_[0];
                my ($pg,$rp)=$expand->(@_);
                return undef unless(defined($pg) && $pages{$pg});

                $used{$name}=1;
                return $rp;
            });

            # Need to edit?
            if($outtxt ne $intxt){
                # Sort so the summary does not depend on hash ordering.
                my $summary="Substing/adjusting templates to reduce #ifexist parserfunction usage: {{".join("}} {{", sort keys %used)."}}";
                $api->log("$summary in $title");
                my $edit=$api->edit($tok, $outtxt, $summary, 1, 1);
                if($edit->{'code'} ne 'success'){
                    $api->warn("Write failed on $title: ".$edit->{'error'}."\n");
                    next;
                }
            } else {
                $api->log("Nothing to do in $title");
                # MediaWiki has major problems with updating category
                # membership for things like this. So check all templates in
                # this page too.
                my $tpl_iter=$api->iterator(
                    titles    => $title,
                    generator => 'templates',
                    gtllimit  => 'max',
                    prop      => 'templates',
                    tllimit   => 'max',
                    onlylist('tltemplates',500,keys %templates),
                );
                while(my $t=$tpl_iter->next){
                    if(!$t->{'_ok_'}){
                        $api->warn("Could not retrieve templates from iterator: ".$t->{'error'}."\n");
                        return 60;
                    }
                    next if exists($checked_templates{$t->{'title'}});
                    # Defensive: tolerate result pages that carry no
                    # 'templates' entry at all.
                    $check_templates{$t->{'title'}}=1 if grep exists($templates{$_->{'title'}}), @{$t->{'templates'} || []};
                }
                $self->{'check templates'}=\%check_templates;
            }

            # If we've been at it long enough, let another task have a go.
            return 0 if time()>=$endtime;
        }
        $self->{'iter'}=undef;
    }

    # No more pages to check for now, start again later to pick up new page
    # creations.
    $self->{'iter'}=undef;
    $self->{'iterators'}=[@iterators];
    $self->{'check templates'}={};
    $self->{'checked templates'}={};
    return 7200;
}

1;