# User:AnomieBOT/source/tasks/TemplateSubster/Base.pm

package tasks::TemplateSubster::Base;

use utf8;
use strict;

use Data::Dumper;
use AnomieBOT::Task qw/:time/;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

# Constructor. Builds the object through the parent class and seeds the
# bookkeeping that process() carries from one call to the next:
#  - 'deferred': set by process() when it abandons a template for lack of time
#  - 'curtitle': template currently being worked on (undef = start over)
#  - 'ei iter':  transclusion iterator for 'curtitle' (undef = none yet)
sub new {
    my ($class) = @_;

    my $self = $class->SUPER::new();
    @{$self}{ 'deferred', 'curtitle', 'ei iter' } = ( 0, undef, undef );

    return bless $self, $class;
}

# Process a set of templates. Params:
#  - $api: AnomieBOT::API
#  - $process: Hash mapping templates to process to their status bitmaps.
#    Values may be modified during processing.
#  - $r: Hash mapping redirect names to template names
#  - $endtime: Timestamp at which to return to allow other tasks a chance
# Returns the value to return from run(): 0 when halting, out of time or
# when a deferred template is pending; 60 after a failed transclusion query
# or a fatal do_subst() failure; 300 when the edit token reports 'shutoff';
# 3600 when the whole list was walked with nothing deferred.
#
# Status bits OR-ed into $process->{template} by this method:
#  - 0x01:   a transcluding page was skipped as uneditable (user .js/.css,
#            namespace 8, or protected)
#  - 0x02:   the bot is excluded from a transcluding page
#  - 0x04:   a transclusion carries a "nosubst" or "demo" parameter
#  - 0x08:   the template is still transcluded after the page was handled
#  - 0x4000: a token, subst, write or parse step failed
#  - 0x8000: the transclusion list of the template was walked to the end
#
# Progress is kept on $self ('curtitle', 'ei iter', 'deferred') so that a
# later call resumes where this one stopped. Per-page results are cached in
# $api->store under "template|page" as [ revid, flags ].
sub process {
    my ($self, $api, $process, $r, $endtime) = @_;

    # Work through the templates in sorted order, resuming at the remembered
    # title (or the next one after it if it is no longer in the list).
    my @process = sort keys %$process;
    if(defined($self->{'curtitle'})) {
        $api->debug( 2, "Skipping templates before $self->{'curtitle'}" );
        @process = grep { $_ ge $self->{'curtitle'} } @process;
        if ( !@process || $self->{'curtitle'} ne $process[0] ) {
            $self->{'curtitle'} = $process[0];
            $self->{'ei iter'} = undef;
            if ( !defined( $self->{'curtitle'} ) ) { # Err...
                $api->debug( 2, "Nothing? Will continue." );
                $self->{'deferred'} = 0;
                return 0;
            }
        }
    } else {
        $self->{'curtitle'} = $process[0];
        $self->{'ei iter'} = undef;
    }

    my $checkEnd = 0;
    while(defined($self->{'curtitle'})){
        if(!defined($self->{'ei iter'})){
            $api->debug( 2, "Starting processing of $self->{'curtitle'}" );
            $self->{'ei iter'}=$api->iterator(
                generator    => 'embeddedin',
                geititle     => $self->{'curtitle'},
                geilimit     => '100',
                prop         => 'info',
            );
            $process->{$self->{'curtitle'}} = 0;
        } else {
            $api->debug( 2, "Continuing processing of $self->{'curtitle'}" );
        }

        # Use a lexical for the page record: the loop body calls into $api
        # before and while reading it, and the global $_ is neither safe from
        # those callees nor restored for our own caller.
        while(my $page=$self->{'ei iter'}->next){
            return 0 if $api->halting;
            if(!$page->{'_ok_'}){
                $api->warn("Failed to retrieve transclusions for $self->{curtitle}: ".$page->{'error'}."\n");
                return 60;
            }

            my $title=$page->{'title'};

            # Can't edit user js or css
            if($page->{'ns'}==2 && $title=~/\.(?:js|css)$/){
                $process->{$self->{'curtitle'}} |= 0x01;
                next;
            }

            # Can't edit Mediawiki namespace either
            if($page->{'ns'}==8){
                $process->{$self->{'curtitle'}} |= 0x01;
                next;
            }

            # Skip if we checked this revision already
            my $revid=$page->{'lastrevid'};
            my $key=$self->{'curtitle'}."|$title";
            my $tried = $api->store->{$key} // [ 0, 0 ];
            if ( ref($tried) eq 'ARRAY' && $tried->[0] == $revid ) {
                $process->{$self->{'curtitle'}} |= $tried->[1];
                next;
            }

            # Did we run out of time? We only get here for a page that still
            # needs work, so the current template is unfinished: move on to
            # the next one and remember that something was deferred.
            if ( $checkEnd ) {
                shift @process;
                $self->{'curtitle'} = $process[0];
                $self->{'ei iter'} = undef;
                $self->{'deferred'} = defined( $self->{'curtitle'} ) ? 1 : 0;
                $api->debug( 2, "Ran out of time, will continue with the following template." );
                return 0;
            }

            # Ok, check the page
            my $tok=$api->edittoken($title, EditRedir=>1);
            $revid=$tok->{'lastrevid'} // $revid; # In case MW somehow returned an older revision than it did earlier, use the rev in the actual token.
            if($tok->{'code'} eq 'shutoff'){
                $api->warn("Task disabled: ".$tok->{'content'}."\n");
                # Clear iterators so a restart actually restarts
                $self->{'ei iter'} = undef;
                $self->{'curtitle'} = undef;
                return 300;
            }
            if($tok->{'code'} eq 'pageprotected'){
                # Don't worry about protected pages, just mark them and continue
                $process->{$self->{'curtitle'}} |= 0x01;
                $api->store->{$key} = [ $revid, 0x01 ];
                next;
            }
            if($tok->{'code'} eq 'botexcluded'){
                # Don't retry on bot exclusion either
                $api->warn("TemplateSubster excluded from $title: ".$tok->{'error'}."\n");
                $process->{$self->{'curtitle'}} |= 0x02;
                $api->store->{$key} = [ $revid, 0x02 ];
                next;
            }
            if($tok->{'code'} ne 'success'){
                $api->warn("Failed to get edit token for $title: ".$tok->{'error'}."\n");
                $process->{$self->{'curtitle'}} |= 0x4000;
                next;
            }
            next if exists($tok->{'missing'});

            # Get page text
            my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'};

            # Perform the removal. %remv collects the names of the templates
            # that were actually replaced; $fail latches a fatal do_subst()
            # failure so the remaining invocations are left untouched.
            my %remv=();
            my $fail=0;
            my $outtxt=$api->process_templates($intxt, sub {
                return undef if $fail;
                my $name=shift;
                my $params=shift;
                my $wikitext=shift;
                my $data=shift;
                my $oname=shift;
                my $nl=shift;

                return undef unless exists($r->{"Template:$name"}) || exists($r->{$name});
                foreach my $param ($api->process_paramlist(@$params)){
                    if ($param->{'name'}=~/^\s*(?:nosubst|demo)\s*$/) {
                        $process->{$self->{'curtitle'}} |= 0x04;
                        return undef;
                    }
                }
                my ($ret, $fatal) = $self->do_subst($api, $title, $oname, $name, $wikitext, $nl);
                $fail = 1 if $fatal;
                $remv{$name}=1 if defined( $ret );
                return $ret;
            });
            if($fail) {
                $process->{$self->{'curtitle'}} |= 0x4000;
                return 60;
            }

            # Need to edit?
            if(%remv){
                my @remv=sort keys %remv;
                my $summary=$self->summary( $api, @remv );
                $api->log( "$summary in $title" );
                my $res2=$api->edit($tok, $outtxt, $summary, 1, 1);
                if($res2->{'code'} ne 'success'){
                    $api->warn("Write failed on $title: ".$res2->{'error'}."\n");
                    $process->{$self->{'curtitle'}} |= 0x4000;
                    next;
                }
                # A successful edit that changed nothing carries no new
                # revision id; keep the token's revid in that case rather
                # than parsing (and caching) an undefined one.
                $revid=$res2->{'edit'}{'newrevid'} // $revid;
            }

            # Check whether the edit (or lack thereof) actually removed all transclusions of the template
            my $res2=$api->query( action => 'parse', oldid => $revid, prop => 'templates', formatversion => 2 );
            if($res2->{'code'} eq 'success') {
                my $flag = ( grep { $_->{'title'} eq $self->{'curtitle'} } @{$res2->{'parse'}{'templates'}} ) ? 0x08 : 0;
                $process->{$self->{'curtitle'}} |= $flag;
                $api->store->{$key} = [ $revid, $flag ];
            } else {
                # Err? Just re-check it later.
                $api->warn( "Failed to parse $title (rev $revid): " . $res2->{'error'} . "\n" );
                $process->{$self->{'curtitle'}} |= 0x4000;
                $api->store->{$key} = [ 0, 0 ];
            }

            # If we've been at it long enough, let another task have a go. Set
            # a flag here and exit once we know if we need to set the
            # 'deferred' flag or not.
            if ( time()>=$endtime ) {
                $checkEnd = 1;
            }
        }

        $api->debug( 2, "Finished processing of $self->{'curtitle'}" );
        $process->{$self->{'curtitle'}} |= 0x8000;
        shift @process;
        $self->{'curtitle'} = $process[0];
        $self->{'ei iter'} = undef;
        if ( $checkEnd ) {
            # The template was finished before time ran out, so nothing was
            # deferred on its account.
            $self->{'deferred'} = 0 if !defined( $self->{'curtitle'} );
            $api->debug( 2, "Ran out of time, will continue." );
            return 0;
        }
    }

    # If we deferred any during this go-round, do another right away.
    if ( $self->{'deferred'} ) {
        $self->{'deferred'} = 0;
        $api->debug( 2, "Finished list, but deferred. Will continue." );
        return 0;
    }

    # No more pages to check.
    $api->debug( 2, "No more pages to check, sleeping" );
    return 3600;
}

# Build the edit summary for a page edit. process() calls this with the API
# object followed by the sorted names of the templates that were replaced and
# uses the returned string as the summary. Subclasses must provide it; this
# base version only dies.
sub summary {
    my $self = shift;
    my $api  = shift;
    my @remv = @_;

    die "You must override summary()";
}

# Call this somewhere near the start of run().
#
# Loads the bot's expanded signature once and caches it in $self->{'sig'},
# where do_subst() reads it. The signature is obtained by running a
# three-tilde text through the API's pre-save transform.
# Params:
#  - $api: AnomieBOT::API
# Returns undef when the signature is available (freshly fetched or already
# cached), or 60 when the query failed.
sub fetchSig {
    my $self = shift;
    my $api  = shift;

    # Already fetched on an earlier call; nothing to do.
    return undef if exists $self->{'sig'};

    # NOTE(review): the middle tilde is spelled as an escape, presumably so
    # this literal is not itself expanded when the source is saved on-wiki.
    my @request = (
        action         => 'parse',
        text           => "~\x7e~",
        pst            => 1,
        onlypst        => 1,
        'contentmodel' => 'wikitext',
    );
    my $res = $api->query(@request);

    unless ( $res->{'code'} eq 'success' ) {
        $api->warn("Failed to load bot sig: ".$res->{'error'}."\n");
        return 60;
    }

    $self->{'sig'} = $res->{'parse'}{'text'}{'*'};
    return undef;
}

# Substitute a single template invocation by running it through the API's
# pre-save transform, then attribute any mention of the bot in the result to
# the user who added the invocation.
# Params:
#  - $api: AnomieBOT::API
#  - $title: Title of the page holding the invocation (used as the parse
#    context and for the revision lookup)
#  - $oname: Template name as written at the start of $txt
#  - $name: Template name to put after "subst:"
#  - $txt: Wikitext of the invocation, expected to start with "{{$oname"
#  - $nl: If true, a dummy first line is added before parsing and stripped
#    from the result again (workaround for T14974)
# Returns a two-element list ($text, $fatal):
#  - ($text, 0): the substituted wikitext
#  - (undef, 0): $txt could not be rewritten, or the template did not subst
#  - (undef, 1): an API query failed
# Uses $self->{'sig'} as loaded by fetchSig(); if it is missing or empty the
# signature rewrite is skipped.
sub do_subst {
    my ($self,$api,$title,$oname,$name,$txt,$nl)=@_;
    my $bot=$api->user;
    my $sig=$self->{'sig'};

    # Entity-encoded form of the bot's name. Occurrences of the name that are
    # already in the input are swapped for this before parsing and swapped
    # back at the end, so the attribution rewrite below only hits occurrences
    # produced by the substitution itself.
    my $botr=$bot;
    $botr=~s/(.)/ sprintf("&#%d;",ord($1)) /ge;

    my $itxt=$txt;
    $itxt=~s/^\{\{\Q$oname\E/{{subst:$name/;
    if ( $itxt eq $txt ) {
        $api->warn("Huh, \$txt doesn't begin with {{\$oname?\noname = $oname\ntxt = $txt\n");
        return (undef, 0);
    }
    $itxt=~s/\}\}$/|subst=subst:}}/;
    $itxt=~s/\Q$bot\E/$botr/g;

    my $prefix="T14974\n";
    $itxt="$prefix$itxt" if $nl; # Work around T14974

    my $res=$api->query(action=>"parse", text=>$itxt, title=>$title, pst=>1, onlypst=>1);
    if($res->{'code'} ne 'success'){
        $api->warn("Failed to expand template: ".$res->{'error'}."\n");
        return (undef, 1);
    }
    my $otxt=$res->{'parse'}{'text'}{'*'};
    $otxt=substr($otxt,length($prefix)) if $nl;
    if($otxt =~ /^\{\{subst:/ ) {
        my $err = $otxt;
        $err =~ s/\|.*/|.../s;
        $api->warn("Template didn't subst: $err\n");
        return (undef, 0);
    }
    if($otxt=~/\Q$bot\E/){
        # Walk the page history from the newest revision backwards, one
        # revision per query, for as long as the revision text still contains
        # $txt. $u ends up as the author of the oldest revision in that run.
        # The first query asks for the user only, so the newest revision is
        # always accepted.
        my %q=(
            titles => $title,
            prop => 'revisions',
            rvprop => 'user',
            rvlimit => 1,
        );
        my $u='';
        do {
            $res=$api->query(%q);
            if($res->{'code'} ne 'success'){
                $api->warn("Failed to fetch revisions for $title: ".$res->{'error'}."\n");
                return (undef, 1);
            }
            if(exists($res->{'query-continue'}{'revisions'}{'rvcontinue'})){
                $q{'rvcontinue'}=$res->{'query-continue'}{'revisions'}{'rvcontinue'};
                $q{'rvprop'}='user|content';
                $q{'rvslots'}='main';
            } else {
                delete $q{'rvcontinue'};
            }
            $res=(values %{$res->{'query'}{'pages'}})[0]{'revisions'}[0];
            if(!exists($res->{'slots'}{'main'}{'*'}) || $res->{'slots'}{'main'}{'*'}=~/\Q$txt\E/){
                $u=$res->{'user'};
            } else {
                delete $q{'rvcontinue'};
            }
        } while(exists($q{'rvcontinue'}));

        # Signatures. Skip when no signature is loaded: an empty pattern
        # would make Perl reuse the last successful pattern (the bot-name
        # match above) and replace every mention with the signature link.
        if ( defined($sig) && length($sig) ) {
            $otxt=~s/\Q$sig\E/[[User:$u]] ([[User talk:$u|talk]])/g;
        }

        # Try to handle User links inside URLs. Not perfect, but the best we
        # can do in the situation.
        my $eu = $u;
        $eu =~ s/ /_/g;
        $eu =~ s/([%"&])/ sprintf("%%%02X", ord($1)) /ge;
        my $tmp;
        do {
            # Repeat until nothing changes, since one URL may contain the
            # name more than once.
            $tmp = $otxt;
            $otxt=~s!((?:\[|https?:)//[^][<>"\x00-\x20\x7F\p{Zs}]+)\Q$bot\E!$1$eu!g;
        } while ( $tmp ne $otxt );

        # Other usename mentions
        $otxt=~s/\Q$bot\E/$u/g;
    }

    # Restore the bot-name occurrences that came from the input, both in
    # their entity-encoded form and in the percent-encoded form of those
    # entities.
    $otxt=~s/\Q$botr\E/$bot/g;
    $botr=~s/&/%26/g;
    $botr=~s/#/%23/g;
    $botr=~s/;/%3B/g;
    $otxt=~s/\Q$botr\E/$bot/g;
    return ($otxt, 0);
}

1;