User:AnomieBOT/source/tasks/TemplateUnsubstifier.pm

package tasks::TemplateUnsubstifier;

=pod

=begin metadata

Bot:      AnomieBOT II
Task:     TemplateUnsubstifier
BRFA:     Wikipedia:Bots/Requests for approval/AnomieBOT II 2
Status:   Approved 2013-11-17
Created:  2013-10-31

Apply [[Module:Unsubst]] to maintenance templates.

=end metadata

=cut

use utf8;
use strict;

use POSIX;
use Data::Dumper;
use AnomieBOT::API;
use AnomieBOT::Task qw/bunchlist ISO2timestamp/;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

# Constructor: builds the task object via the parent class and sets up the
# per-process cache fields used by run():
#   'templates'     - remaining list of template titles to process (or undef)
#   'templates rev' - revision id of the list page that 'templates' came from
#   'nextrun'       - in-memory copy of the next scheduled run time
# All three start out undefined so the first run() loads everything fresh.
sub new {
    my ($class) = @_;
    my $self = $class->SUPER::new();

    for my $field ( 'templates', 'templates rev', 'nextrun' ) {
        $self->{$field} = undef;
    }

    return bless $self, $class;
}

=pod

=for info
Approved 2013-11-17<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT II 2]]

=cut

# Reports this task's approval value to the bot framework.
# NOTE(review): the meaning of 200 is defined by the framework, not by this
# file -- confirm against AnomieBOT::Task before changing it.
sub approved {
    my $approval = 200;
    return $approval;
}

# Main task entry point.
#
# Loads the list of dated maintenance templates from
# "Wikipedia:AutoWikiBrowser/Dated templates", then wraps each listed template
# that has changed since the bot last looked at it in Module:Unsubst (see
# upgrade_unsubst for the per-template decision).
#
# Parameters:
#   $self - the task object; caches the work list in 'templates', the list
#           page's revision id in 'templates rev', and the schedule in
#           'nextrun'.
#   $api  - the bot's API object (query, iterator, edittoken, edit, store,
#           log, warn, ...).
#
# Returns a number of seconds: the time remaining until the next scheduled
# run, 60 after an API failure, 300 when the task is shut off or pages need a
# retry, or 0 when halting / when the 5-minute work slice is used up.
# NOTE(review): presumably the framework uses this as the delay before calling
# run() again -- confirm against AnomieBOT::Task.
sub run {
    my ($self, $api)=@_;
    my ($res, $iter);

    $api->task('TemplateUnsubstifier',0,0,qw/d::Talk d::Redirects d::Templates/);

    # Time to run? Prefer the in-memory schedule, fall back to the persisted
    # one, and run immediately if neither exists.
    my $nextrun = $self->{'nextrun'} // $api->store->{'nextrun'} // 0;
    my $t = $nextrun - time();
    return $t if $t > 0;

    # If we have a cached work list, make sure the list page has not been
    # edited since the cache was built; if it has, throw the cache away.
    if ( defined( $self->{'templates rev'} ) ) {
        $res = $api->query(
            titles  => 'Wikipedia:AutoWikiBrowser/Dated templates',
            prop    => 'info',
        );
        if ( $res->{'code'} ne 'success' ) {
            $api->warn( "Failed to load info for Wikipedia:AutoWikiBrowser/Dated templates: " . $res->{'error'} . "\n" );
            return 60;
        }
        $res = (values %{$res->{'query'}{'pages'}})[0];
        if ( $res->{'lastrevid'} ne $self->{'templates rev'} ) {
            $self->{'templates'} = undef;
            $self->{'templates rev'} = undef;
        }
    }

    # Get the list of templates to check
    my @templates;
    if ( defined( $self->{'templates'} ) ) {
        @templates = @{$self->{'templates'}};
    } else {
        $api->log( "Loading templates list from Wikipedia:AutoWikiBrowser/Dated templates" );
        $res = $api->query(
            titles  => 'Wikipedia:AutoWikiBrowser/Dated templates',
            prop    => 'revisions',
            rvprop  => 'ids|timestamp|content',
            rvslots => 'main',
            rvlimit => 1,
        );
        if ( $res->{'code'} ne 'success' ) {
            $api->warn( "Failed to load Wikipedia:AutoWikiBrowser/Dated templates: " . $res->{'error'} . "\n" );
            return 60;
        }
        # From here until the end of this branch $res is the latest revision
        # of the list page (its 'revid' is recorded at the bottom).
        $res = (values %{$res->{'query'}{'pages'}})[0]{'revisions'}[0];
        $t = ISO2timestamp( $res->{'timestamp'} ) + 86400;
        if ( $t > time() ) {
            # Wait, so some vandal can't so easily add a bogus template to the
            # list and get the bot to edit it.
            # The list must be at least a day old; reschedule for 120 seconds
            # past the next multiple of 86400 after that point.
            $nextrun = $t - ($t % 86400) + 86520;
            $api->store->{'nextrun'} = $self->{'nextrun'} = $nextrun;
            $t = $nextrun - time();
            return $t if $t > 0;
        }

        # Extract the template names from {{tl|...}} / {{tlx|...}} entries,
        # normalising underscores and dropping any "Template:" prefix.
        my $txt = $res->{'slots'}{'main'}{'*'};
        $txt = $api->strip_nowiki($txt);
        $txt =~ s/_/ /g;
        $txt =~ s/\{\{\s*Template\s*:/\{\{/gi;
        @templates = ($txt=~/\{\{\s*[tT]lx?\s*\|\s*([^|]+?)\s*(?:\||\}\})/g);
        my %templates = $api->resolve_redirects( map "Template:$_", @templates );
        if ( exists( $templates{''} ) ) {
            # An entry under the empty key carries the error information.
            $api->warn( "Failed to resolve redirects in target template list: " . $templates{''}{'error'} . "\n" );
            return 60;
        }
        @templates = (values %templates);

        # Keep only existing, non-redirect templates whose latest revision
        # differs from the one we recorded the last time we processed them.
        $iter = $api->iterator(
            titles => bunchlist( 500, @templates ),
            prop => 'info',
        );
        @templates = ();
        while ( my $p = $iter->next ) {
            return 0 if $api->halting;

            if ( !$p->{'_ok_'} ) {
                $api->warn( "Failed to retrieve templates for WP:AWB/DT templates: " . $p->{'error'} . "\n" );
                return 60;
            }
            
            next unless $p->{'ns'} == 10; # Sanity check
            next unless exists( $p->{'pageid'} ) && exists( $p->{'lastrevid'} ); # Page missing or invalid?
            if ( exists( $p->{'redirect'} ) ) { # Redirect?
                $api->warn( "How did we manage to get a redirect ($p->{title}) in here? Skipping it." );
                next;
            }
            next if $p->{'lastrevid'} eq ( $api->store->{'lastrev ' . $p->{'pageid'}} // 0 );
            push @templates, $p->{'title'};
        }
        $self->{'templates'} = [@templates];
        $self->{'templates rev'} = $res->{'revid'};
    }

    # Check each template, for at most 300 seconds per call.
    my $endtime = time() + 300;
    my @retry = ();
    my $re = $api->redirect_regex();
    while ( @templates ) {
        return 0 if $api->halting;

        my $title = shift @templates;
        $res = $api->query(
            titles  => $title,
            prop    => 'revisions',
            rvprop  => 'ids|content',
            rvslots => 'main',
            rvlimit => 1,
        );
        if ( $res->{'code'} ne 'success' ) {
            # $self->{'templates'} still contains $title at this point, so it
            # will be picked up again on the next call.
            $api->warn( "Failed to load $title: " . $res->{'error'} . "\n" );
            return 60;
        }
        $res = (values %{$res->{'query'}{'pages'}})[0];
        my $pageid = $res->{'pageid'};
        my $revid = $res->{'revisions'}[0]{'revid'};
        my $intxt = $res->{'revisions'}[0]{'slots'}{'main'}{'*'};
        (my $name = $title) =~ s/^Template://;
        my $outtxt = undef;

        # Single-pass block: "last PAGE" abandons this page and falls through
        # to the bookkeeping below.
        PAGE: {
            # The page may have been deleted since the work list was built;
            # then there is no page id or content to work with.
            if ( !defined( $pageid ) || !defined( $intxt ) ) {
                $api->log( "$title has no readable current revision (deleted?), skipping" );
                last PAGE;
            }

            # Sanity check
            if ( $intxt =~ /$re/ ) {
                $api->warn( "HELP: $title looks like a redirect, refusing to edit\n" );
                last PAGE;
            }

            # Split into template and noinclude parts, then process.
            # $1 is the template body; $2 is any run of trailing <noinclude>
            # blocks, which is re-appended unchanged after the wrapper.
            if ( $intxt =~ /^\s*+(.+?)((?><noinclude>(?>[^<]+|<(?!\/?noinclude))*(?:<\/noinclude>|$))*)\s*$/s ) {
                my $trail = $2;
                my ($txt, $params) = $self->upgrade_unsubst( $api, $title, $1 );
                # $params begins with "|", so the text after "#invoke:Unsubst|"
                # (the function-name slot) stays empty.
                $outtxt = "{{ {{{|safesubst:}}}#invoke:Unsubst|$params|\$B=\n$txt\n}}$trail" if defined( $txt );
            } else {
                $api->warn( "HELP: $title doesn't match the basic regular expression, refusing to edit\n" );
                last PAGE;
            }

            if ( defined( $outtxt ) ) {
                my $tok = $api->edittoken( $title, EditRedir => 1 );
                if ( $tok->{'code'} eq 'shutoff' ) {
                    $api->warn( "Task disabled: " . $tok->{'content'} . "\n" );
                    return 300;
                } elsif ( $tok->{'code'} eq 'pageprotected' || $tok->{'code'} eq 'botexcluded' ) {
                    # Skip protected and excluded pages
                    $api->warn( "HELP: Cannot edit $title: " . $tok->{'error'} . "\n" );
                } elsif ( $tok->{'code'} ne 'success' ) {
                    $api->warn( "Failed to get edit token for $title: " . $tok->{'error'} . "\n" );
                    push @retry, $title;
                } elsif ( exists( $tok->{'missing'} ) ) {
                    # Was deleted, ignore
                } elsif ( $tok->{'lastrevid'} ne $revid ) {
                    # Edited since it was loaded! Retry it.
                    push @retry, $title;
                } else {
                    $api->log( "Unsubstifying $title" );
                    # NOTE(review): the two trailing 1s are edit flags whose
                    # meaning is defined by AnomieBOT::API::edit -- confirm.
                    my $r = $api->edit( $tok, $outtxt, "[[Module:Unsubst|Unsubstifying]] template, so {{subst:$name}} results in {{$name|date=...}}", 1, 1 );
                    if ( $r->{'code'} ne 'success' ) {
                        $api->warn( "Write failed on $title: " . $r->{'error'} . "\n" );
                        push @retry, $title;
                    } else {
                        # Remember our own edit so it is not reprocessed.
                        $revid = $r->{'edit'}{'newrevid'};
                    }
                }
            }
        }

        # Record the revision we handled (only when there is a real page and
        # revision to record) and persist the remaining work list in memory.
        $api->store->{"lastrev $pageid"} = $revid if defined( $pageid ) && defined( $revid );
        $self->{'templates'} = [@templates, @retry];
        return 0 if time() > $endtime;
    }

    return 300 if @retry;

    # No more pages to check for now: schedule the next pass for 120 seconds
    # past the next multiple of 86400.
    $self->{'templates'} = undef;
    $t = time();
    $nextrun = $t - ($t % 86400) + 86520;
    $api->store->{'nextrun'} = $self->{'nextrun'} = $nextrun;
    return $nextrun - time();
}

# Decide how a template's wikitext should be wrapped in Module:Unsubst.
#
# Parameters:
#   $self  - the task object (not otherwise used here)
#   $api   - API object; process_templates, process_paramlist and warn are
#            called on it
#   $title - page title, used only in warning messages
#   $txt   - the template's wikitext (the caller passes the part preceding any
#            trailing <noinclude> blocks)
#
# Returns a two-element list ($body, $params) when the template should be
# rewritten: $body is the wikitext to place in the wrapper and $params is the
# parameter string, always starting with "|date=__DATE__".  Returns undef when
# nothing should be done (already uses Module:Unsubst) or when the text cannot
# be converted safely (a "HELP:" warning is emitted in that case).
sub upgrade_unsubst {
    my ( $self, $api, $title, $txt ) = @_;
    my $params = '|date=__DATE__';

    # If it doesn't have "unsubst", it's fine to wrap.
    unless ( $txt =~ /unsubst/i ) {
        # sanity-check that the existing template code doesn't break template
        # syntax, see https://en.wikipedia.org/w/index.php?diff=prev&oldid=582081088
        # The text is embedded as the single parameter of a dummy template
        # whose name cannot occur in real wikitext; if the parser sees any
        # other number of parameters, the text has unbalanced syntax.
        my $tmp = $api->process_templates( "{{\x02foo\x03|1=$txt}}", sub {
            my $name = shift;
            my $params = shift;
            return undef unless $name eq "\x02foo\x03";
            return 'bad' unless @$params == 1;
            return 'ok';
        } );
        return ($txt, $params) if $tmp eq 'ok';
        $api->warn( "HELP: $title contains unwrappable content" );
        return undef;
    }

    # If it already uses Module:Unsubst, then we don't need to do anything to
    # it.
    return undef if $txt =~ /#invoke\s*:\s*[uU]nsubst\s*\|/;

    # Sanity check: if it contains anything other than a top-level
    # {{ifsubst}}, fail.
    my $unsubst = undef;
    my $body = undef;
    my $tmp = $api->process_templates( $txt, sub {
        my $name = shift;
        my $params = shift;
        $name =~ s!<includeonly>safesubst:</includeonly>!!;
        $name =~ s!\{\{\{\|safesubst:\}\}\}!!;
        return undef unless $name =~ /^\s*[iI]fsubst\s*$/;
        # Parameter 1 is the "when substituted" branch, parameter 2 is the
        # normal template body.
        foreach ($api->process_paramlist(@$params)) {
            $unsubst = $_->{'value'} if $_->{'name'} eq '1';
            ($body = $_->{'value'}) =~ s/^\s+|\s+$//g if $_->{'name'} eq '2';
        }
        # Replace the {{ifsubst}} with nothing, so anything left over is text
        # outside it.
        return '';
    } );
    unless ( $tmp =~ /^\s*$/ ) {
        $api->warn( "HELP: $title contains text other than {{ifsubst}}, cannot edit\n" );
        return undef;
    }
    unless ( $unsubst ) {
        $api->warn( "HELP: $title doesn't have anything in the 'unsubst' case of {{ifsubst}}, cannot edit\n" );
        return undef;
    }
    unless ( $body ) {
        $api->warn( "HELP: $title doesn't have anything in the 'body' case of {{ifsubst}}, cannot edit\n" );
        return undef;
    }

    # Extract parameters from the existing invocation of {{unsubst}}
    my $found = 0;
    $api->process_templates( $unsubst, sub {
        my $name=shift;
        my $uparams=shift;

        $name =~ s!^<includeonly>(?:safe)?subst:</includeonly>!!i;
        $name =~ s!^\{\{\{\|(?:safe)?subst:\}\}\}!!i;
        $name =~ s!^(?:safe)?subst:!!i;
        return undef unless $name =~ /^\s*[uU]nsubst\s*$/;

        $found = 1;
        my %params = ();
        foreach ($api->process_paramlist(@$uparams)) {
            $params{$_->{'name'}} = $_->{'value'};
        }
        # Positional parameters 2/3, 4/5, ... 18/19 are key/value pairs.
        for my $i ( 1 .. 9 ) {
            my ( $k, $v ) = ( $params{ $i*2 } // '', $params{ $i*2+1 } // '' );
            $k =~ s/^\s+|\s+$//g;
            $v =~ s/^\s+|\s+$//g;
            # 'date' is already in $params; empty keys carry nothing.
            next if $k eq '' || $k eq 'date';
            # Skip plain pass-throughs of the form {{{key|<U+00AC>}}}
            # (U+00AC is the NOT SIGN).  The braces are escaped because an
            # unescaped literal "{" in a pattern is deprecated, and fatal on
            # some Perl versions.
            next if $v =~ /^\{\{\{\Q$k\E\|\x{AC}\}\}\}$/;
            $params .= " |$k=$v";
        }
        # Leave the wikitext untouched; only the side effects matter here.
        return undef;
    } );
    if ( !$found ) {
        $api->warn( "HELP: $title doesn't contain {{unsubst}} in the 'unsubst' case of {{ifsubst}}, cannot edit\n" );
        return undef;
    }

    return ($body, $params);
}

1;