User:AnomieBOT/source/tasks/NewArticleAFDTagger.pm

package tasks::NewArticleAFDTagger;

=pod

=begin metadata

Bot:     AnomieBOT
Task:    NewArticleAFDTagger
BRFA:    Wikipedia:Bots/Requests for approval/AnomieBOT 62
Status:  Approved 2012-04-15
Created: 2012-03-27

If a new article has been deleted in the past via AFD, add {{tl|old AfD multi}} to its talk page.

=end metadata

=cut

use utf8;
use strict;

use Data::Dumper;
use POSIX;
use Date::Parse;
use AnomieBOT::Task qw/:time bunchlist/;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

# Constructor. Builds the object through the parent class constructor and
# starts the error counter ('errct') at zero; run() increments that counter
# on API failures and clears it after a page is processed successfully.
sub new {
    my ($class) = @_;

    my $task = $class->SUPER::new();
    $task->{errct} = 0;

    # bless returns the (re-)blessed reference, so hand it straight back.
    return bless $task, $class;
}

=pod

=for info
Approved 2012-04-15<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 62]]

=cut

# Reports the approval status of this task as the constant 3.
# NOTE(review): the meaning of the value is defined by the AnomieBOT task
# framework (not visible in this file) -- presumably a positive value marks
# the task as approved to run; confirm against AnomieBOT::Task.
sub approved {
    my $approval_code = 3;
    return $approval_code;
}

# Main entry point for one pass of the task.
#
# Walks the recentchanges feed of newly created pages in namespace 0. For
# each page that has delete-log entries and still exists, it collects AfD
# discussion pages (either linked from the deletion summaries and linking
# back to the page, or named after the page), keeps those that look closed,
# and prepends {{old AfD multi}} to the talk page unless one of the templates
# listed in %templates is already there.
#
# Parameters:
#   $self - this task object; $self->{'errct'} counts API failures since the
#           last successfully processed page.
#   $api  - the API object used for all queries, edits, logging and storage.
#
# Returns a number: 300 when the task is shut off, 60 after an API failure,
# 0 when halting or when the 5-minute budget is used up, and 3600 once the
# whole list has been processed.
# NOTE(review): presumably this is the number of seconds before the framework
# calls run() again -- confirm against AnomieBOT::Task.
sub run {
    my ($self, $api)=@_;
    my $res;

    # Identify this task to the API object.
    # NOTE(review): the meaning of the numeric and d::* arguments is defined
    # by the API object's task() method, which is not visible here.
    $api->task('NewArticleAFDTagger', 0, 10, qw/d::Templates d::Talk d::Redirects/);

    # Appended to the edit summary so readers can find the shutoff page.
    my $screwup="Errors? [[User:AnomieBOT/shutoff/NewArticleAFDTagger]]";

    # Get template list. A talk page carrying any template named in this hash
    # is treated as already tagged. An entry under the empty-string key is
    # handled as an error result rather than a template.
    my %templates=$api->redirects_to_resolved('Template:Old AfD', 'Template:Old AfD multi', 'Template:ArticleHistory');
    if(exists($templates{''})){
        if($templates{''}{'code'} eq 'shutoff'){
            $api->warn("Task disabled: ".$templates{''}{'content'}."\n");
            return 300;
        }
        $api->warn("Failed to get template redirects: ".$templates{''}{'error'}."\n");
        return 60;
    }

    # Spend a max of 5 minutes on this task before restarting
    my $endtime=time()+300;

    # Process new pages, resuming from the stored position or, when nothing
    # has been stored yet, from seven days ago.
    my $rcstart=$api->store->{'rcstart'} // (time-7*86400);
    my $iter=$api->iterator(
        list        => 'recentchanges',
        rcstart     => timestamp2ISO($rcstart),
        rcdir       => 'newer',
        rcnamespace => 0,
        rcprop      => 'title|timestamp',
        rctype      => 'new',
        rclimit     => 'max',
    );
    # Raise a notice when 50 or more failures have accumulated without a
    # successfully processed page in between.
    if($self->{'errct'}>=50){
        $api->whine('Persistent errors!', 'The NewArticleAFDTagger task is running into repeated API errors. Please check the log. Thanks.');
    }
    while(my $p=$iter->next){
        return 0 if $api->halting;

        # If we've been at it long enough, let another task have a go.
        return 0 if time()>=$endtime;

        # A false '_ok_' marks an error result from the iterator instead of a
        # recentchanges entry.
        if(!$p->{'_ok_'}){
            if($p->{'code'} eq 'shutoff'){
                $api->warn("Task disabled: ".$p->{'content'}."\n");
                $self->{'errct'}=0;
                return 300;
            }
            $api->warn("Failed to retrieve new pages list: ".$p->{'error'}."\n");
            $self->{'errct'}++;
            return 60;
        }

        # Bare block used as a one-shot loop: "last PROC" skips the rest of
        # the work for this page but still falls through to the progress
        # bookkeeping after the block. Error paths "return" instead, so the
        # stored position is not advanced past a page that failed.
        PROC: {
            #$api->log("Checking $p->{title}");

            # Has this page been deleted before? And does it still exist?
            # One request fetches both: the delete-log entries for the title
            # and (via titles=) the page's own entry for the existence check.
            $res=$api->query(
                titles  => $p->{'title'},
                list    => 'logevents',
                letype  => 'delete',
                letitle => $p->{'title'},
                leprop  => 'comment',
                lelimit => 'max',
            );
            if($res->{'code'} ne 'success'){
                $api->warn("Failed to fetch log events for $p->{title}: ".$res->{'error'}."\n");
                $self->{'errct'}++;
                return 60;
            }
            my @le=@{$res->{'query'}{'logevents'}//[]};
            last PROC unless @le;

            #$api->log("$p->{title} was previously deleted!");

            # A defined 'missing' value on the page entry means the page does
            # not exist (any more).
            if ( defined( (values %{$res->{'query'}{'pages'}})[0]{'missing'} ) ) {
                #$api->log("$p->{title} no longer exists!");
                last PROC;
            }

            # Yes. Find AFDs linked from deletion summaries
            # %try holds candidate AfD titles still to be verified; %afd holds
            # the AfD titles accepted for this page.
            my %afd=();
            my %try=();
            foreach my $le (@le) {
                # Treat underscores as spaces so both link spellings match.
                $le->{'comment'}=~s/_/ /g;
                #$api->log("$p->{title}: $le->{comment}");
                $try{"Wikipedia:Articles for deletion/$1"}=1 if $le->{'comment'}=~/\[\[\s*:?\s*(?i:WP|Wikipedia)\s*:\s*Articles for deletion\/(.+?)(?:\||\]\])/;
            }
            
            # Now double check these linked AfDs. If they're really for this
            # article, the article should be linked from the AfD. It'll miss
            # some cases where a redirect was G8ed, but that's arguably ok too.
            if(%try){
                # pltitles restricts the returned links to this page's title,
                # so a non-empty 'links' list means the AfD links to the page.
                my $iter2=$api->iterator(
                    titles   => bunchlist(500, keys %try),
                    prop     => 'links',
                    pllimit  => 'max',
                    pltitles => $p->{'title'}
                );
                while(my $pp=$iter2->next){
                    if(!$pp->{'_ok_'}){
                        $api->warn("Failed to fetch links for AfDs for $p->{title}: ".$pp->{'error'}."\n");
                        $self->{'errct'}++;
                        return 60;
                    }
                    $afd{$pp->{'title'}}=1 if @{$pp->{'links'}//[]};
                }
            }

            # Find AFDs matching the page name: list namespace-4 pages whose
            # title starts with "Articles for deletion/<page title>".
            my $iter2=$api->iterator(
                generator    => 'allpages',
                gapprefix    => "Articles for deletion/$p->{title}",
                gapnamespace => 4,
                gaplimit     => 'max',
            );
            while(my $pp=$iter2->next){
                if(!$pp->{'_ok_'}){
                    $api->warn("Failed to fetch AFD list for $p->{title}: ".$pp->{'error'}."\n");
                    $self->{'errct'}++;
                    return 60;
                }
                my $t=$pp->{'title'};
                #$api->log("$p->{title}: $t");
                # The prefix search also returns longer, unrelated titles, so
                # accept only the exact page name, optionally followed by a
                # parenthesised nomination suffix such as "(2nd nomination)".
                $afd{$t}=1 if $t=~m{^Wikipedia:Articles for deletion/\Q$p->{title}\E(?i:\s*\((?:\d+(?:st|nd|rd|th)?|first|second|third|fourth|fifth|sixth|seventh|eighth|ninth)?\s*(?:relist)?\s*(?:nom|nomination)?\))?$};
            }

            # Found any AFDs?
            last PROC unless %afd;

            # Now, try to find the info for each AFD.
            my @results=();
            foreach my $afd (keys %afd){
                my ($date,$result,$page);
                # $page is the AfD title with the common prefix removed.
                ($page=$afd)=~s!^Wikipedia:Articles for deletion/!!;
                # The oldest revision (rvdir=newer, rvlimit=1) supplies the
                # date used for this AfD.
                $res=$api->query(
                    titles  => $afd,
                    prop    => 'revisions',
                    rvprop  => 'timestamp',
                    rvlimit => 1,
                    rvdir   => 'newer'
                );
                if($res->{'code'} ne 'success'){
                    $api->warn("Failed to fetch creation timestamp for $afd: ".$res->{'error'}."\n");
                    $self->{'errct'}++;
                    return 60;
                }
                $res=(values %{$res->{'query'}{'pages'}})[0];
                next if exists($res->{'missing'});
                $date=ISO2timestamp($res->{'revisions'}[0]{'timestamp'});
                # Second request: content of a single revision with no rvdir
                # given. NOTE(review): presumably that is the latest revision,
                # as the warning text below suggests -- confirm.
                $res=$api->query(
                    titles  => $afd,
                    prop    => 'revisions',
                    rvprop  => 'content',
                    rvslots => 'main',
                    rvlimit => 1,
                );
                if($res->{'code'} ne 'success'){
                    $api->warn("Failed to fetch current revision for $afd: ".$res->{'error'}."\n");
                    $self->{'errct'}++;
                    return 60;
                }
                my $txt=(values %{$res->{'query'}{'pages'}})[0]{'revisions'}[0]{'slots'}{'main'}{'*'};
                # Only text containing "boilerplate" followed on the same line
                # by afd/xfd/vfd is accepted as a closed discussion.
                unless($txt=~/boilerplate.*[axv]fd/){
                    # Currently active AfD?
                    #$api->log("$p->{title}: $afd is active!");
                    next;
                }
                # Extract the outcome: prefer the bolded text after
                # "The result ... was"; otherwise take up to 40 characters
                # (counting a whole [[link]] as one) ending before ". " or
                # " by ". Falls back to 'Unknown' when neither matches.
                $result='Unknown';
                $result="'''$1'''" if($txt=~/The result(?: of the (?:debate|nomination|discussion))? was:?\s+'''(.+?)'''/ || $txt=~/The result(?: of the (?:debate|nomination|discussion))? was:?\s+((?:\[\[.*?\]\]|.){1,40}?)(?:\. | by )/);
                push @results, { date=>$date, result=>$result, page=>$page };
            }
            # Oldest AfD first, so the numbered template parameters come out
            # in chronological order.
            @results = sort { $a->{'date'} <=> $b->{'date'} } @results;
            last PROC unless @results;

            # Ok, tag the talk page
            # Derive the talk page title: "Talk:" prefix for namespace 0,
            # otherwise turn the first ":" into " talk:".
            my $title=$p->{'title'};
            if($p->{'ns'}==0){
                $title="Talk:$title";
            } else {
                $title=~s/:/ talk:/;
            }
            # The edit token result also carries the current page text (read
            # below from its 'revisions' entry) and a 'redirect' key.
            # NOTE(review): EditRedir=>1 presumably keeps the token request
            # from following a redirect -- confirm against the API class.
            my $tok=$api->edittoken($title, EditRedir=>1);
            if($tok->{'code'} eq 'shutoff'){
                $api->warn("Task disabled: ".$tok->{'content'}."\n");
                $self->{'errct'}=0;
                return 300;
            }
            if($tok->{'code'} eq 'pageprotected' || $tok->{'code'} eq 'botexcluded'){
                # Skip protected and excluded pages
                $api->warn("Cannot edit $title: ".$tok->{'error'}."\n");
                last PROC;
            }
            if($tok->{'code'} ne 'success'){
                $api->warn("Failed to get edit token for $title: ".$tok->{'error'}."\n");
                $self->{'errct'}++;
                return 60;
            }
            if(exists($tok->{'redirect'})){
                $api->log("$title is a redirect, skipping");
                last PROC;
            }
            # Current talk page text; empty string when there is none.
            my $intxt=($tok->{'revisions'}[0]{'slots'}{'main'}{'*'} // '');
            # Look for any template from %templates in the existing text.
            # $found ends up holding the name of the last such template seen.
            # NOTE(review): returning undef presumably tells process_templates
            # to leave the template untouched -- its result is discarded here.
            my $found=0;
            $api->process_templates($intxt, sub {
                my $name=shift;
                $found=$name if exists($templates{"Template:$name"});
                return undef;
            });
            if($found){
                $api->log("$title already has {{$found}}, skipping");
                last PROC;
            }

            # Build the template call with one numbered date/result/page
            # triple per AfD, then put it in front of the existing text.
            my $outtxt="{{old AfD multi";
            my $i=1;
            foreach my $r (@results){
                $outtxt.="\n |date$i = ".strftime('%B %-d, %Y', gmtime $r->{'date'})." |result$i = ".$r->{'result'}." |page$i = ".$r->{'page'};
                $i++;
            }
            $outtxt.="\n}}\n$intxt";

            $api->log("Marking $title with {{Old AfD multi}}");
            $res=$api->edit($tok, $outtxt, "Adding {{old AfD multi}} for prior AfDs related to this article. $screwup");
            if($res->{'code'} ne 'success'){
                $api->warn("Write failed on $title: ".$res->{'error'}."\n");
                $self->{'errct'}++;
                return 60;
            }
        }

        # This page is finished (tagged or skipped): clear the failure count
        # and store its timestamp as the starting point for the next run.
        $self->{'errct'}=0;
        $api->store->{'rcstart'}=ISO2timestamp($p->{'timestamp'});
    }

    # Done, wait a bit until the next run
    return 3600;
}

1;