User:AnomieBOT/source/tasks/PERTableUpdater.pm

package tasks::PERTableUpdater;

=pod

=begin metadata

Bot:     AnomieBOT
Task:    PERTableUpdater
BRFA:    N/A
Status:  Begun 2011-12-04
Created: 2011-12-01

Update [[User:AnomieBOT/PERTable]], [[User:AnomieBOT/TPERTable]],
[[User:AnomieBOT/EPERTable]], [[User:AnomieBOT/SPERTable]],
[[User:AnomieBOT/COIREQTable]], and [[User:AnomieBOT/PREQTable]].

=end metadata

=cut

use utf8;
use strict;

use AnomieBOT::Task qw/:time bunchlist/;
use URI::Escape;
use HTML::Entities;
use Data::Dumper;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

my %protact=(
    'modify'    => 'Modified',
    'protect'   => 'Protected',
    'unprotect' => 'Unprotected',
);

sub new {
    my $class=shift;
    my $self=$class->SUPER::new();
    bless $self, $class;
    return $self;
}

=pod

=for info
Per [[WP:BOT#Approval]], any bot or automated editing process that only
affects only the operators' user and talk pages (or subpages thereof),
and which are not otherwise disruptive, may be run without prior
approval.

=cut

sub approved {
    return 999;
}

sub run {
    my ($self, $api)=@_;
    my $res;

    $api->task('PERTableUpdater', 0, 10, qw/d::Sections d::Timestamp d::Talk d::IWNS/);

    my $screwup=' Errors? [[User:'.$api->user.'/shutoff/PERTableUpdater]]';

    # Upgrade
    if(exists($api->store->{"pages0"})){
        $api->store->{"PER pages"}=$api->store->{"pages0"};
        delete $api->store->{"pages0"};
    }
    if(exists($api->store->{"pages1"})){
        $api->store->{"SPER pages"}=$api->store->{"pages1"};
        delete $api->store->{"pages1"};
    }
    if(($api->store->{"ver"}//1) < 2){
        for my $tag (qw/PER TPER SPER/) {
            next unless exists($api->store->{"$tag pages"});
            my %old = %{$api->store->{"$tag pages"}};
            my %new = ();
            while(my ($k,$v) = each %old) {
                $new{$k}{$v->{'talk'}} = $v if exists($v->{'talk'});
            }
            $api->store->{"$tag pages"} = \%new;
        }
        $api->store->{"ver"} = 2;
    }

    # Flush warnings daily
    my $ts = time;
    $ts -= $ts % 86400;
    if ( ($api->store->{'warnedBadRegexDate'} // 0) < $ts) {
        $api->store->{'warnedBadRegex'} = {};
        $api->store->{'warnedBadRegexDate'} = $ts;
    }

    # First, load MediaWiki:Titleblacklist to catch pages protected by
    # that mechanism
    my %tb=();
    my @sources=(
        [ 1, 'Global title blacklist', 'meta:Title blacklist' ],
        [ 2, 'Title blacklist', 'MediaWiki:Titleblacklist' ],
        [ 3, undef, 'MediaWiki:Titlewhitelist' ],
    );
    for my $source (@sources) {
        my ($i, $name, $page) = @$source;
        my $tb;
        if ( $page=~/^meta:(.+)$/ ) {
            $tb=$api->copy( wikibase => 'https://meta.wikimedia.org/w/', assert => 'user' )->rawpage( $1 );
        } else {
            $tb=$api->rawpage($page);
        }
        if($tb->{'code'} ne 'success'){
            $api->warn("Failed to load $page: ".$tb->{'error'}."\n");
            return 60;
        }
        my $ln = 0;
        for my $line (split /\r?\n/, $tb->{'content'}){
            $ln++;
            my $re=$line;
            my %opts=();
            $re=~s/^\s*([^#]*?)\s*(?:#.*)?$/$1/;
            if($re=~s/\s*<([^<>]*)>$//){
                my $opts=$1;
                $opts=~s/^\s+|\s+$//g;
                for my $opt (split /\s*\|\s*/, $opts){
                    if($opt=~/^([^=]*?)\s*=\s*(.+)$/){
                        $opts{lc($1)}=$2;
                    } else {
                        $opts{lc($opt)}=1;
                    }
                }
            }
            $re=~s/_/ /g;
            $re="(?-i:$re)" if($opts{'casesensitive'}//0);

            $re=~s!(\{\{\s*ns\s*:\s*(.+?)\s*\}\})! replace_ns($api,$2) // $1 !ge;

            # Try to escape left-braces that aren't quantifiers or parameters to escapes
            my $tmp = $re;
            $re=~s#(?<!\\[a-zA-Z])(?<!\\)\{(?!\d+(?:,\d*)?})#\\{#g;
            $self->warnBadRegex( $api, "$page:$ln: Escaped left-braces in regex (old): $tmp" ) if $tmp ne $re;
            $self->warnBadRegex( $api, "$page:$ln: Escaped left-braces in regex (new): $re" ) if $tmp ne $re;

            # Validate each line, in case someone screws up the blacklist page
            eval {
                no warnings;
                qr/^(?:$re)$/si;
            };
            if ( $@ ) {
                $self->warnBadRegex( $api, "$page:$ln: Ignoring bad regex '$re': $@\n");
                next;
            }

            # Log non-fatal warnings too.
            eval {
                use warnings FATAL => 'all';
                qr/^(?:$re)$/si;
            };
            if ( $@ ) {
                $self->warnBadRegex( $api, "$page:$ln: Warning: $@\n");
            }

            # Let's just hope no one ever uses {{int:}} here...

            $tb{$re}={
                i => $i,
                source => $name ? "[[$page|$name]]" : undef,
                line => $line,
                opts => \%opts
            } unless(($opts{'moveonly'}//0) || ($opts{'newaccountonly'}//0));
        }
    }

    # Fields are:
    # 0: Namespaces to color "attention" instead of "normal"
    # 1: "Tag", also used in the name of the subpage the table is put on
    # 2: Category name, no prefix
    # 3: Type of request, i.e. "$type edit requests"
    # 4: URL fragment for request links
    # 5: NID component of the urn links
    # 6: List of color-classes to apply based on page protection level:
    #    0: unprotected
    #    1: semi-protected
    #    2: semi-protected via title blacklist
    #    3: extended-confirmed protected
    #    4: template protected
    #    5: User JSON page
    #    6: User CSS/JS page
    #    7: fully protected
    #    8: "fully" protected via title blacklist
    #    9: cascading protection
    #   10: MediaWiki-namespace page
    #   11: MediaWiki-namespace CSS/JS page
    my @data=(
        [[10,828],'PER','Wikipedia fully protected edit requests','protected','editprotected','x-wp-editprotected',[qw/error error error error error normal error normal caution caution caution error/]],
        [[10,828],'TPER','Wikipedia template-protected edit requests','template-protected','edittemplateprotected','x-wp-edittemplateprotected',[qw/error error error error normal error error error caution error error error/]],
        [[10,828],'EPER','Wikipedia extended-confirmed-protected edit requests','extended-confirmed-protected','editextendedprotected','x-wp-editextendedprotected',[qw/error error error normal error error error error error error error error/]],
        [[10,828],'SPER','Wikipedia semi-protected edit requests','semi-protected','editsemiprotected','x-wp-editsemiprotected',[qw/error normal caution error error error error error error error error error/]],
        [[],'IPER','Wikipedia interface-protected edit requests','interface-protected','editinterfaceprotected','x-wp-editinterfaceprotected',[qw/error error error error error error normal error error error error normal/]],
        [[0],'COIREQ','Wikipedia conflict of interest edit requests','COI','requestedit','x-wp-requestedit',[qw/normal caution caution caution error error error error error error error error/]],
        [[],'PREQ','Wikipedia partial-block edit requests','partial block','editpartiallyblocked','x-wp-editpartiallyblocked',[qw/normal caution caution caution error error error error error error error error/]],
    );
    my $starttime=time;
    for my $data (@data){
        my ($attentionns,$tag,$cat,$type,$tgt,$urn,$colors)=@$data;
        my $iter=$api->iterator(
            generator      => 'categorymembers',
            gcmtitle       => "Category:$cat",
            gcmlimit       => 'max',
            prop           => 'info|extlinks',
            elprotocol     => 'urn',
            ellimit        => 'max',
        );
        my %oldpages=%{$api->store->{"$tag pages"}//{}};
        my %pages=();
        while(my $p=$iter->next){
            if(!$p->{'_ok_'}){
                $api->warn("Failed to retrieve members for CAT:$tag: ".$p->{'error'}."\n");
                return 60;
            }
            next unless $p->{'ns'}&1;
            my @pages = map {
                if($_->{'*'}=~/^urn:$urn:(.+)$/i) {
                    my $url = $1;
                    $url = uri_unescape( $url );
                    $url = decode_entities( $url );
                    utf8::decode( $url );
                    $url =~ s/_/ /g;
                    $url;
                } else {
                    ();
                }
            } @{$p->{'extlinks'}//[]};
            unless(@pages){
                my $t=$p->{'title'};
                if($p->{'ns'}==1){
                    $t=~s/^Talk://;
                } else {
                    $t=~s/^([^:]+) talk:/$1:/;
                }
                push @pages, $t;
            }
            for my $t (@pages) {
                my $tt = $p->{'title'};
                $pages{$t}{$tt}=($oldpages{$t}{$tt} // {
                        title   => $t,
                        talk    => $p->{'title'},
                        touched => ISO2timestamp($p->{'touched'}),
                    });
                $pages{$t}{$tt}{'reqisredir'} = defined( $p->{'redirect'} );
                delete $pages{$t}{$tt}{'color'};
                delete $pages{$t}{$tt}{'prottype'};
                delete $pages{$t}{$tt}{'reason'};
                delete $pages{$t}{$tt}{'logtitle'};
            }
        }
        $api->store->{"$tag pages"}=\%pages;
        $api->store->{"ver"} = 2;

        if(%pages){
            $iter=$api->iterator(
                titles => bunchlist(500, keys %pages),
                prop   => 'info',
                inprop => 'protection',
            );
            while(my $p=$iter->next){
                if(!$p->{'_ok_'}){
                    $api->warn("Failed to retrieve members for CAT:$tag: ".$p->{'error'}."\n");
                    return 60;
                }
                my $t=$p->{'title'};

                my ($k,$pd) = each %{$pages{$t}}; # Get first

                # Protection scoring "bitmap":
                # 0x8000 = MediaWiki-namespace CSS/JS auto-protection
                # 0x4000 = MediaWiki-namespace auto-protection
                # 0x2000 = Cascading protection
                # 0x1000 = Full protection
                #  0x100 = User script auto-protection
                #   0x80 = User JSON auto-protection
                #   0x40 = Template-protection
                #   0x20 = Extended-confirmed protection
                #   0x10 = Semi-protection
                #   0x08 = Directly-applied protection
                #   0x02 = Title blacklist protection
                # Highest score by int value "wins".
                my $protscore=0;
                $pd->{'prottype'}='Not protected';
                $pd->{'reason'}='';
                if($p->{'ns'}==8 && ($p->{'contentmodel'} eq 'javascript' || $t=~m!\.js$!)){
                    $pd->{'prottype'}='Site JS page';
                    $protscore=0xc000;
                } elsif($p->{'ns'}==8 && ($p->{'contentmodel'} eq 'css' || $t=~m!\.css$!)){
                    $pd->{'prottype'}='Site CSS page';
                    $protscore=0xc000;
                } elsif($p->{'ns'}==8){
                    $pd->{'prottype'}='MediaWiki page';
                    $protscore=0x4000;
                } elsif($p->{'ns'}==2 && ($p->{'contentmodel'} eq 'javascript' || $t=~m!/.*\.js$!)){
                    $pd->{'prottype'}='User JS page';
                    $protscore=0x100;
                } elsif($p->{'ns'}==2 && ($p->{'contentmodel'} eq 'css' || $t=~m!/.*\.css$!)){
                    $pd->{'prottype'}='User CSS page';
                    $protscore=0x100;
                } elsif($p->{'ns'}==2 && ($p->{'contentmodel'} eq 'json' || $t=~m!/.*\.json$!)){
                    $pd->{'prottype'}='User JSON page';
                    $protscore=0x80;
                }

                my $tb = undef;
                while(my ($re,$data)=each %tb){
                    next if $tb && $tb->{'i'} >= $data->{'i'};
                    next unless(exists($p->{'missing'}) || ($data->{'opts'}{'noedit'}//0));
                    next unless $t=~/^(?:$re)$/si;
                    $tb = $data;
                }
                if ( $tb && $tb->{'source'} ) {
                    my $sc=exists($tb->{'opts'}{'autoconfirmed'})?0x12:0x42;
                    next if $sc<$protscore;
                    $pd->{'prottype'}=$tb->{'source'};
                    my $line=$tb->{'line'};
                    $pd->{'reason'}=qq(Matching line: <syntaxhighlight lang="text" inline>$line</syntax).qq(highlight>);
                    $protscore=$sc;
                }

                my $expiry=undef;
                my $pg=$t;
                my $prottype = exists( $p->{'missing'} ) ? 'create' : 'edit';
                for my $pp (@{$p->{'protection'}//[]}){
                    next unless $pp->{'type'} eq $prottype;
                    my $sc=0;
                    $sc|=0x1000 if $pp->{'level'} eq 'sysop';
                    $sc|=0x40 if $pp->{'level'} eq 'templateeditor';
                    $sc|=0x20 if $pp->{'level'} eq 'extendedconfirmed';
                    $sc|=0x10 if $pp->{'level'} eq 'autoconfirmed';
                    $sc|=exists($pp->{'source'})?0x2000:0x08;
                    $sc|=0x2000 if exists($pp->{'cascade'});
                    next if $sc<$protscore;
                    if(exists($pp->{'source'})){
                        $pg=$pp->{'source'};
                        $pd->{'prottype'}="Cascade-protected from [[:$pg]]";
                    } else {
                        $pg=$t;
                        $pd->{'prottype'}='Fully protected' if $pp->{'level'} eq 'sysop';
                        $pd->{'prottype'}='Template-protected' if $pp->{'level'} eq 'templateeditor';
                        $pd->{'prottype'}='Extended-confirmed protected' if $pp->{'level'} eq 'extendedconfirmed';
                        $pd->{'prottype'}='Semiprotected' if $pp->{'level'} eq 'autoconfirmed';
                        $pd->{'prottype'}.=' with cascading' if exists($pp->{'cascade'});
                    }
                    $pd->{'prottype'}.=strftime(', expires %F at %T UTC', gmtime ISO2timestamp($pp->{'expiry'})) if $pp->{'expiry'} ne 'infinity';
                    $protscore=$sc;
                    $pd->{'reason'}='';
                }

                $pd->{'color'}=$colors->[0];
                if($protscore & 0x10){
                    $pd->{'color'}=$colors->[1];
                    $pd->{'color'}=$colors->[2] if($protscore & 0x02);
                }
                $pd->{'color'}=$colors->[3] if($protscore & 0x20);
                if($protscore & 0x40){
                    $pd->{'color'}=$colors->[4] if($protscore & 0x40);
                    $pd->{'color'}=$colors->[8] if($protscore & 0x02);
                }
                $pd->{'color'}=$colors->[5] if($protscore & 0x80);
                $pd->{'color'}=$colors->[6] if($protscore & 0x100);
                $pd->{'color'}=$colors->[7] if($protscore & 0x1000);
                $pd->{'color'}=$colors->[9] if($protscore & 0x2000);
                $pd->{'color'}=$colors->[10] if($protscore & 0x4000);
                $pd->{'color'}=$colors->[11] if($protscore & 0x8000);
                $pd->{'color'}='attention' if($pd->{'color'} eq 'normal' && grep($p->{'ns'}==$_, @$attentionns));

                if($pd->{'reason'} eq ''){
                    my $iter=$api->iterator(
                        list    => 'logevents',
                        letype  => 'protect',
                        letitle => $pg,
                    );
                    my $from='';
                    while(my $le=$iter->next){
                        if(!$le->{'_ok_'}){
                            $api->warn("Failed to retrieve protection log for $pg: ".$le->{'error'}."\n");
                            return 60;
                        }
                        if($le->{'action'} eq 'move_prot'){
                            $from="From [[:".$le->{'params'}{'oldtitle_title'}."]]: ";
                            $iter=$api->iterator(
                                list    => 'logevents',
                                letype  => 'protect',
                                letitle => $le->{'params'}{'oldtitle_title'},
                                lestart => $le->{'timestamp'},
                            );
                            next;
                        }
                        next unless exists($protact{$le->{'action'}});
                        $le->{'timestamp'}=~s/T.*//;
                        my $comment=$le->{'comment'};
                        $comment=~s/\s*\[[^]]*\](?: \(expires [^)]*\))?$//;
                        $pd->{'reason'}=$from.$protact{$le->{'action'}}.' by [[User:'.$le->{'user'}.'|'.$le->{'user'}.']] on '.$le->{'timestamp'}.': "'.esccomment($comment).'"';
                        last;
                    }
                }
                $pd->{'logtitle'}=$pg;
                $pd->{'isredir'}=defined( $p->{'redirect'} );

                # now fill in the rest
                my @keys = qw/prottype reason color logtitle/;
                while(my ($k, $pd2) = each %{$pages{$t}}) {
                    @{$pd2}{@keys} = @{$pd}{@keys};
                }
            }
        }

        # The formatting here is a little strange, for backwards compat
        my @pages=map { values %$_ } values %pages;
        my $txt = qq(<noinclude>{{User:AnomieBOT/PERTableHeader}}</noinclude>\n);
        $txt.=qq(<div class="veblenbot-pertable">\n);
        $txt.=qq(<templatestyles src="Template:Edit_fully-protected/color_legend/styles.css"/>\n);
        $txt.=qq({| class="wikitable" style="padding:0em"\n);
        $txt.=qq(|-\n);
        my $ct=scalar @pages;
        my $s=($ct==1?'':'s');
        my $pg='User:AnomieBOT/'.$tag.'Table';
        $txt.=qq(! <section begin="count" />$ct<section end="count" /> [[:Category:$cat|$type edit request$s]] <div style="float:right;white-space:nowrap">[[$pg|v]]&middot;<span class="plainlinks">[//en.wikipedia.org/w/index.php?title=$pg&action=history h]</span></div>\n);
        $txt.=qq(|-\n);
        $txt.=qq(|\n);
        $txt.=qq({| class="wikitable sortable" width=100% style="margin:0em"\n);
        $txt.=qq(! Page\n);
        $txt.=qq(! Tagged since\n);
        $txt.=qq(! Protection level\n);
        $txt.=qq(! class = "unsortable" | Last protection log entry\n);
        for my $p (sort { my $x = $a->{'touched'} <=> $b->{'touched'}; $x = $a->{'title'} cmp $b->{'title'} if $x == 0; return $x; } @pages){
            my $c=$p->{'color'};
            my $t=$p->{'title'};
            my $et=encodetitle($p->{'logtitle'});
            my $tt=$p->{'talk'};
            my $pt=$p->{'prottype'};
            my $r=$p->{'reason'};
            my $tl = $p->{'isredir'} ? "{{-r|1=$t}}" : "[[:$t]]";
            my $ttl = $p->{'reqisredir'} ? "{{-r|1=$tt#$tgt|2=request}}" : "[[$tt#$tgt|request]]";
            $txt.=qq(|- class="protectededit-legend-$c"\n);
            $txt.=qq(| $tl ($ttl)\n);
            $txt.=strftime("| %F %H:%M\n", gmtime $p->{'touched'});
            $txt.=qq(| $pt <span class="plainlinks">([//en.wikipedia.org/w/index.php?title=Special:Log&type=protect&page=$et log])</span>\n);
            $txt.=qq(| $r\n);
        }
        $txt.=qq(|}\n);
        $txt.=qq(|-\n);
        $txt.=qq(|style="text-align:right;font-size:smaller"| Updated as needed. Last updated: <!--TS-->~~~~~<!--/TS-->\n);
        $txt.=qq(|}</div>);

        my $tok=$api->edittoken($pg, EditRedir=>1);
        if($tok->{'code'} eq 'shutoff'){
            $api->warn("Task disabled: ".$tok->{'content'}."\n");
            return 300;
        }
        if($tok->{'code'} ne 'success'){
            $api->warn("Failed to get edit token for $pg: ".$tok->{'error'}."\n");
            next;
        }
        my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'}//'';
        $intxt=~s/^\s+|\s+$//;
        $intxt=~s#<!--TS-->.*<!--/TS-->#<!--TS-->~~~~~<!--/TS-->#g;
        if($intxt ne $txt){
            $api->log("Updating $pg ($ct request$s)");
            my $r=$api->edit($tok, $txt, "Update table ($ct request$s)", 0, 0);
            if($r->{'code'} ne 'success'){
                $api->warn("Write failed on $pg: ".$r->{'error'}."\n");
            }
        }
    }

    my $t=$starttime-time+300;
    $t=0 if $t<0;
    return $t;
}

sub warnBadRegex {
    my ($self, $api, $msg) = @_;

    $msg =~ s/\s+$//;
    my $file = __FILE__;
    $msg =~ s/ at \Q$file\E line \d+\.$//;

    if ( ! defined( $api->{'noedit'} ) ) {
        my $warned = $api->store->{'warnedBadRegex'};
        return if exists( $warned->{$msg} );
        $warned->{$msg} = 1;
        $api->store->{'warnedBadRegex'} = $warned;
    }

    $api->warn( "$msg\n" );
}

sub esccomment {
    my $c=shift;
    $c=~s/{/&#x7b;/g;
    $c=~s/</&lt;/g;
    $c=~s/>/&gt;/g;
    $c=~s/~/&#x7e;/g;
    $c=~s/\|\]\]/]]/g; # Pipe trick
    $c=~s/\[\[\|/[[/g; # Reverse pipe trick
    return $c;
}

sub encodetitle {
    my $t=shift;
    $t=~s/ /_/g;
    $t=uri_escape_utf8($t, '^A-Za-z0-9_\-.:/~');
    return $t;
}

sub replace_ns {
    my ($api,$ns)=@_;

    if($ns=~/^([+-]?[0-9]+)/){
        $ns=int($1);
    } else {
        $ns=~s/_/ /g;
        my %x=$api->namespace_map();
        $ns=$x{$ns} // undef;
        return undef unless defined($ns);
    }
    my %x=$api->namespace_reverse_map();
    return $x{$ns} // undef;
}

1;