User:AnomieBOT/source/tasks/ArticleCreationGrapher.pm

package tasks::ArticleCreationGrapher;

=pod

=begin metadata

Bot:      AnomieBOT
Task:     ArticleCreationGrapher
BRFA:     Wikipedia:Bots/Requests for approval/AnomieBOT 41
Status:   Inactive 2022-01-25
Created:  2010-08-25

Creates a graph showing article creation for a project.

=end metadata

=cut

use utf8;
use strict;

use Data::Dumper;
use IPC::Open2;
use AnomieBOT::Task qw/:time/;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

# Graph format version. run() regenerates a page's graph when the epoch saved
# in the bot's store for that page is lower than this value.
my $epoch=2; # Increment this to force a new graph, e.g. if an improved version of gnuplot is installed

# Command list handed to open2() to start gnuplot.
my @gnuplot=("/usr/bin/gnuplot");
# When true, run() skips the category/page API queries and graphs only the
# page data already cached in the bot's store.
my $nonet=0;

# One entry per graph to maintain. Fields, as used by run():
#   for           - project name interpolated into the generated page text and file description
#   cats          - category title(s) passed as gcmtitle to the categorymembers query
#   page          - title of the wiki page holding the table; also used as the key for stored state
#   page_cats     - categories appended to the generated page text
#   writepage     - when true, the table page is rewritten in addition to the graph
#   graph         - title of the file the SVG is uploaded to
#   size          - [width, height] given to gnuplot's "set terminal svg ... size"
#   thumbsize     - pixel width used in the image link on the generated page
#   graph_cats    - categories appended to the file description text
#   x2ticsettings - extra options for gnuplot's "set x2tics" (only used when x2tics is non-empty)
#   x2tics        - explicit x2 tic list for gnuplot; empty string disables the x2 axis tics
my @pages=(
    {
        for => 'WikiProject Medicine - Dermatology task force',
        cats => [ 'Category:Dermatology task force articles' ],
        page => 'Wikipedia:WikiProject Medicine/Dermatology task force/Articles created',
        page_cats => [],
        writepage => 0,
        graph => 'File:File-WikiProject Medicine - Dermatology task force - Articles created.svg',
        size => [1000,600],
        thumbsize => 800,
        graph_cats => [],
        x2ticsettings => 'rotate by 0 scale 0.4',
        x2tics => '',
    },
);

# Constructor: builds the object through the parent class's new() (called
# with no arguments) and reblesses it into the requested class.
sub new {
    my ($class)=@_;
    my $obj=$class->SUPER::new();
    return bless($obj, $class);
}

=pod

=for info
Approved 2010-08-29<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 41]]

=for info
Task discontinued 2022-01-25. The graph has trailed off, the page it's on hasn't been updated in years and gets minimal views, and we now have [[mw:Extension:Graph]] that can generate graphs from data instead of having to have an image uploaded monthly. If someone still wants such a graph, it would be better to have a bot create a data table somewhere to be graphed via that extension.

=cut

# Approval status reported for this task; always the constant -6.
# NOTE(review): presumably the framework treats a negative value as
# "not active", in line with the Inactive status in the metadata - confirm
# against AnomieBOT::Task.
sub approved {
    my $status=-6;
    return $status;
}

# Main task body. For every entry in @pages:
#   1. migrate any state stored in the old single-key layout;
#   2. unless the graph is still current (nextrun in the future and stored
#      epoch >= $epoch), collect the creation timestamp, title and redirect
#      flag of every page found through the configured categories, caching
#      the results in the bot's store under "pg:<page>";
#   3. feed the per-month article and redirect counts to gnuplot to get an SVG;
#   4. optionally rewrite the table page, then upload the SVG and refresh the
#      file description;
#   5. record the epoch and schedule the next run for the first day of the
#      following month (UTC).
#
# Parameters: $self - the task object; $api - the bot API object.
# Returns 0 when the bot is halting, 300 when an API call reports 'shutoff',
# 60 after a failed query, and otherwise the time until the earliest stored
# "nextrun" (never less than 60). NOTE(review): the value is presumably the
# number of seconds the framework waits before calling run() again - confirm
# against AnomieBOT::Task.
sub run {
    my ($self, $api)=@_;
    my $res;

    $api->task('ArticleCreationGrapher', 0, 10, qw/d::Timestamp/);

    foreach my $page (@pages){
        return 0 if $api->halting;

        # Migrate state saved under the bare page title to the current
        # "epoch:", "nextrun:" and "pg:" keys.
        if ( exists( $api->store->{$page->{'page'}} ) ) {
            my $pgdata=$api->store->{$page->{'page'}};
            $api->store->{'epoch:'.$page->{'page'}} = $pgdata->{'epoch'} // 0;
            $api->store->{'nextrun:'.$page->{'page'}} = $pgdata->{'nextrun'};
            $api->store->{'pg:'.$page->{'page'}} = $pgdata->{'pages'};
            delete( $api->store->{$page->{'page'}} );
        }

        my $nextrun = $api->store->{'nextrun:'.$page->{'page'}} // 0;
        my $pages = $api->store->{'pg:'.$page->{'page'}} // {};
        my @pp=();
        if($nonet){
            # Offline mode: graph whatever is already cached.
            foreach my $v (values %$pages){
                next unless ref($v) eq 'HASH';
                push @pp, { redir=>$v->{'r'}, ts=>$v->{'t'}, title=>$v->{'tt'} };
            }
        } else {
            # Nothing to do while the graph is current and was made by this epoch.
            next if($nextrun > time() && ($api->store->{'epoch:'.$page->{'page'}}//0) >= $epoch);
            $api->log( "Loading data to graph " . $page->{'page'} );
            my $i=$api->iterator(
                generator    => 'categorymembers',
                gcmtitle     => $page->{'cats'},
                gcmlimit     => 'max',
                gcmnamespace => '0|1',
                gcmtype      => 'page',
                prop         => 'info',
                inprop       => 'subjectid',
            );
            my @pageids=();
            while(my $m=$i->next){
                if(!$m->{'_ok_'}){
                    $api->warn("Failed to retrieve page list for $page->{page}: ".$m->{'error'}."\n");
                    return 60;
                }
                # Only members that report a subjectid are counted.
                push @pageids, $m->{'subjectid'} if exists($m->{'subjectid'});
            }
            my @px=();   # page ids already cached; only title/redirect status gets refreshed
            my $ct=0;
            foreach my $p (@pageids) {
                return 0 if $api->halting;
                if(exists($pages->{$p})){
                    push @px, $p;
                    next;
                }
                if($ct++>=100){
                    # periodic save
                    $api->store->{'pg:'.$page->{'page'}}=$pages;
                    $ct=0;
                }
                # The first revision in "newer" order carries the creation timestamp.
                my $info=$api->query(
                    pageids => $p,
                    prop    => 'info|revisions',
                    rvprop  => 'timestamp',
                    rvdir   => 'newer',
                    rvlimit => 1
                );
                if($info->{'code'} eq 'shutoff'){
                    $api->store->{'pg:'.$page->{'page'}}=$pages;
                    $api->warn("Task disabled: ".$info->{'content'}."\n");
                    return 300;
                }
                if($info->{'code'} ne 'success'){
                    $api->store->{'pg:'.$page->{'page'}}=$pages;
                    $api->warn("Failed to get info for $p: ".$info->{'error'}."\n");
                    return 60;
                }
                my $pg=$info->{'query'}{'pages'}{$p} // undef;
                next unless defined($pg);
                my $is_redir=exists($pg->{'redirect'});
                my $ts=$pg->{'revisions'}[0]{'timestamp'} // '';
                next if $ts eq '';
                $ts=$api->ISO2timestamp($ts);
                $pages->{$p}={
                    t=>$ts,
                    r=>$is_redir,
                    tt=>$pg->{'title'},
                };
                push @pp, {
                    title=>$pg->{'title'},
                    redir=>$is_redir,
                    ts=>$ts,
                };
            }
            $api->store->{'pg:'.$page->{'page'}}=$pages;
            if(@px){
                # Refresh title and redirect status of cached pages, 500 ids per request.
                my %q=(
                    pageids => [],
                    prop    => 'info',
                );
                while(@px){
                    push @{$q{'pageids'}}, join('|', splice(@px, 0, 500));
                }
                $i=$api->iterator(%q);
                while(my $m=$i->next){
                    if(!$m->{'_ok_'}){
                        $api->warn("Failed to retrieve page list for $page->{page}: ".$m->{'error'}."\n");
                        return 60;
                    }
                    my $p=$m->{'pageid'};
                    $pages->{$p}{'r'}=exists($m->{'redirect'});
                    $pages->{$p}{'tt'}=$m->{'title'};
                    push @pp, {
                        title=>$m->{'title'},
                        redir=>$pages->{$p}{'r'},
                        ts=>$pages->{$p}{'t'},
                    };
                }
                $api->store->{'pg:'.$page->{'page'}}=$pages;
            }
        }

        # Without any data, $pp[0]{'ts'} below would autovivify a phantom
        # entry and produce a graph starting in 1970 with one bogus article.
        if(!@pp){
            $api->warn("No pages found to graph for $page->{page}\n");
            next;
        }

        $api->log( "Graphing data for " . $page->{'page'} );
        @pp=sort {
            my $x=($a->{'ts'} <=> $b->{'ts'});
            $x=($a->{'title'} cmp $b->{'title'}) if !$x;
            $x;
        } @pp;
        my $first_ts=$pp[0]{'ts'};

        # $gp_out is gnuplot's stdout (we read it), $gp_in its stdin (we write it).
        my ($gp_out, $gp_in);
        my $pid = open2($gp_out, $gp_in, @gnuplot);
        my ($w,$h)=@{$page->{'size'}};
        my $range='["'.g_tt($first_ts).'":"'.g_tt(time).'"]';
        print $gp_in <<EOH ;
            set terminal svg enhanced size $w $h font "DejaVu Sans"

            set key horizontal bmargin center
            set autoscale
            set ytics nomirror out
            set yrange [0:*]

            set xdata time
            set x2data time
            set timefmt "%Y-%m"
            set xtics nomirror out format "%b %Y"

            set xrange $range
            set x2range $range
EOH
        print $gp_in <<EOH if $page->{x2tics} ne '';
            set x2tics nomirror out $page->{x2ticsettings}
            set x2tics ($page->{x2tics})
            set grid x2tics
EOH

        # One x tic per month from the first creation month through the
        # current month: Januaries are major tics labelled with the year,
        # every other month is an unlabelled minor tic.
        my ($mon,$year)=(gmtime $first_ts)[4,5];
        my $end=strftime('%Y-%m', gmtime);
        my @tics=();
        my $x;
        do {
            $x=strftime('%Y-%m', 0,0,0,1,$mon,$year);
            if(++$mon>11){
                $mon=0;
                $year++;
            }
            if($x=~/^(\d+)-01$/){
                push @tics, qq("$1" "$x" 0);
            } else {
                push @tics, qq("" "$x" 1);
            }
        } while($x lt $end);  # "YYYY-MM" strings order chronologically; also stops if the first month is past $end
        print $gp_in "set xtics (", join(',', @tics), ")\n";

        print $gp_in "plot '-' using 1:2 title \"Articles\" w filledcurves x1 fs transparent solid 0.1 lc rgb \"#0000ff\", ";
        print $gp_in "'-' using 1:2 title \"Redirects\" w filledcurves x1 fs transparent solid 0.1 lc rgb \"#ff0000\"\n";
        # First inline data set: articles per month.
        my $xx=g_init($first_ts);
        foreach my $p (@pp){
            $xx=g_update($xx, $p->{'ts'}, 0, $gp_in);
            g_add($xx) unless $p->{'redir'};
        }
        g_update($xx, time, 1, $gp_in);
        print $gp_in "e\n";
        # Second inline data set: redirects per month.
        $xx=g_init($first_ts);
        foreach my $p (@pp){
            $xx=g_update($xx, $p->{'ts'}, 0, $gp_in);
            g_add($xx) if $p->{'redir'};
        }
        g_update($xx, time, 1, $gp_in);
        print $gp_in "e\n";
        close $gp_in;

        # Drain gnuplot's output BEFORE reaping it: waiting first deadlocks
        # as soon as the SVG is larger than the pipe buffer.
        my $svg;
        {
            local $/=undef;
            $svg=<$gp_out>;
        }
        close $gp_out;
        waitpid($pid, 0);

        if(!defined($svg) || $svg eq ''){
            $api->warn("gnuplot produced no output for $page->{page}\n");
            next;
        }

        if($page->{'writepage'}){
            $api->log( "Updating " . $page->{'page'} );
            my $tok=$api->edittoken($page->{'page'});
            if($tok->{'code'} eq 'shutoff'){
                $api->warn("Task disabled: ".$tok->{'content'}."\n");
                return 300;
            }
            if($tok->{'code'} ne 'success'){
                $api->warn("Failed to get edit token for $page->{page}: ".$tok->{'error'}."\n");
                next;
            }
            my $txt="This is a compilation of article creation dates for $page->{for}\n\nThis page is generated by {{User|".$api->user."}} once a month. Last generated: ~~~~~\n\n[[$page->{graph}|".$page->{'thumbsize'}."px|frameless|center|alt=Line graph of article and redirect creation dates by month]]\n\n{| class=\"wikitable\"\n! Article Created !! Article Title !! Type\n";
            foreach my $p (@pp){
                $txt.="|-\n| ".strftime('%F, %T', gmtime $p->{'ts'})." || [[:".$p->{'title'}."]] || ".($p->{'redir'}?'Redirect':'Article')."\n";
            }
            $txt.="|}\n\n";
            foreach my $c (@{$page->{'page_cats'}}){
                $txt.="[[Category:$c]]\n";
            }
            $res=$api->edit($tok, $txt, "Update page statistics", 0, 0);
            if($res->{'code'} eq 'shutoff'){
                $api->warn("Task disabled: ".$res->{'content'}."\n");
                return 300;
            }
            if($res->{'code'} ne 'success'){
                $api->warn("Failed to update $page->{page}: ".$res->{'error'}."\n");
                next;
            }
        }

        $api->log( "Uploading image file " . $page->{'graph'} . ' for ' . $page->{'page'} );
        my $tok=$api->edittoken($page->{'graph'});
        if($tok->{'code'} eq 'shutoff'){
            $api->warn("Task disabled: ".$tok->{'content'}."\n");
            return 300;
        }
        if($tok->{'code'} ne 'success'){
            $api->warn("Failed to get edit token for $page->{graph}: ".$tok->{'error'}."\n");
            next;
        }
        my $comment="Updated graph";
        my $txt="{{imbox|type=style|image=[[File:Ambox warning yellow.svg|50px]]|imageright=[[File:Crystal Clear action run.svg|50px]]|text=This image is automatically updated by a bot, {{User|".$api->user."}}. Any changes will be overwritten automatically.<center>'''Do not move this file to Wikimedia Commons.'''<br /><small>If for some reason you need to stop the bot, place {{tl|nobots}} on this page or post a message [[User:".$api->user."/shutoff/ArticleCreationGrapher|here]].</small></center>}}\n{{Information\n|description=Bot-generated graph of [[$page->{page}|$page->{for}]] article creation\n|source=Own work, created using [[Gnuplot]]\n|date=~~~~~\n|author={{User|".$api->operator."}} as the author of {{User|".$api->user."}}\n|permission={{PD-self|date=August 2010}}\n}}\n{{esoteric file}}\n\n[[Category:Wikipedia charts]]\n";
        foreach my $c (@{$page->{'graph_cats'}}){
            $txt.="[[Category:$c]]\n";
        }
        # For a file that does not exist yet, the upload comment becomes the
        # initial description text.
        $comment=$txt if exists($tok->{'missing'});
        $res=$api->upload($tok, Data=>$svg, Comment=>$comment, IgnoreWarnings=>1);
        if($res->{'code'} eq 'shutoff'){
            $api->warn("Task disabled: ".$res->{'content'}."\n");
            return 300;
        }
        if($res->{'code'} ne 'success'){
            $api->warn("Failed to upload new version for $page->{graph}: ".$res->{'error'}."\n");
            next;
        }

        # Next run: midnight UTC on the first day of the following month.
        # The month must be rolled over by hand; passing month 12 in
        # December is outside the 0..11 range timegm accepts.
        my @now=gmtime;
        my ($next_mon,$next_year)=($now[4]+1, $now[5]);
        ($next_mon,$next_year)=(0, $next_year+1) if $next_mon>11;
        $api->store->{'pg:'.$page->{'page'}}=$pages;
        $api->store->{'epoch:'.$page->{'page'}}=$epoch;
        $api->store->{'nextrun:'.$page->{'page'}}=timegm(0,0,0,1,$next_mon,$next_year);

        if(!exists($tok->{'missing'})){
            $api->log( "Updating image description for " . $page->{'graph'} );
            $res=$api->edit($tok, $txt, "Update page text", 0, 0);
            if($res->{'code'} eq 'shutoff'){
                $api->warn("Task disabled: ".$res->{'content'}."\n");
                return 300;
            }
            if($res->{'code'} ne 'success'){
                $api->warn("Failed to update page text for $page->{graph}: ".$res->{'error'}."\n");
            }
        }
    }

    # No more pages to check, try again later
    my $t=undef;
    foreach my $page (@pages){
        # A page that has never completed has no stored nextrun; treat it as due.
        my $nextrun=$api->store->{'nextrun:'.$page->{'page'}} // 0;
        $t=$nextrun if(!defined($t) || $t>$nextrun);
    }
    # A page that failed above leaves a missing or past nextrun; wait at
    # least 60 (the same delay used for failed queries) instead of returning
    # a negative value.
    my $wait=($t // 0)-time();
    $wait=60 if $wait<60;
    return $wait;
}

# Convert an epoch timestamp into the "YYYY-MM" key (UTC) of the month it
# falls in; this is the format used for gnuplot's time axis data.
sub g_tt {
    my ($epoch_ts)=@_;
    my ($mon, $year)=(gmtime $epoch_ts)[4,5];
    return strftime('%Y-%m', 0, 0, 0, 1, $mon, $year);
}

# Create a fresh accumulator for one plotted series, starting in the month
# of the given epoch timestamp. The state is an array ref:
#   [ count for the current month, baseline count (0), current "YYYY-MM",
#     month of the last data point printed (undef until one is printed) ]
sub g_init {
    my ($ts)=@_;
    my @state=(0, 0, g_tt($ts), undef);
    return \@state;
}

# Count one more page in the accumulator's current month. The state array
# (see g_init) is modified in place; the value returned is the count before
# the increment.
sub g_add {
    my ($state)=@_;
    return $state->[0]++;
}

# Advance a series accumulator (see g_init) to the month containing $ts,
# writing gnuplot data lines ("YYYY-MM count") to $fh as months complete.
#
# Parameters:
#   state ref - [count, baseline, current month, last printed month]
#   $ts       - epoch timestamp of the item about to be counted
#   $force    - when true, also print a point for $ts's month (used to end
#               the series at the current date)
#   $fh       - filehandle the data lines are written to
# Returns a new state array ref; the one passed in is not modified.
#
# When the month changes and the finished month has a non-zero count, that
# month's point is printed, preceded by a baseline point for the month
# before it if the previously printed point is not already that month.
# The tracked month always moves on to $ts's month, even when the finished
# month was empty: otherwise pages counted afterwards would be attributed
# to the stale month and a single month's total could be split in two.
sub g_update {
    my ($count,$base,$month,$printed)=@{shift()};
    my ($ts,$force,$fh)=@_;

    my $cur=g_tt($ts);
    if($month ne $cur){
        if($count!=$base){
            my ($y,$m)=split(/-/,$month);
            # $m is 1-based, so $m-2 is the 0-based index of the previous
            # month; strftime normalizes -1 to December of the year before.
            my $before=strftime('%Y-%m', 0,0,0,1,$m-2,$y-1900);
            print $fh "$before $base\n" if(defined($printed) && $printed ne $before);
            print $fh "$month $count\n";
            $printed=$month;
        }
        $month=$cur;
        $count=0;
        $base=0;
    }
    print $fh "$cur $count\n" if($force && (!defined($printed) || $cur ne $printed));
    return [$count,$base,$month,$printed];
}

1;