use strict;
use warnings;

use MediaWiki::Bot;
use Encode;

# NOTE(review): this turns off TLS hostname verification for LWP. The bot sends
# its password over this connection, so this should be removed once the host's
# CA bundle is known to be working. Left in place to avoid breaking the
# existing deployment.
$ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0;

# ---------------------------------------------------------------------------
# Run-wide state and tunables
# ---------------------------------------------------------------------------

my $bot_name = "Joe's Null Bot";

my $epoch      = time();    # when this run started
my $listcount  = 0;         # pages seen in the category
my $purgecount = 0;         # pages we attempted to purge

my $max_runtime_secs = 86400;    # one day
my $max_purges       = 1500;     # sanity limit on category size

# Our own delay parameters
my $standardelay = 2;     # Wait 2s between pages....
my $longdelay    = 50;    # ...and take a 50s break if the API puts us off

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# Bot-exclusion check, gently pruned from the standard exclusion code to
# hardcode the bot's user name and drop the opt-out message types.
#
#   $text   - wikitext of the page (undef is treated as empty text)
#
# Returns 1 if this bot may touch the page, 0 if the page excludes it via
# {{nobots}}, {{bots|allow=...}} or {{bots|deny=...}}.
sub allowBots {
    my ($text) = @_;
    my $user = $bot_name;

    $text = '' unless defined $text;

    # Literal braces are escaped: an unescaped "{" in a pattern is deprecated
    # or rejected by newer perls.
    return 0 if $text =~ /\{\{[nN]obots\}\}/;
    return 1 if $text =~ /\{\{[bB]ots\}\}/;

    if ($text =~ /\{\{[bB]ots\s*\|\s*allow\s*=\s*(.*?)\s*\}\}/s) {
        my $allowed = $1;
        return 1 if $allowed eq 'all';
        return 0 if $allowed eq 'none';
        my @bots = split(/\s*,\s*/, $allowed);
        return (grep { $_ eq $user } @bots) ? 1 : 0;
    }

    if ($text =~ /\{\{[bB]ots\s*\|\s*deny\s*=\s*(.*?)\s*\}\}/s) {
        my $denied = $1;
        return 0 if $denied eq 'all';
        return 1 if $denied eq 'none';
        my @bots = split(/\s*,\s*/, $denied);
        return (grep { $_ eq $user } @bots) ? 0 : 1;
    }

    return 1;
}

# Have the bot check in to see if it's run past its "expiration date" of
# $max_runtime_secs seconds (that is, one day). Mostly here to avoid ten copies
# of the bot running if nothing can run for ten days. Also bails out if the
# number of purges suggests the category is unexpectedly large.
#
# Dies when either limit is exceeded; otherwise returns nothing.
sub check_expirations {
    my $secs = time() - $epoch;
    if ($secs > $max_runtime_secs) {
        die "Bot expired of old age.\n";
    }
    if ($purgecount > $max_purges) {
        die "This category is looking disturbingly large. Quitting.\n";
    }
    return;
}

# Shared reaction to a failed API call. For the two failures we consider
# transient (server lag, Bad Gateway) sleep for $longdelay seconds and return
# so the caller's loop can retry; for anything else, die with the error code
# and details recorded on the bot object.
#
#   $bot - the object whose {error}{code} / {error}{details} describe the
#          failure
sub pause_or_die {
    my ($bot) = @_;

    my $error   = $bot->{error} || {};
    my $details = defined $error->{details} ? $error->{details} : '';
    my $code    = defined $error->{code}    ? $error->{code}    : '';

    if ($details =~ /Server has reported lag above the configure/) {
        sleep $longdelay;
    }
    elsif ($details =~ /Bad Gateway/) {
        print "bad gateway\n";
        sleep $longdelay;
    }
    else {
        die $code . ': ' . $details;
    }
    return;
}

# ---------------------------------------------------------------------------
# Set up the bot
# ---------------------------------------------------------------------------

# Within a single MediaWiki call, we ask the API to make up to 5 attempts,
# 10 s apart, until the worst-case server lag is better than 5s.
my $mw = MediaWiki::Bot->new({
    assert   => 'bot',
    host     => 'en.wikipedia.org',
    protocol => 'https',
    operator => "Joe's Null Bot",
    #debug   => "2",
}) or die "new mwbot fail";

$mw->{config}->{api_url} = 'https://en.wikipedia.org/w/api.php';

# Delay/retry parameters
$mw->{config}->{max_lag}         = 5;     # Tell MediaWiki to put us off if there's a 5s+ db lag out there
$mw->{config}->{max_lag_delay}   = 10;    # ..and to wait 10s between retries
$mw->{config}->{max_lag_retries} = 4;     # ..and to only make 4 retries before dropping back to our code

# ---------------------------------------------------------------------------
# Log in
# ---------------------------------------------------------------------------

# NOTE(review): the password is stored in the source file; consider moving it
# to a protected config file or environment variable.
while (1) {
    last if $mw->login({ username => "Joe's Null Bot", password => 'REDACTED' });
    check_expirations();
    pause_or_die($mw);
}

# ---------------------------------------------------------------------------
# Get list of articles
# ---------------------------------------------------------------------------

my $articles;
while (1) {
    check_expirations();
    $articles = $mw->list({
        action  => 'query',
        list    => 'categorymembers',
        cmtitle => 'Category:Pending AfC submissions',
        cmlimit => 'max',
    });
    last if $articles;
    pause_or_die($mw);
}

# ---------------------------------------------------------------------------
# Scan through the articles...
# ---------------------------------------------------------------------------

for my $entry (@{$articles}) {
    my $thistitle = $entry->{title};
    $listcount++;
    print "T: " . encode("iso-8859-1", $thistitle) . "\n";

    # Declared outside the retry loop so the fetched page is still in scope
    # for the exclusion check below.
    my $pagehash;
    while (1) {
        check_expirations();
        $pagehash = $mw->get_page({ title => $thistitle });
        last if $pagehash;
        pause_or_die($mw);
    }

    sleep $standardelay;    # There's no hurry!

    if (allowBots($pagehash->{'*'})) {
        $purgecount++;

        # ...and purge each one
        while (1) {
            check_expirations();
            my $apires = $mw->api({
                action          => 'purge',
                titles          => $thistitle,
                forcelinkupdate => 1,
            });
            last if $apires;
            pause_or_die($mw);
        }
    }
    else {
        print "….DENIED\n";
    }
}

# ---------------------------------------------------------------------------
# Finally purge the by-age parent category (single attempt, no retry)
# ---------------------------------------------------------------------------

my $apires = $mw->api({
    action => 'purge',
    titles => "Category:AfC pending submissions by age",
});

if ($apires) {
    print "AFCpsba: cat purged\n";
}
else {
    my $error   = $mw->{error} || {};
    my $details = defined $error->{details} ? $error->{details} : '';
    my $code    = defined $error->{code}    ? $error->{code}    : '';

    if ($details =~ /Server has reported lag above the configure/) {
        die "AFCpsba: delayed for replag\n";
    }
    else {
        die "AFCpsba: " . $code . ': ' . $details;
    }
}

print $purgecount . " from a total list of " . $listcount . " articles in " . (time() - $epoch) . " seconds.\n";