#!/usr/bin/perl # Insist that all variables be declared use strict; # Allow for utf8 characters use utf8; # Use the MediaWiki::Bot library use MediaWiki::Bot; # Use the Encode library use Encode; # Declare some global variables my ($user, $pass) = ("",""); my ($task, $edit_summary, $is_minor) = ("","",""); my (@pagelist, @tasklist, @templatelist, @regexplist); my ($namespace_string, $templates) = ("",""); # Set default values $is_minor = 1; # Label edits as minor $user = 'SporkBot'; $edit_summary = 'Orphan per [[WP:TFD|TFD outcome]]'; $task = 0; my @tasklist = ( "Remove a template and trailing whitespace and newline", "Replace a template and whitespace with a single space", "Replace a template with nothing", "Substitute a template and use subst=subst:", "Substitute a template" ); # Do we want to orphan or substitute? print "What task is being performed?\n"; for( my $i=0; $i <= $#tasklist; $i++) { print $i .") ". $tasklist[$i] ."\n"; } print "Press enter for default [".$task."]\n"; print "Task: "; $task = &my_get_response($task); # Get the name of the template to orphan print "Which template(s) would you like to orphan?\n"; print "For a list, delimit with commas\n"; print "Template: "; $templates = &my_get_response("BLANK RESPONSE"); # Replace underscores and excess spaces $templates =~ s/[_ ]+/ /g; $templates =~ s/^ [ ]*//; $templates =~ s/ [ ]*$//; # Strip off leading "Template:" and $templates =~ s/[ ]*[Tt]emplate[ ]*:[ ]*//; # Create the template orphan list @templatelist = split /[ ]*,[ ]*/, $templates; # Set the namespace $namespace_string = &my_get_namespace_list(); # Get username print "Username [".$user."]: "; $user = &my_get_response($user); # Get password print "Password [".$pass."]: "; $pass = &my_get_response($pass); # Create an editor object on the English language WP my $editor=MediaWiki::Bot->new($user); $editor->set_wiki('en.wikipedia.org','w'); # Turn debugging on, to see what the script is doing $editor->{debug} = 1; # Log in if ( $editor->login($user, $pass) ) { print "Failed to login!\n"; exit; } # Attempt to get the link to the TFD in question, using first on list my @plist = $editor->what_links_here_ns("Template:".$templatelist[0],4); # Loop over all pages in the page list foreach my $pagestruct (@plist) { # Only want links, not transclusions if ( $pagestruct->{'type'} =~ /^$/ ) { # Get the actual page name from the page structure my $page = $pagestruct->{'title'}; if ( $page =~ /\/log\//i ) { if ( ($task == 3) or ($task == 4) ) { $edit_summary = "Replace template per ". "[[".$page."|TFD outcome]]; no change in content"; } else { $edit_summary = "Remove template per ". "[[".$page."|TFD outcome]]"; } } } } # Allow user to override edit summary print "The current edit summary is set to:\n$edit_summary\n"; print "Provide a new edit summary, or press enter to use the default\n"; print "Summary: "; $edit_summary = &my_get_response($edit_summary); # Create a list of regular expressions foreach (@templatelist) { my $str = $_; my @rlist = $editor->what_links_here_ns("Template:".$str, 10); # Escape any characters used in regular expressions $str =~ s/([()\-\.\[\]])/\\$1/gi; # First character is case insensitive $str =~ s/^([A-Za-z])(.*)$/[\u$1\l$1]$2/; # Wikipedia allows for underscores and spaces in template references $str =~ s/ /[_ ]+/gi; # Add template to the list push @regexplist, $str; # Add redirects to the list foreach (@rlist) { my $tstruct = $_; # Only want redirects, not links or transclusions if( $tstruct->{'type'} =~ /redirect/ ) { $str = $tstruct->{'title'}; # Remove leading Template: in name $str =~ s/^Template://; # Escape any characters used in regular expressions $str =~ s/([()\-\.\[\]])/\\$1/gi; # First character is case insensitive $str =~ s/^([A-Za-z])(.*)$/[\u$1\l$1]$2/; # Wikipedia allows for underscores and spaces in template references $str =~ s/ /[_ ]+/gi; push @regexplist, $str; } } } # Get the list of all transclusions in the specified namespaces foreach my $template (@templatelist) { if ($namespace_string) { # Loop over each namespace and get the list of transclusions foreach my $ns (split /\&/, $namespace_string) { if ( $ns =~ /ns([0-9]+)[=]1/ ) { my @alist = $editor->what_links_here_ns("Template:".$template, $1); foreach my $pagestruct (@alist) { # Only want transclusions, not links if ( $pagestruct->{'type'} =~ /transclusion/ ) { push @pagelist, $pagestruct->{'title'}; } } } } } else { # Get the list of transclusions in all namespaces my @alist = $editor->what_links_here_ns("Template:".$template, ''); foreach my $pagestruct (@alist) { # Only want transclusions, not links if ( $pagestruct->{'type'} =~ /transclusion/ ) { push @pagelist, $pagestruct->{'title'}; } } } } # Remove duplicates from the page list @pagelist = sort(@pagelist); my $prev = "not ".$pagelist[0]; @pagelist = grep($_ ne $prev && ($prev = $_, 1), @pagelist); # Confirm job paramters before starting if( &my_confirm_job() ) { # Job not confirmed, so exit exit; } # Loop over all pages in the page list foreach my $page (@pagelist) { # Show which pages are being processed print "Processing: ".$page."\n"; # Pull the wikitext for the page my $text=$editor->get_text($page); # Save the old wikitext for diff my $old = $text; # Split the page into chunks to isolate comments my @chunks = split(/(<!--.*?-->)/s, $text); # Loop over sections foreach $text (@chunks) { if ($text =~ /<!--/) { # Skip chunks consisting of only comments } else { foreach my $str (@regexplist) { if ($task == 0) { # Remove the template and trailing spaces/newlines $text =~ s/{{[_ ]*(?:[Tt]emplate:|)[_ ]*($str)[_ ]*(\||}})[ ]*[\r\n]?//g; } elsif ($task == 1) { # Replace the template with whitespace $text =~ s/[ ]*{{[_ ]*(?:[Tt]emplate:|)[_ ]*($str)[_ ]*(\||}})[ ]*/ /g; } elsif ($task == 2) { # Replace the template with nothing $text =~ s/{{[_ ]*(?:[Tt]emplate:|)[_ ]*($str)[_ ]*(\||}})//g; } elsif ($task == 3) { # Substitute the template with subst=subst: $text =~ s/{{[_ ]*(?:[Tt]emplate:|)[_ ]*($str)[_ ]*(\||}})/{{subst:$1|subst=subst:$2/g; } elsif ($task == 4) { # Substitute the template $text =~ s/{{[_ ]*(?:[Tt]emplate:|)[_ ]*($str)[_ ]*(\||}})/{{subst:$1$2/g; } } } } $text = join('', @chunks); &my_simple_diff($old,$text); print "Press enter to submit, s to skip, or ctrl-c to quit\n"; my $response = &my_get_response(""); if ($response =~ /s/i) { print "Skipping...\n"; } else { print "Submitting...\n"; # Submit to Wikipedia. # Warning: This does not warn of edit conflicts. $editor->edit($page, $text, $edit_summary, $is_minor); } # Take a break (frequent edits are forbidden per bot policy) print "Sleep 10\n"; sleep 10; } sub my_get_namespace_list() { my $all_ns = '&ns0=1&ns1=1&ns2=1&ns3=1&ns4=1&ns5=1&ns6=1&ns7=1' .'&ns8=1&ns9=1&ns10=1&ns11=1&ns12=1&ns13=1&ns14=1' .'&ns15=1&ns100=1&ns101=1&ns108=1&ns109=1'; my $no_ns = '&ns0=0&ns1=0&ns2=0&ns3=0&ns4=0&ns5=0&ns6=0&ns7=0' .'&ns8=0&ns9=0&ns10=0&ns11=0&ns12=0&ns13=0&ns14=0' .'&ns15=0&ns100=0&ns101=0&ns108=0&ns109=0'; my ($num,$list) = ("",""); print "Provide a list of namespaces in which the script will operate\n"; print "-------------- ------------------- -------------- -----------------\n"; print " 0 Article 1 Talk 2 User 3 User talk\n"; print " 4 Wikipedia 5 Wikipedia talk 6 File 7 File talk\n"; print " 8 MediaWiki 9 MediaWiki talk 10 Template 11 Template talk\n"; print " 12 Help 13 Help talk 14 Category 15 Category talk\n"; print "100 Portal 101 Portal talk 108 Book 109 Book talk\n"; print "-------------- ------------------- -------------- -----------------\n"; print "* Delimit list with commas for multiple namespaces\n"; print "* Start the list with 'no' for the compliment of the list\n"; print "* Leave blank for everything\n"; print "Namespace: "; my $ns = <STDIN>; # Remove spaces and newlines $ns =~ s/[\t\r\n ]//g; if( $ns =~ /^$/ ) { # Everything $list = ""; } elsif( $ns =~ /^no[t ]*([0-9].*)$/ ) { # Set compliment $ns = $1; $list = $all_ns; foreach $num (split /[ ]*,[ ]*/, $ns) { $list =~ s/ns($num)=1/ns$1=0/g; } } else { # Set $list = $no_ns; foreach $num (split /[ ]*,[ ]*/, $ns) { $list =~ s/ns($num)[=]0/ns$num=1/g; } } $list =~ s/\&ns[0-9]+=0//g; return $list; } sub my_confirm_job() { my $opt = 4; while( ($opt < 5) && ($opt > 0) ) { if ( $opt == 1 ) { print "----------------------------------------------------------------\n"; print " Template list \n"; print "----------------------------------------------------------------\n"; foreach (@templatelist) { print $_."\n"; } } elsif ( $opt == 2 ) { print "----------------------------------------------------------------\n"; print " Regular expression list \n"; print "----------------------------------------------------------------\n"; foreach (@regexplist) { print $_."\n"; } } elsif ( $opt == 3 ) { print "----------------------------------------------------------------\n"; print " Page list \n"; print "----------------------------------------------------------------\n"; foreach (@pagelist) { print $_."\n"; } } elsif ( $opt == 4 ) { print "----------------------------------------------------------------\n"; print " Edit Summary \n"; print "----------------------------------------------------------------\n"; print "\n".$edit_summary."\n\n"; print "----------------------------------------------------------------\n"; print " Job Summary \n"; print "----------------------------------------------------------------\n"; print "\n"; print " Username: ". $user ."\n"; print " Task: ". $tasklist[$task] . "\n"; print " Number of templates: ". scalar @templatelist ."\n"; print " Number of templates + redirects ". scalar @regexplist ."\n"; print " Number of pages to process ". scalar @pagelist ."\n"; print " Namespace string ". $namespace_string ."\n"; print "----------------------------------------------------------------\n"; print "\n"; } else { print " Unknown option\n"; $opt = 5; } print " 0) Start job\n"; print " 1) Show list of templates\n"; print " 2) Show list of regular expressions\n"; print " 3) Show list of pages\n"; print " 4) Redisplay summary\n"; print " 5) Quit\n"; print "Option: "; $opt = <STDIN>; # Remove newlines, underscores, and excess spaces $opt =~ s/[_ \r\n]+//g; if ( $opt == 5 ) { return 1; } } return 0; } # Simple keyboard response sub my_get_response() { my $val = shift; my $in = <STDIN>; # Replace newlines and tabs with spaces $in =~ s/[\t\r\n]/ /g; # Remove leading spaces $in =~ s/^ [ ]*//; # Remove trailing spaces $in =~ s/ [ ]*$//; if( $in =~ /^$/ ) { return $val; } else { return $in; } } # Very basic module for showing diffs sub my_simple_diff() { my $a = shift; my $b = shift; my @alines = split /[\r\n]/, $a; my @blines = split /[\r\n]/, $b; my $i = 0; my $j = 0; while( $i < scalar @alines and $j < scalar @blines ) { if( $alines[$i] ne $blines[$j] ) { if( $alines[$i+1] eq $blines[$j] ) { print "**** Diff ($i,-) ****\n"; print "- ".$alines[$i]."\n"; $i++; } else { print "**** Diff ($i,$j) ****\n"; print "+ ". $blines[$j]."\n"; print "- ". $alines[$i]."\n"; } } $i++; $j++; } } # Custom module for namespace restricted "what links here" list sub MediaWiki::Bot::what_links_here_ns { my $self = shift; my $article = shift; my $ns = shift; my @links; $article = MediaWiki::Bot::uri_escape_utf8( $article ); my $res = $self->_get( 'Special:Whatlinkshere', 'view', "&target=$article&limit=5000&namespace=$ns" ); unless (ref($res) eq 'HTTP::Response' && $res->is_success) { return 1; } my $content = $res->decoded_content; while ( $content =~ m{<li><a href="[^"]+" title="([^"]+)">[^<]+</a>([^<]*)}g ) { my $title = $1; my $type = $2; if ( $type !~ /\(redirect page\)/ && $type !~ /\(transclusion\)/ ) { $type = ""; } if ( $type =~ /\(redirect page\)/ ) { $type = "redirect"; } if ( $type =~ /\(transclusion\)/ ) { $type = "transclusion"; } push @links, { title => $title, type => $type }; } return @links; }