# User:Polbot/source/Make rds from altnames

use strict;
use Perlwikipedia;
use URI::Escape;

# Earliest epoch second at which the next page creation may happen; the
# redirect-creation code further down reads and advances this value.
my $soonest_next_op = time;

# Set up the wiki connection and identify the bot through its user agent.
my $pw = Perlwikipedia->new();
#$pw->{debug} = 1;
$pw->{mech}->agent('Bot/WP/EN/Quadell/polbot');

print "\nStarting polbot, logging in.\n";
my $login_status = $pw->login('bot name', 'bot password');
# Only a login() result that stringifies to "0" is accepted as success.
if ($login_status ne 0) {
	die "I can't log in.";
}

# Scratch list of page lines, reused for each list page that gets parsed.
my @lines;

print "Reading needingdab file\n";
my $needingdab_list = $pw->get_text('User:Polbot/altnames/needingdab');
@lines = split(/\n/, $needingdab_list);

# Index every "* [[Title]]..." bullet by its title, keeping the whole line
# as the value so it can be written back unchanged later.
my %needingdab_names = ();
for my $entry (@lines) {
	next unless $entry =~ m/^\* \[\[([^]]*)\]\].*$/;
	$needingdab_names{$1} = $entry;
}

print "Reading in-process file\n";
my %inprocess_names = ();
my $inprocess_list = $pw->get_text('User:Polbot/altnames/inprocess');
@lines = split(/\n/, $inprocess_list);

# Unbuffer STDOUT once, so the "Waiting..." progress text is shown before the
# pause rather than after it.
$| = 1;

# Recognises the templates that mark a page as a disambiguation page.
my $dab_template_re = qr/\{\{(?:dab|disambig|disamb|disambiguation|hndis|namedab|bio-dab|hndisambig)(?:\}\}|\|)/i;

# Rule 1 as a reusable check: true when $text links to the title matched by
# $title_re ("[[Title]]" or "[[Title|...") or passes it as the last parameter
# of a template ("|Title}}").
my $mentions_title = sub {
	my ($text, $title_re) = @_;
	return 0 unless defined $text;
	return 1 if $text =~ m/\[\[\s*$title_re\s*(?:\||\]\])/;
	return 1 if $text =~ m/\|\s*$title_re\s*\}\}/;
	return 0;
};

# Each usable line looks like:  * [[Main article]]|[[Alt 1]]|[[Alt 2]]*job title
foreach my $line (@lines) {
	next unless $line =~ m/^\* \[\[([^]]*)\]\]\|([^*]*)\*(.*)$/;
	my ($main_name, $altnames, $jobtitle) = ($1, $2, $3);

	# Build a regex for the main (article) name. quotemeta() neutralises
	# every regex metacharacter, not just "(", ")" and "."; the escaped
	# spaces are then widened so that a space also matches an underscore.
	my $enc_main_name = quotemeta $main_name;
	$enc_main_name =~ s/\\ /[ _]/g;
	my $main_name_re = qr/$enc_main_name/;

	print "\nReading $main_name ($jobtitle)\n";
	my @altnames = split(/\|/, $altnames);

	# $altname aliases the array element, so assigning to it rewrites the
	# entry in @altnames (either to its annotated form or to "DELETE").
	foreach my $altname (@altnames) {
		my $link_descr = $altname;
		$altname =~ s/\[\[([^]]*)\]\]/$1/;

		# An empty title can be neither read nor created; just drop it.
		if ($altname !~ /\S/) {
			$altname = "DELETE";
			next;
		}

		my $final_article_name = $altname;

		# Read the altname article.
		# NOTE(review): wikiread() also returns '' after repeated failed
		# reads, so a page that merely failed to load is handled below as
		# if it did not exist -- confirm edit() cannot clobber a live page.
		my $wiki = wikiread($altname, $pw);

		if ($wiki =~ /\S/) {
			# The article exists.

			# Okay, here are The Rules:
			# 1. If it mentions $main_name, then DELETE. End.
			# 2. If it's a redirect, then follow it, and check #1 again.
			# 3. If there's a link to [[anything (disambiguation)]], then follow it, and check #1 again.
			# 4. We won't be deleting at this point. But if it's a dab, then note that.

			# These rules will fail in the following situations:
			# A. $altname is an article that dabs to [[x]], a dab page that mentions $main_name, but that isn't [[x (disambiguation)]]
			# B. $altname is a dab that links to a rd to $main_name.

			# 1. Does it mention the main article (perhaps even rd-ing there)?
			my $delete = $mentions_title->($wiki, $main_name_re);

			# 2. Is it a rd? If so, follow. The negated class stops at the
			#    first "]]" so later links on the same line are not swallowed.
			if (!$delete and $wiki =~ /\#\s*redirect\s*\[\[([^]]*)\]\]/i) {
				$final_article_name = $1;
				$link_descr .= " r [[$final_article_name]]";
				$wiki = wikiread($final_article_name, $pw);

				# 1. again.
				$delete = $mentions_title->($wiki, $main_name_re);
			}

			# 3. Does it link to a dab page? If so, follow.
			if (!$delete) {
				my $dab_target;
				if (($wiki =~ m/\[\[([^]]* \(disambiguation\))\]\]/)
				or ($wiki =~ m/\{\{\s*(?:otherpersons2|otherpeople2)\s*\|\s*(.*?)\s*\}\}/)) {
					# Explicit link or template naming the dab page itself
					$dab_target = $1;
				} elsif ($wiki =~ m/\{\{\s*(?:otherpeople|otherpersons|othernames)\s*\}\}/) {
					# Bare hatnote: dab page is named after the current page
					$dab_target = "$final_article_name (disambiguation)";
				} elsif ($wiki =~ m/\{\{\s*(?:otherpeople|otherpersons|othernames)\s*\|\s*(.*?)\s*\}\}/) {
					# Hatnote with a parameter: dab page is named after it
					$dab_target = "$1 (disambiguation)";
				}

				if (defined $dab_target) {
					$final_article_name = $dab_target;
					$link_descr .= " f [[$final_article_name]]";
					$wiki = wikiread($final_article_name, $pw);

					# 1. again.
					$delete = $mentions_title->($wiki, $main_name_re);
				}
			}

			# 4. Note when the page we ended up on is a dab page.
			if ($wiki =~ $dab_template_re) {
				$link_descr .= " d";
			}

			$altname = $delete ? "DELETE" : $link_descr;
		} else {
			# No article exists. Make a rd!
			my $wait = $soonest_next_op - time;
			$wait = 0 if $wait < 0;
			print "Waiting $wait secs... ";

			# Sleep until the throttle expires instead of spinning the CPU;
			# loop because sleep() may return early.
			while ((my $remaining = $soonest_next_op - time) > 0) {
				sleep $remaining;
			}
			$soonest_next_op = time + 9;

			print "rd [[$altname]] to [[$main_name]]\n";
			$pw->edit($altname, "#Redirect [[$main_name]]", "Redirecting to [[$main_name]], auto-generated by [[User:polbot]]");
			$altname = "DELETE";
		}
	}

	# Remove the elements that say "DELETE"
	@altnames = grep { $_ ne "DELETE" } @altnames;

	# No altnames left? Then there is nothing to report for this article.
	next unless @altnames;

	# Put it in inprocess
	$inprocess_names{$main_name} = "* [[$main_name]]|" . join('|', @altnames) . "*$jobtitle";
}

print "Merging old and new needingdab lists\n";
# Entries already present in the needingdab list win; only titles that are
# missing (or have a false value) are taken from the freshly built list.
for my $title (keys %inprocess_names) {
	next if $needingdab_names{$title};
	$needingdab_names{$title} = $inprocess_names{$title};
}

print "Writing needingdab list\n";
# One line per entry, ordered by title, each terminated by a newline.
my $wiki_code = join('', map { $needingdab_names{$_} . "\n" } sort keys %needingdab_names);
$pw->edit('User:Polbot/altnames/needingdab', $wiki_code, "Auto-updating based on input at inprocess list");

print "Finis!";

# wikiread($article, $connection)
#
# Fetches the text of $article through $connection->get_text(). A result of
# "0" is treated as "try again": up to five further attempts are made,
# sleeping 1, 2, ... 5 seconds before each one. If the answer is still "0"
# after that, the empty string is returned; otherwise whatever get_text()
# returned is handed back unchanged.
sub wikiread {
	my ($article, $connection) = @_;

	my $text = $connection->get_text($article);

	for my $attempt (1 .. 5) {
		last if $text ne "0";

		# Back off a little longer before every retry
		sleep $attempt;
		print "   retry. . .\n";
		$text = $connection->get_text($article);
	}

	# Still "0" after the last retry: give up
	return '' if $text eq "0";

	return $text;
}