WatchlistBot.java

/**
 * WatchlistBot entry point: logs into the English Wikipedia and regenerates
 * the watchlist pages for a fixed list of WikiProjects.
 *
 * Each project is described by its name (without the "Wikipedia:WikiProject "
 * prefix), the talk-page banner template that tags its articles, and the
 * subpage the article list is written to.
 */
class WatchlistBot {
	/**
	 * Build and update the watchlist for one WikiProject.
	 *
	 * @param sessionMgr      logged-in session used for all wiki traffic
	 * @param projectName     project name without "Wikipedia:WikiProject"
	 * @param template        banner template name (without "Template:")
	 * @param articlePage     subpage name the article list is written to
	 * @param useTaggedPages  true to list tagged pages, false to list all
	 *                        pages inside tagged categories
	 * @param includePages    extra pages to include even though untagged
	 *                        (e.g. because they share a talk page)
	 * @throws Exception propagated from the underlying HTTP/wiki operations
	 */
	private static void update(WikiSessionManager sessionMgr, String projectName,
			String template, String articlePage, boolean useTaggedPages,
			String... includePages) throws Exception {
		Project project = new Project(sessionMgr, projectName, template,
									  articlePage, includePages);
		project.updateWatchlist(useTaggedPages);
	}

	public static void main (String[] args) throws Exception {
		WikiSessionManager sessionMgr = new WikiSessionManager();
		sessionMgr.userLogin(Private.username, Private.password);

		// numismatics -- the only project with extra untagged pages to include
		update(sessionMgr, "Numismatics", "Numismaticnotice", "Articles", true,
			   "Template:Currencies of Africa",
			   "Template:Currencies of Asia",
			   "Template:Currencies of Europe",
			   "Template:Currencies of Oceania",
			   "Template:Currencies of the Americas");
		// exonumia
		update(sessionMgr, "Numismatics", "Exonumianotice", "Exonumia articles", true);
		// Hawaii
		update(sessionMgr, "Hawaii", "WPHawaii", "Hawaii recent changes", true);
		// Texas
		update(sessionMgr, "Texas", "WikiProject Texas", "Articles", true);
		// Ice Hockey
		update(sessionMgr, "Ice Hockey", "Ice hockey", "Articles", true);
		// Louisville
		update(sessionMgr, "Louisville", "WikiProject Louisville", "Watchall", true);
		// Kentucky
		update(sessionMgr, "Kentucky", "WikiProject Kentucky", "Watchall", true);
		// Texas State Highways
		update(sessionMgr, "Texas State Highways", "Texas State Highway WikiProject",
			   "Watchlist", true);
		// Dallas
		update(sessionMgr, "Dallas", "WikiProject Dallas", "Articles", true);
		// Comics
		update(sessionMgr, "Comics", "comicsproj", "Articles", true);
		// Pittsburgh
		update(sessionMgr, "Pittsburgh", "PittsburghWikiProject", "Articles", true);
		// Baseball
		update(sessionMgr, "Baseball", "Baseball-WikiProject", "Articles", true);
		// Bell Systems
		update(sessionMgr, "Bell Systems", "WikiProject Bell System", "Articles", true);
		// LGBT studies
		update(sessionMgr, "LGBT studies", "LGBTProject", "Articles", true);
		// San Francisco Bay Area
		update(sessionMgr, "San Francisco Bay Area", "SFBAProject", "Watchlist", true);
		// Africa
		update(sessionMgr, "Africa", "AfricaProject", "Watchlist", true);
		// Electronics
		update(sessionMgr, "Electronics", "Electron", "Articles", true);
		// Tennessee
		update(sessionMgr, "Tennessee", "WikiProject Tennessee", "Articles", true);
		// Hong Kong
		update(sessionMgr, "Hong Kong", "WikiProject Hong Kong", "Articles", true);
		// Films
		update(sessionMgr, "Films", "Film", "Articles", true);
		// these two list all pages in tagged categories instead of tagged pages
		// Automobiles
		update(sessionMgr, "Automobiles", "AutomobileWatch", "Articles", false);
		// Cricket
		update(sessionMgr, "Cricket", "CricketWatch", "Articles", false);

		System.out.println("finished");
		sessionMgr.userLogout();
	}
}

WikiSessionManager.java

import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.Arrays;
import java.net.URL;
import java.net.URLEncoder;
import java.net.URLConnection;
 
    /**
     * WikiSessionManager is a utility class that logs into the English
     * Wikipedia and facilitates making HTTP requests with cookies.
     *
     * This program is free software; you can redistribute it and/or modify
     * it under the terms of the GNU General Public License as published by
     * the Free Software Foundation; either version 2 of the License, or
     * (at your option) any later version.
     *
     * This program is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     * GNU General Public License for more details.
     *
     * You should have received a copy of the GNU General Public License
     * along with this program; if not, write to the Free Software
     * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
     * 
     * @author Gracenotes
     * @version 0.1
     **/
 
public class WikiSessionManager
{
    /** login cookies ("; "-joined name=value pairs), session cookie, and user name */
    private String cookie, sessionData, username;
    /** true once a login succeeded (a *Token= cookie was received) */
    private boolean loggedIn;

    public WikiSessionManager()
    {
        this.loggedIn = false;
        this.sessionData = "";
        this.cookie = "";
    }

    /**
     * Logs into the English Wikipedia via api.php action=login and stores the
     * returned cookies, then fetches an edit page to pick up the session cookie.
     *
     * @param username the account name (trimmed before use)
     * @param password the password; zeroed out after the request is sent
     * @throws IllegalArgumentException if the username or password is blank
     * @throws IOException if the request fails or no session cookie is returned
     */
    public void userLogin(String username, char[] password) throws IOException
    {
        username = username.trim();
        if (username.length() == 0 || password.length == 0)
            throw new IllegalArgumentException("Blank parameter");

        URL url = new URL("http://en.wikipedia.org/w/api.php");
        URLConnection connection = url.openConnection();

        connection.setDoOutput(true);
        connection.setUseCaches(false);
        connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
        connection.connect();

        OutputStreamWriter output = new OutputStreamWriter(connection.getOutputStream(), "UTF-8");
        try {
            output.write("action=login" +
                         "&lgname=" + URLEncoder.encode(username, "UTF-8") +
                         "&lgpassword=" + URLEncoder.encode(new String(password).trim(), "UTF-8"));
            output.flush();
        } finally {
            output.close();
        }

        // zero the password so it does not linger in memory
        Arrays.fill(password, '\0');

        // Collect every Set-Cookie header. The loop is keyed on the header
        // *value* rather than the key: index 0 is the status line (null key,
        // non-null value), and keying on the value avoids stopping early if
        // any later header has a null key. Header names are matched
        // case-insensitively per the HTTP specification.
        StringBuffer receivedCookie = new StringBuffer();
        for (int i = 1; connection.getHeaderField(i) != null; i++)
        {
            String headerName = connection.getHeaderFieldKey(i);
            if (headerName != null && headerName.equalsIgnoreCase("Set-Cookie"))
            {
                // keep only the "name=value" part, drop attributes like Path
                receivedCookie.append("; " + connection.getHeaderField(i).split(";")[0]);
            }
        }
        // drop the leading "; " separator, if any cookie was found
        if (receivedCookie.length() >= 2)
            receivedCookie.delete(0, 2);
        this.cookie = receivedCookie.toString();
        // a successful login sets a cookie ending in "Token="
        this.loggedIn = this.cookie.indexOf("Token=") != -1;
        this.username = this.loggedIn ? username : null;

        // IB edit (get the session data): loading any edit page makes the
        // server issue the session cookie needed for later edits
        url = new URL("http://en.wikipedia.org/w/index.php?title=Wikipedia:Sandbox&action=edit");
        connection = url.openConnection();
        addCookies(connection);
        connection.connect();
        if (!findSessionData(connection)) {
        	throw new IOException("Could not load session data");
        }
        // end IB edit
     }

    /**
     * Logs out (best effort) and clears all cached cookie state.
     * No-op when not logged in.
     */
    public void userLogout() throws IOException
    {
        if (!this.loggedIn)
            return;
        URL url = new URL("http://en.wikipedia.org/w/index.php?title=Special:Userlogout");
        URLConnection connection = url.openConnection();
        this.addCookies(connection);
        connection.connect();

        this.loggedIn = false;
        this.cookie = "";
        this.sessionData = "";
    }

    /**
     * Indicates whether a user is logged in or not
     * 
     * @return A boolean showing whether a user is logged in or not
     */
    public boolean isLoggedIn()
    {
        return this.loggedIn;
    }

    /**
     * Attaches the stored login and session cookies (and a User-Agent naming
     * the bot account) to an un-connected request. No-op when not logged in.
     *
     * @param connection the connection to decorate, before connect()
     */
    public void addCookies(URLConnection connection)
    {
        if (!this.loggedIn)
            return;
        connection.setRequestProperty("Cookie", this.cookie +
                                      (this.sessionData != null ? "; " + this.sessionData : ""));
        connection.setRequestProperty("User-Agent", this.username);
    }

    /**
     * Scans a response's headers for the "_session" cookie and stores it.
     *
     * @param connection an already-connected response to scan
     * @return true if a session cookie was found
     */
    public boolean findSessionData(URLConnection connection)
    {
        sessionData = "";
        // same header-walking scheme as in userLogin; header names are
        // case-insensitive, so use equalsIgnoreCase here too
        for (int i = 1; connection.getHeaderField(i) != null; i++)
        {
            String headerName = connection.getHeaderFieldKey(i);
            if (headerName != null && headerName.equalsIgnoreCase("Set-Cookie")
                    && connection.getHeaderField(i).indexOf("_session") != -1)
                this.sessionData = connection.getHeaderField(i).split(";")[0];
        }

        return this.sessionData.length() != 0;
    }
}

Project.java

import java.io.*;
import java.net.*;

/**
 * A single WikiProject: owns the project's watchlist and knows how to write
 * pages under "Wikipedia:WikiProject &lt;name&gt;/".
 */
public class Project {
	/** are we debugging (sends output to file instead of wikipedia) **/
	final static boolean DBG = false;

	/** the watchlist being maintained for this project **/
	private Watchlist watchlist;

	/** the project name, without the "Wikipedia:WikiProject " prefix **/
	private String projectName;

	/** the session manager (controls logging in, communication w/ wikipedia) **/
	private WikiSessionManager sessionMgr;

	/**
	 * Create a project and its (not-yet-populated) watchlist.
	 *
	 * @param sessionMgr   session used for all wiki traffic
	 * @param projectName  project name without "Wikipedia:WikiProject"
	 * @param template     banner template tagging the project's pages
	 * @param articlePage  subpage name where the article list goes
	 * @param includePages pages to include even though they are untagged
	 */
	Project (WikiSessionManager sessionMgr, String projectName, String template,
			 String articlePage, String[] includePages) {
		this.sessionMgr = sessionMgr;
		this.projectName = projectName;
		this.watchlist = new Watchlist(projectName, articlePage, template,
									   sessionMgr, includePages, this);
	}

	/** update the watchlist
	 * @param useTaggedPages are we including tagged pages (true), or all pages in
	 *        tagged categories (false)
	 **/
	void updateWatchlist (boolean useTaggedPages) throws UnsupportedEncodingException,
			IOException, MalformedURLException {
		watchlist.update(useTaggedPages);
		watchlist.write();
	}

	/** write a page in the project; in DBG mode the text goes to a local
	 * file instead of the wiki. Errors are printed and swallowed (best effort).
	 * @param subPageName the name of the subpage
	 * @param text the text to write
	 */
	void writePage (String subPageName, String text) {
		try {
			if (!DBG) {
				String fullName = "Wikipedia:WikiProject " + projectName + "/" + subPageName;
				Page target = new Page(sessionMgr, fullName);
				target.put(text, "full update by [[User:WatchlistBot|WatchlistBot]]", false);
				return;
			}
			// debugging: dump to a local file, flattening '/' in the name
			FileWriter out = new FileWriter(subPageName.replace('/', '_') + ".txt");
			out.write(text);
			out.close();
		} catch (Exception e) {
			System.out.println(e);
		}
	}
}

Watchlist.java

import java.util.*;
import java.io.*;
import java.net.*;

/**
 * Builds and writes a WikiProject "watchlist": sorted lists of every page
 * (and its talk page) tagged with the project's banner template, split into
 * per-namespace sections and, when too large, across numbered subpages.
 *
 * The output templates below contain the literal placeholder "&lt;range&gt;",
 * which write() later replaces with the actual first-letter range (e.g. "A-M")
 * once that range is known.
 */
public class Watchlist {
	/** the project **/
	private Project project;
	
	/** the template name (without namespace) **/
	private String template;
	
	/** the session manager **/
	private WikiSessionManager sessionMgr;
	
	/** does this watchlist use tagged pages (as opposed to pages in a category list)?
	 * NOTE(review): this field is never assigned anywhere in this class --
	 * update(useTaggedPages) does not store its parameter -- so the
	 * "in categories " tag text in write() is never emitted. Looks like a bug;
	 * confirm intent before fixing.
	 **/
	private boolean taggedPages = true;

	/** pages which should be included in the project even though they're not tagged
	 * (maybe because they share a talk page)
	 **/
	private String[] includePages;
	
	/** the name of the project (without Wikipedia:WikiProject) **/
	private String projectName;
	
	/** the name of the page where the article list goes **/
	private String articlePage;
	
	/** the article pages **/
	private TreeSet<String> articles;
	/** the article talk pages **/
	private TreeSet<String> articlesTalk;
	/** the wikipedia pages **/
	private TreeSet<String> wikis;
	/** the wikipedia talk pages **/
	private TreeSet<String> wikisTalk;
	/** the template pages **/
	private TreeSet<String> templates;
	/** the template talk pages **/
	private TreeSet<String> templatesTalk;
	/** the category pages **/
	private TreeSet<String> categories;
	/** the category talk pages **/
	private TreeSet<String> categoriesTalk;
	/** the image pages **/
	private TreeSet<String> images;
	/** the image talk pages **/
	private TreeSet<String> imagesTalk;
	/** the portal pages **/
	private TreeSet<String> portals;
	/** the portal talk pages **/
	private TreeSet<String> portalsTalk;
	
	/** the maximum number of articles to put on one page **/
	private static final int MAX_ARTICLES = 9000;
	
	/** this one is for the top of all bot-created pages **/
	private static final String BOT_WARN =
				"<div class=\"notice\" " +
	            "style=\"background:#ffe1a7; border:1px solid #AAA; " +
	            "padding:0.2em; margin:0.5em auto;\"> " +
	            "[[Image:Stop_hand.svg|left|20px]] This page is automatically " +
	            "recreated from time to time. Accordingly, any changes you " +
	            "make here will be overwitten. See below for details.</div>\n\n";
	/** this text is used to start the first page, if we're splitting (use SPLIT_INTRO for main page,
	 * SPLIT_INTRO_NEXT for next pages)
	 **/
	private static final String SPLIT_INTRO1 =
				"There are too many articles (more than " + MAX_ARTICLES + ") in this project " +
	            "to list them all on one page. This page and the ones linked ";
	private static final String SPLIT_INTRO2 = "contain ";
	private static final String SPLIT_INTRO = SPLIT_INTRO1 + "below " + SPLIT_INTRO2;
	private static final String SPLIT_INTRO_NEXT = SPLIT_INTRO1 + "from the main page " + SPLIT_INTRO2;
	/** this text starts the first page, if we're not splitting **/
	private static final String ONE_PAGE_INTRO = "This page contains ";
	/** this text is the rest of the intro, in either case (use END_INTRO1 + tagText + END_INTRO2
	 * + template + END_INTRO3 + pageName + END_INTRO4 + pageName + END_INTRO5)
	 **/
	private static final String END_INTRO1 =
				"links to all articles, categories, images, portal pages " +
	            "templates, and project pages ";
	private static final String END_INTRO2 = "with {{tl|";
	private static final String END_INTRO3 = "}} on their talk page. It was " +
	            "generated by [[User:WatchlistBot|" +
	            "WatchlistBot]]. Its purpose is to be able to track " +
	            "the project history using ''[[Special:Recentchangeslinked/" +
	            "Wikipedia:WikiProject ";
	private static final String END_INTRO4 =
				"|related changes]]'' or ''[http://tools.wikimedia.de/~interiot/" +
	            "cgi-bin/offtoolserver/RC_firstonly?url=http%3A%2F%2Fen.wikipedia.org" +
	            "%2Fw%2Findex.php%3Ftitle%3DSpecial%3ARecentchangeslinked%26target" +
	            "%3DWikipedia:WikiProject_";
	private static final String END_INTRO5 =
				"%26hideminor%3D0%26days%3D7%26limit%3D500 related watchlist]'' which " +
	            "only shows the last change for each article.\n\n";
	
	/** the text to be put on the main page **/
	private StringBuilder mainText;
	/** the text to be put on a sub page **/
	private StringBuilder subText;
	/** the number of articles on the main page **/
	private int count = 0;
	/** are we still putting articles on the main page **/
	private boolean onMainPage = true;
	/** special text to use if we're not using tagged pages **/
	private String tagText = "";
	/** the page number for the current subpage **/
	private int pageNo = 1;
	/** the output page name, for putting in messages **/
	private String outputName;

	/** store the configuration; nothing is fetched until update() is called
	 * @param projectName  project name without "Wikipedia:WikiProject"
	 * @param articlePage  subpage name where the article list goes
	 * @param template     banner template name (without "Template:")
	 * @param sessionMgr   session used for all wiki traffic
	 * @param includePages pages to include even though they are untagged
	 * @param project      the owning project (used to write output pages)
	 **/
	Watchlist (String projectName, String articlePage, String template,
			   WikiSessionManager sessionMgr, String[] includePages,
			   Project project) {
		this.projectName = projectName;
		this.articlePage = articlePage;
		this.template = template;
		this.sessionMgr = sessionMgr;
		this.includePages = includePages;
		this.project = project;
	}
	
	/** update the watchlist
	 * @param useTaggedPages are we including tagged pages (true), or all pages in
	 *        tagged categories (false)
	 **/
	void update (boolean useTaggedPages) throws UnsupportedEncodingException,
			IOException, MalformedURLException {
		// reinitialize lists
		initLists();
		// first find the pages which are linked (i.e. transclude the banner)
		Page page = new Page(sessionMgr, "Template:" + template);
		TreeSet<String> refs = page.getTransclusions();
		if (!useTaggedPages) {
			// the list of pages in tagged categories
			TreeSet<String> pages = new TreeSet<String>();
			for (String ref : refs) {
				// the banner sits on "Category talk:" pages; list the members
				// of the corresponding "Category:" page
				if (ref.startsWith("Category talk:")) {
					System.out.println("getting pages in " + ref + " pages: " + pages.size());
					Page cat = new Page(sessionMgr, ref.replace(" talk", ""));
					pages.addAll(cat.getMembers());
				}
			}
			// move the pages list into refs (so the common processing
			// loop below handles both modes the same way)
			refs = pages;
		}
		for (String ref : refs) {
			processPageName(ref);
		}
	}

	/** reset all namespace lists and seed them with the forced includePages **/
	void initLists () {
		articles = new TreeSet<String>();
		articlesTalk = new TreeSet<String>();
		wikis = new TreeSet<String>();
		wikisTalk = new TreeSet<String>();
		templates = new TreeSet<String>();
		templatesTalk = new TreeSet<String>();
		categories = new TreeSet<String>();
		categoriesTalk = new TreeSet<String>();
		images = new TreeSet<String>();
		imagesTalk = new TreeSet<String>();
		portals = new TreeSet<String>();
		portalsTalk = new TreeSet<String>();
		for (String page : includePages) {
			processPageName(page);
		}
	}

	
	/** process a page name -- that is, add the article and its talk
	 *  page to the appropriate lists. The name is split on the first ':';
	 *  "Foo talk:" prefixes are folded into the subject namespace because
	 *  startsWith() matches both "Wikipedia" and "Wikipedia talk", etc.
	 *  Category and Image links are prefixed with ':' so they render as
	 *  links instead of categorizing/embedding.
	 **/
	private void processPageName (String pageName) {
		String[] result = pageName.split(":");
		if (result.length == 1) {
			articles.add(result[0]);
			articlesTalk.add("Talk:" + result[0]);
		} else if (result[0].equals("Talk")) {
			articles.add(result[1]);
			articlesTalk.add("Talk:" + result[1]);
		} else if (result[0].startsWith("Wikipedia")) {
			wikis.add("Wikipedia:" + result[1]);
			wikisTalk.add("Wikipedia talk:" + result[1]);
		} else if (result[0].startsWith("Template")) {
			templates.add("Template:" + result[1]);
			templatesTalk.add("Template talk:" + result[1]);
		} else if (result[0].startsWith("Category")) {
			categories.add(":Category:" + result[1]);
			categoriesTalk.add("Category talk:" + result[1]);
		} else if (result[0].startsWith("Image")) {
			images.add(":Image:" + result[1]);
			imagesTalk.add("Image talk:" + result[1]);
		} else if (result[0].startsWith("Portal")) {
			portals.add("Portal:" + result[1]);
			portalsTalk.add("Portal talk:" + result[1]);
		}
	}
	
	/** prepare the output and write to wikipedia.
	 * Articles stream onto the main page until MAX_ARTICLES is exceeded;
	 * after that, each full batch is flushed to "/PageN" and the main page
	 * gets a link instead (tracked via count / onMainPage / pageNo).
	 **/
	void write () {
		// if we're not using tagged pages, we need to update the output a bit
		// (NOTE(review): taggedPages is never set to false -- see field comment)
		if (!taggedPages) {
        	tagText = "in categories ";
        }
		// the page name of the output
        outputName = projectName.replace(" ", "_") + "/" +
        	articlePage.replace(" ", "_");

		mainText = new StringBuilder(BOT_WARN);
		
		// count the number of articles
		int numArticles = articles.size() + wikis.size() + templates.size() +
			categories.size() + images.size() + portals.size();
		
		// figure out if we can fit everything on one page (double the
		// number of articles to count talk pages)
		boolean splitting = (numArticles*2 > MAX_ARTICLES);
		if (splitting) {
			mainText.append(SPLIT_INTRO);
		} else {
			mainText.append(ONE_PAGE_INTRO);
		}
		mainText.append(END_INTRO1 + tagText + END_INTRO2 + template + END_INTRO3 +
			outputName + END_INTRO4 + outputName + END_INTRO5);
		
		mainText.append("==Regular content (count: " + numArticles + ")==\n");
		
		mainText.append("===Articles (count: " + articles.size() + ")===\n");
		char prevChar = 'Z';
		char firstChar = prevChar; // initialize to something late in the alphabet
		
		// the text for this subpage (if we're not splitting, this will be put
		// onto the main page)
		subText = new StringBuilder();
		
		for (String s : articles) {
			// start a new "====X====" section at each new first letter
			if (s.charAt(0) != prevChar) {
				subText.append("====" + s.charAt(0) + "====\n");
				prevChar = s.charAt(0);
				// if this is the first article
				if (count == 0) {
					firstChar = prevChar;
				}
			}
			// put the article name
			subText.append("*[[" + s + "]]\n");
			count++;
			// if we've put all the articles we can on this page
			if (count > MAX_ARTICLES) {
				count = 0;
				if (onMainPage) {
					// the first full batch stays on the main page
					onMainPage = false;
					mainText.append(subText);
				} else {
					// flush the batch to /PageN; fill in its letter range
					// where the "<range>" placeholder was left
					mainText.append("====[[/Page" + pageNo + "|" +
							firstChar + "-" + prevChar + "]]====\n");
					int index = subText.indexOf("<range>");
					subText.replace(index, index+7, firstChar + "-" + prevChar);
					project.writePage(articlePage + "/Page" + pageNo, subText.toString());
					pageNo++;
				}
				firstChar = prevChar;
				subText = new StringBuilder("===Articles <range>===\n" +
						  "====" + firstChar + "====\n");
			}
		}
		// if we have too many articles, and we've already started the second
		// (or more) page
		if (splitting && !onMainPage) {
			mainText.append("====[[/Page" + pageNo + "|" +
					firstChar + "-" + prevChar + "]]====\n");
			int index = subText.indexOf("<range>");
			subText.replace(index, index+7, firstChar + "-" + prevChar);
			project.writePage(articlePage + "/Page" + pageNo, subText.toString());
			pageNo++;
		} else { // we only have one page or this is the first batch
			mainText.append(subText);
		}
		
		prepareArticleList("Wikipedia", wikis, true);
		prepareArticleList("Templates", templates, true);
		prepareArticleList("Portals", portals, true);
		prepareArticleList("Categories", categories, true);
		prepareArticleList("Images", images, true);
		
		mainText.append("==Talk pages==\n");
		
		mainText.append("===Articles===\n");
		prevChar = firstChar = 'Z';
		if (splitting && subText.length() != 0) {
			// flush whatever prepareArticleList left pending, then start a
			// fresh subpage for the talk-page listing
			project.writePage(articlePage + "/Page" + pageNo, subText.toString());
			pageNo++;
			subText = new StringBuilder(BOT_WARN + SPLIT_INTRO_NEXT +
					 					END_INTRO1 + tagText + END_INTRO2 +
					 					template + END_INTRO3 + outputName +
					 					END_INTRO4 + outputName + END_INTRO5);
			// NOTE(review): heading is opened with === but closed with == --
			// renders as a malformed heading; looks like a typo to confirm
			subText.append("===Articles <range>==\n");
		} else {
			subText = new StringBuilder();
		}
		count = 0;
		char endChar = 'Z';
		// talk-page names all start with "Talk:", so charAt(5) is the first
		// letter of the actual title
		for (String s : articlesTalk) {
			if (count == 0) {
				firstChar = s.charAt(5);
			}
			subText.append("*[[" + s + "]]\n");
			count++;
			if (count > MAX_ARTICLES) {
				count = 0;
				endChar = s.charAt(5);
				mainText.append("*[[/Page" + pageNo + "|" +
							    firstChar + "-" + endChar + "]]\n");
				int index = subText.indexOf("<range>");
				subText.replace(index, index+7, firstChar + "-" + endChar);
				project.writePage(articlePage + "/Page" + pageNo, subText.toString());
				pageNo++;
				firstChar = endChar;
                subText = new StringBuilder("===Articles <range>===\n");
			}
			endChar = s.charAt(5);
		}
		if (splitting) {
			// flush the final (partial) talk-page batch
			mainText.append("*[[/Page" + pageNo + "|" +
            				firstChar + "-" + endChar + "]]\n");
            int index = subText.indexOf("<range>");
            if (index != -1) {
            	subText = subText.replace(index, index+7, firstChar + "-" + endChar);
            }
            project.writePage(articlePage + "/Page" + pageNo, subText.toString());
            pageNo++;
		} else {
			mainText.append(subText);
		}
		
		prepareArticleList("Wikipedia", wikisTalk, false);
		prepareArticleList("Templates", templatesTalk, false);
		prepareArticleList("Portals", portalsTalk, false);
		prepareArticleList("Categories", categoriesTalk, false);
		prepareArticleList("Images", imagesTalk, false);		
		
		project.writePage(articlePage, mainText.toString());
	}
	
	/** append one namespace's listing either to the main page or to the
	 * current subpage, depending on remaining room.
	 * @param title        section title (e.g. "Templates")
	 * @param pages        the sorted page names to list
	 * @param includeCount whether to show "(count: N)" in the heading
	 **/
	private void prepareArticleList (String title, TreeSet<String> pages,
									 boolean includeCount) {
		String countText = "";
		if (includeCount) {
			countText = " (count: " + pages.size() + ")";
		}
		mainText.append("===" + title + countText + "===\n");
		// if we need to put these articles on the next page (because we've
		// already started the second page, or we can't fit all these pages
		// on the main page)
		boolean pagesOnNext = !onMainPage || count + pages.size() > MAX_ARTICLES;
		if (pagesOnNext) {
			subText = new StringBuilder(BOT_WARN + SPLIT_INTRO_NEXT +
					END_INTRO1 + tagText + END_INTRO2 + template + END_INTRO3 +
					outputName + "/Page" + pageNo + END_INTRO4 + outputName + "/" + pageNo +
					END_INTRO5 +
					"===" + title + "===\n");
			mainText.append("*[[/Page" + pageNo + "#" + title + "|" + title +"]]\n");
		} else {
			subText = new StringBuilder();
			count += pages.size();
		}
		for (String s : pages) {
			subText.append("*[[" + s + "]]\n");
		}
		// if these pages are going on the main page, put them there
		if (!pagesOnNext) {
			mainText.append(subText);
			subText = new StringBuilder();
		} else {
			onMainPage = false;
		}
	}
}

Page.java

import java.io.*;
import java.net.*;
import java.util.*;
import java.util.regex.*;
import org.apache.commons.lang.StringEscapeUtils;

/**
 * One wiki page on the English Wikipedia: read raw text, write (by screen-
 * scraping the edit form for its hidden tokens), and run API list queries
 * (transclusions, category members).
 *
 * Writing is throttled: at most one edit per MIN_WRITE_DELAY, and a maxlag
 * 503 response triggers exponential-backoff retries up to MAX_SLEEP_TIME.
 */
public class Page {
	/** the title of the page (with namespace) **/
	private String title;
	/** the title of the page (without namespace) **/
	private String titleWithoutNamespace = null;
	
	/** the index.php URL (as a String) **/
	private final String strIndexURL = "http://en.wikipedia.org/w/index.php";

	/** the api.php URL (as a String) **/
	private final String strAPIURL = "http://en.wikipedia.org/w/api.php";

	/** the session manager (manages logging in, cookies, etc) **/
	private WikiSessionManager sessionMgr;

	/** how long to sleep if maxlag is > 5 -- start with 5 sec **/
	private static int sleepTime = 5000;
	/** the maximum time to sleep (after this much time, we quit) **/
	private final static int MAX_SLEEP_TIME = 160000;
	/** the last write time, so we can keep the bot slow (shared across all
	 * Page instances via static) **/
	private static long lastWriteTime = -1;
	/** the minimum delay between writes **/
	private final static int MIN_WRITE_DELAY = 10000;

	/** the list of articles that we're building, for example,
	 *  in @see getTransclusions()
	 **/
	private TreeSet<String> articles;
	
	/** create the Page object and store its title (with namespace).
	 * Note the ordering: titleWithoutNamespace() must be computed from the
	 * raw title BEFORE the title itself is URL-encoded.
	 * @param title the title of the page (with namespace)
	 * @param sessionMgr the session manager (controls logging in and other interaction
	 *        with wikipedia)
	 * @throws UnsupportedEncodingException if there's a problem with the URL
	 */
	Page (WikiSessionManager sessionMgr, String title)
			throws UnsupportedEncodingException {
		this.sessionMgr = sessionMgr;
		this.title = title;
		this.titleWithoutNamespace = URLEncoder.encode(titleWithoutNamespace(), "UTF-8");
		this.title = URLEncoder.encode(title, "UTF-8");
	}

	/** get the title of this page without namespace
	 * @return the cached (URL-encoded) value after construction; during
	 *         construction, the part of the raw title after the first ':'
	 **/
	String titleWithoutNamespace () {
		// if we've already gotten it once, don't do it again (because of encoding)
		if (titleWithoutNamespace != null) return titleWithoutNamespace;
		// we haven't called this yet -- means we're in the constructor
		String[] split = title.split(":");
		if (split.length == 1) return split[0];
		return split[1];
	}
	
	/** get the contents of the page
	 * @return the page contents
	 * @throws IOException if something goes wrong (like the page doesn't exist)
	 **/
	public String get () throws IOException {
		// get the URL & connection
		return urlRequest(strIndexURL + "?title=" + title + "&action=raw");
	}
	
	/** write the specified text to the page.
	 * Scrapes wpStarttime/wpEdittime/wpEditToken from the edit form, then
	 * POSTs the new text to action=submit. All exceptions are caught and
	 * printed (best effort) -- the caller gets no failure signal.
	 * @param text the text to put on the page
	 * @param summary the edit summary
	 * @param minor is this a minor edit
	 **/
	void put (String text, String summary, boolean minor) {
		try {
			URLConnection connection = null;
			URL url = null;

			// get the URL and connection; maxlag=5 asks the server to refuse
			// (with a 503) when replication lag is high
			url = new URL(strIndexURL + "?title=" + title + "&action=edit&maxlag=5");
			connection = url.openConnection();
			sessionMgr.addCookies(connection);
			connection.connect();
	
			// process the existing page text to find:
			// wpStarttime, wpEdittime, and wpEditToken. They're in lines of the
			// form given in the pattern (this screen-scrape is tied to the
			// exact HTML the server emits, so it is fragile)
			Pattern pattern = Pattern.compile("<input type='hidden' value=\"(.*?)\" name=\"(.*?)\" />");
			Matcher matcher;
	
			String startTime = "", editTime = "", editToken = "";
			BufferedReader reader = null;
			boolean stillTrying = true;
			while (stillTrying) {
				try {
					reader = new BufferedReader(
							new InputStreamReader(connection.getInputStream()));
					stillTrying = false;
					sleepTime = 5000;
				} catch (IOException e) {
					// there must be a better way to do this!
					// (detects the maxlag 503 by matching the exception text;
					// any other IOException loops forever -- NOTE(review))
					if (e.toString().contains("503")) {
						System.out.println("Max lag -- sleeping for " + sleepTime/1000 + " seconds");
						Thread.sleep(sleepTime);
						sleepTime *= 2;
						if (sleepTime > MAX_SLEEP_TIME) {
							System.out.println("Giving up");
							System.exit(-1);
						}
					}
				}
			}
			String line = reader.readLine();
			while (line != null) {
				if (line.indexOf("<input type='hidden'") != -1) {
					// NOTE(review): matcher.find()'s return value is not
					// checked; a hidden input not matching the pattern would
					// throw IllegalStateException (caught below)
					matcher = pattern.matcher(line);
					matcher.find();
					String name = matcher.group(2);
					String value = matcher.group(1);
					if (name.equals("wpStarttime")) {
						startTime = value;
					} else if (name.equals("wpEdittime")) {
						editTime = value;
					} else if (name.equals("wpEditToken")) {
						editToken = value;
						break; // we don't need anything else
					}
				}
				line = reader.readLine();
			}
			reader.close();
	
			// send the data
			url = new URL(strIndexURL + "?title=" + title + "&action=submit");
			connection = url.openConnection();
			
			connection.setDoInput(true);
			connection.setDoOutput(true);
			connection.setUseCaches(false);
			connection.setRequestProperty("Content-Type",
					"application/x-www-form-urlencoded");
			sessionMgr.addCookies(connection);
	
			// write the data to the output stream, first throttling so edits
			// are at least MIN_WRITE_DELAY apart
			long writeDelay = System.currentTimeMillis() - lastWriteTime;
			if (lastWriteTime != -1 && writeDelay < MIN_WRITE_DELAY) {
				System.out.println("Waiting " + (MIN_WRITE_DELAY-writeDelay)/1000 + " seconds");
				Thread.sleep(MIN_WRITE_DELAY-writeDelay);
			}
			System.out.println("Writing " + titleWithoutNamespace);
			OutputStreamWriter output = new OutputStreamWriter(connection
					.getOutputStream(), "UTF-8");
			output.write("wpStarttime=" + startTime);
			output.write("&wpEdittime=" + editTime);
			output.write("&wpEditToken=" + URLEncoder.encode(editToken, "UTF-8"));
			output.write("&wpTextbox1=" + URLEncoder.encode(text, "UTF-8"));
			output.write("&wpSummary=" + URLEncoder.encode(summary, "UTF-8"));
			if (minor) {
				output.write("&wpMinorEdit=1");
			}
			output.flush();
			output.close();
			lastWriteTime = System.currentTimeMillis();
	
			// reading the response appears to be what forces the POST to
			// complete -- URLConnection seems not to commit the request until
			// the input stream is read (NOTE(review): confirm)
			BufferedReader input = new BufferedReader(new InputStreamReader(
					connection.getInputStream()));
			line = input.readLine();
			/* could be used to check for errors
			while (line != null) {
				line = input.readLine();
			} */
		} catch (Exception e) {
			System.out.println(e);
		}
	}

	/** get the transclusions for this page
	 * @return the list of all articles which transclude this page
	 * @state articles is used to build the list, but it is initialized
	 *        and then returned
	 * @throws MalformedURLException
	 * @throws IOException
	 */
	TreeSet<String> getTransclusions () 
			throws MalformedURLException, IOException {
		// the article list
		articles = new TreeSet<String>();
		// the parameters to use in the URL
		final String urlParams = "action=query&list=embeddedin&eilimit=5000&format=xml";

		String result = urlRequest(strAPIURL + "?titles=" + title + "&" + urlParams);
		
		int index = 0;
		while (index != -1) {
			processResult(result, "ei");
			index = result.indexOf("eicontinue");
			if (index != -1) {
				// find the next " after eicontinue=" (12 chars long)
				int endIndex = result.indexOf("\"", index+12);
				String continueText = result.substring(index+12, endIndex);
				// NOTE(review): the continuation request omits the titles
				// parameter that the first request sent -- verify the API
				// accepts this, otherwise later batches may be wrong
				result = urlRequest(strAPIURL + "?" + urlParams + "&eicontinue=" +
									URLEncoder.encode(continueText, "UTF-8"));
			}
		}

		return articles;
	}

	/** get the articles in the category
	 * @return the list of all articles in this category
	 * @state articles is used to build the list, but it is initialized
	 *        and then returned
	 * @throws MalformedURLException
	 * @throws IOException
	 */
	TreeSet<String> getMembers () 
			throws MalformedURLException, IOException {
		// the article list
		articles = new TreeSet<String>();
		// the parameters to use in the URL (note: urlParams already carries
		// the leading '?', unlike in getTransclusions)
		final String urlParams = "?cmcategory=" + titleWithoutNamespace +
								 "&action=query&list=categorymembers&cmlimit=5000&format=xml";
		String result = urlRequest(strAPIURL + urlParams);

		int index = 0;
		while (index != -1) {
			processResult(result, "cm");
			index = result.indexOf("cmcontinue");
			if (index != -1) {
				// find the next " after cmcontinue=" (12 chars long)
				int endIndex = result.indexOf("\"", index+12);
				String continueText = result.substring(index+12, endIndex);
				result = urlRequest(strAPIURL + urlParams + "&cmcontinue=" +
									URLEncoder.encode(continueText, "UTF-8"));
			}
		}

		return articles;
	}

	/** process the result -- this is a list of articles in XML format
	 * @param result the raw text
	 * @param id the id to use in the pattern (e.g., "ei" for embedded in, "cm" for
	 *        for category members, etc.)
	 * @state articles new article titles are added to articles
	 */
	private void processResult (String result, String id) {
		// matches elements like <ei pageid="..." ns="..." title="..." />;
		// tied to this exact attribute order
		Pattern pattern =
			Pattern.compile("<" + id + " pageid=\"(.*?)\" ns=\"(.*?)\" title=\"(.*?)\" />");
		Matcher matcher = pattern.matcher(result);
		
		while (matcher.find()) {
			String article = matcher.group(3);
			// titles arrive XML-escaped (&amp; etc.); unescape before storing
			article = StringEscapeUtils.unescapeXml(article);
			articles.add(article);
		}
	}
	
	/** open a URL and read the page
	 * @param http the full URL "http://whatever"
	 * @return the text of the page
	 * @throws MalformedURLException
	 * @throws IOException
	 */
	private String urlRequest (String http) throws MalformedURLException,
			IOException {
		// get the URL & connection
		URL url = new URL(http);
		URLConnection connection = url.openConnection();
		sessionMgr.addCookies(connection);

		// convert the connection stream into a String
		StringBuilder sbResult = new StringBuilder();
		BufferedReader reader = new BufferedReader(new InputStreamReader(
				connection.getInputStream(), "UTF-8"));
		String line = reader.readLine();
		while (line != null) {
			sbResult.append(line + "\n");
			line = reader.readLine();
		}
		reader.close();

		return sbResult.toString();
	}
}