# foot.py -- shared state module (imported by the main program as `foot`)

# Shared mutable state for one bot run.  The main program imports this
# module as `foot` and reads/writes these names as attributes.

mycatlist1 = []   # work queue of category titles walked by findpages()
mycatlist2 = []   # work queue of category titles walked by findexclude()
allplayers = []   # every distinct non-category title seen by findpages()
excludelist = []  # titles gathered by findexclude() and ProcessDoneCat()
donelist = []     # titles in the target category that are also excluded
nomatchlist = []  # titles seen by findpages() that are not in excludelist
stublist = []     # titles whose wikitext contains "stub" (filled by AddCat)
redirlist = []    # redirect pages that AddCat() skipped
removed = 0       # count of successful category removals
added = 0         # count of successful category additions

# Main Program -- the bot script itself (uses the `foot` module for shared state)

from wikitools import *
import time
import datetime
import urllib
import json
import userpassbot #Bot password
import warnings
import re
import mwparserfromhell
import datetime
import sys
import foot

# Module-level wiki session shared by every function below; created and
# logged in at import time using the credentials from userpassbot.
site = wiki.Wiki() #Tell Python to use the English Wikipedia's API
site.login(userpassbot.username, userpassbot.password) #login

#routine to autoswitch some of the output - as filenames have accented chars!
def pnt(s):
    """Print s, falling back to its UTF-8 encoding if printing fails.

    Page titles and text can contain accented characters that the
    console cannot encode; in that case the UTF-8 encoded form is
    printed instead of letting UnicodeEncodeError propagate.
    """
    try:
        print(s)
    except UnicodeEncodeError:
        utf8_form = s.encode('utf-8')
        print(utf8_form)

      
def startAllowed():
    """Read the on-wiki run switch for this task.

    Fetches the wikitext of "User:RonBot/7/Run" and returns "run" only
    when that text is exactly "Run"; any other content yields "no".
    """
    switch_text = page.Page(site, "User:RonBot/7/Run").getWikiText()
    return "run" if switch_text == "Run" else "no"

def allow_bots(text, user):
    """Return True if `user` may edit a page with the given wikitext.

    Only the first {{bots}} / {{nobots}} template in `text` is examined:

    * no such template               -> True
    * allow=none                     -> False
    * allow lists `user` or "all"    -> True
    * deny=none                      -> True
    * deny lists `user` or "all"     -> False
    * bare {{nobots}} (no params)    -> False
    * anything else                  -> True

    `user` is compared case-insensitively with surrounding whitespace
    stripped.  Progress messages are printed as each case is detected.
    """
    user = user.lower().strip()
    text = mwparserfromhell.parse(text)
    for tl in text.filter_templates():
        if tl.name.matches(['bots', 'nobots']):
            break
    else:
        # Loop finished without finding an exclusion template.
        return True
    print("template found")  # Have we found one
    for param in tl.params:
        bots = [x.lower().strip() for x in param.value.split(",")]
        if param.name == 'allow':
            print("We have an ALLOW")  # allow found
            if ''.join(bots) == 'none':
                return False
            for bot in bots:
                if bot in (user, 'all'):
                    return True
            # NOTE(review): an allow list that omits `user` falls through
            # and ends up permitting the edit -- confirm this is intended.
        elif param.name == 'deny':
            print("We have a DENY")  # deny found
            if ''.join(bots) == 'none':
                print("none - true")
                return True
            for bot in bots:
                if bot in (user, 'all'):
                    pnt(bot)
                    pnt(user)
                    print("all - false")
                    return False
    if (tl.name.matches('nobots') and len(tl.params) == 0):
        print("match - false")
        return False
    return True

def findpages(nextcat):
    """Walk one category and record its members in the `foot` lists.

    nextcat -- category title passed to the API as `cmtitle`.

    Members whose title starts with "Category:" are appended to
    foot.mycatlist1 (if not already queued) so the caller's loop visits
    them later.  Every other member is appended to foot.allplayers the
    first time it is seen and, when it is in neither foot.excludelist
    nor foot.nomatchlist, also to foot.nomatchlist.

    The query is repeated with the returned `cmcontinue` token until the
    response no longer contains a 'continue' entry.  Returns None.
    """
    lastContinue = ''
    while True:
        params = {'action': 'query',
                  'list': 'categorymembers',
                  'cmtitle': nextcat,
                  'cmlimit': 'max',
                  'cmnamespace': '0|14',
                  'cmcontinue': lastContinue
                  }
        req = api.APIRequest(site, params)  # Set the API request
        res = req.query(False)  # Send the API request and store the result in res
        members = pagelist.listFromQuery(site, res['query']['categorymembers'])
        for filep in members:
            pagename = filep.unprefixedtitle
            # Queued names are fed back in as `cmtitle`, so a category is
            # recognised by its "Category:" prefix; a plain substring test
            # would also catch articles that merely contain the word.
            if pagename.startswith("Category:"):
                if pagename not in foot.mycatlist1:
                    foot.mycatlist1.append(pagename)
                    pnt("APPENDING "+pagename)
                    print(len(foot.mycatlist1))
                else:
                    pnt("NOT APPENDING "+pagename)
            elif pagename not in foot.allplayers:  # Have we a unique player name?
                foot.allplayers.append(pagename)
                if (pagename not in foot.excludelist
                        and pagename not in foot.nomatchlist):
                    foot.nomatchlist.append(pagename)
        if 'continue' not in res:
            break
        lastContinue = res['continue']['cmcontinue']
        print("continue")
    return

def splittextpoint(pagetext):
    """Find where to insert a category ahead of trailing template(s).

    Called by AddCat() only for text that contains "stub".  Scans
    backwards over at most the last 100 characters of `pagetext`:

    * if a "]" is met first, return len(pagetext) - 1 (caller appends
      at the end);
    * once a "}" has been seen, return the index of the next newline
      found, i.e. the line break preceding the trailing template(s);
    * otherwise return len(pagetext) - 1.

    The scan never goes past the start of the text, so short (or empty)
    input is safe.  Each inspected index and character is printed as
    debug output, preceded by the last index.
    """
    size = len(pagetext) - 1
    print(size)
    seen_close_brace = False
    # Clamp the window to index 0: negative indices would wrap round to
    # the end of the string, or raise IndexError on very short text.
    stop = max(size - 100, -1)
    for loopvar in range(size, stop, -1):
        mychar = pagetext[loopvar]
        print("%d %r" % (loopvar, mychar))
        if mychar == "]":
            return size
        if mychar == "}":
            seen_close_brace = True
        elif seen_close_brace and mychar == "\n":
            return loopvar
    return size

def ProcessDoneCat(nextcat):
    """Sort the current members of `nextcat` into done / excluded.

    Each member title is printed.  A title already present in
    foot.excludelist is appended to foot.donelist (its category link is
    to be removed later); any other title is appended to
    foot.excludelist.  Follows `cmcontinue` until the API response has
    no 'continue' entry.  Returns None.
    """
    cont_token = ''
    print("PDC")
    while True:
        query = dict(action='query',
                     list='categorymembers',
                     cmtitle=nextcat,
                     cmlimit='max',
                     cmnamespace='0|14',
                     cmcontinue=cont_token)
        res = api.APIRequest(site, query).query(False)
        for member in pagelist.listFromQuery(site, res['query']['categorymembers']):
            pagename = member.unprefixedtitle
            pnt(pagename)
            if pagename not in foot.excludelist:
                pnt("EXCUDE "+pagename)
                foot.excludelist.append(pagename)
                continue
            pnt("REMOVE THE CAT IN "+pagename)
            foot.donelist.append(pagename)
        if 'continue' not in res:
            return
        cont_token = res['continue']['cmcontinue']
        print("continue")

def RemoveCat():
    """Strip the tracking category from every page in foot.donelist.

    For each title the wikitext is fetched and checked with
    allow_bots(); pages that exclude the bot are skipped.  The category
    link (plus any newlines directly after it) is removed and the page
    saved.  foot.removed is incremented only when a link was actually
    removed and the save succeeded.  The text before and after the
    substitution is printed for inspection.  Returns None.
    """
    print(len(foot.donelist))
    for pagetitle in foot.donelist:
        print(pagetitle.encode('utf-8'))
        pagepage = page.Page(site, pagetitle)
        pagetext = pagepage.getWikiText()
        if not allow_bots(pagetext, 'RonBot'):  # does page allow bots
            continue
        print("++++++++++++++++++++++++++++++++++++++++")
        print("REMOVAL bot allowed on article")
        pnt(pagetext)
        newtext, hits = re.subn(r'\[\[Category:Association footballers not categorized by position\]\]\n*', '', pagetext)
        pnt(newtext)
        if hits == 0:
            # Link not present in this exact form: saving would be a
            # no-op that still got counted as a removal.
            print("Category link not found - page left unchanged")
        else:
            try:
                pagepage.edit(text=newtext, bot=True, summary="(Task 7) - Removal of [[:Category:Association footballers not categorized by position]]")
            except Exception as err:
                # Best effort: report and carry on with the next page,
                # but let Ctrl-C / SystemExit through.
                print("Failed to write")
                print(repr(err))
            else:
                foot.removed += 1
                print("writing changed page")
        print("++++++++++++++++++++++++++++++++++++++++")
    return

def AddCat():
    """Add the tracking category to every page in foot.nomatchlist.

    For each title:

    * the page is loaded without following redirects;
    * if its text contains "stub" the title is recorded in
      foot.stublist and splittextpoint() chooses an insertion index;
    * pages that exclude the bot (allow_bots) are left alone;
    * redirects are recorded in foot.redirlist and not edited;
    * otherwise the category link replaces the character at the
      insertion index (surrounded by newlines), or is appended on a new
      line at the end of the text, and the page is saved.

    foot.added counts successful saves.  The loop stops early once
    foot.added + foot.removed reaches the trial limit.  Returns None.
    """
    category_link = "[[Category:Association footballers not categorized by position]]"
    print(time.ctime())
    print(len(foot.nomatchlist))
    for pagetitle in foot.nomatchlist:
        pagetitletext = pagetitle.encode('utf-8')
        pagepage = page.Page(site, pagetitle, True, False)  # dont follow redirects!
        pageredir = pagepage.isRedir()
        pagetext = pagepage.getWikiText()
        lastindex = len(pagetext) - 1
        cutplace = lastindex  # default: append at the end
        if "stub" in pagetext:
            foot.stublist.append(pagetitle)
            cutplace = splittextpoint(pagetext)
        if allow_bots(pagetext, 'RonBot'):  # does page allow bots
            print(pagetitletext + " ADDITION bot allowed on article")
            if pageredir:
                print("REDIRECT " + pagetitletext)  # show that page, but don't add the cat.
                foot.redirlist.append(pagetitle)
            else:
                if cutplace < lastindex:
                    pagetext = pagetext[0:cutplace] + "\n" + category_link + "\n" + pagetext[cutplace+1:]
                else:
                    pagetext = pagetext + "\n" + category_link
                try:
                    pagepage.edit(text=pagetext, bot=True, summary="(Task 7) - Addition of [[:Category:Association footballers not categorized by position]]")
                except Exception as err:
                    # Best effort: report and carry on with the next page,
                    # but let Ctrl-C / SystemExit through.
                    print("Failed to write")
                    print(repr(err))
                else:
                    foot.added += 1
                    print("writing changed page")
                print("++++++++++++++++++++++++++++++++++++++++")
        if foot.added+foot.removed >= 13000:  # Termination for trials. comment out this line and next for full run
            return
    return

def findexclude(nextcat):
    """Walk one category and add its pages to the exclude list.

    nextcat -- category title passed to the API as `cmtitle`.

    Members whose title starts with "Category:" are appended to
    foot.mycatlist2 (if not already queued) so the caller's loop visits
    them later.  Every other member is appended to foot.excludelist
    unless already present.

    The query is repeated with the returned `cmcontinue` token until the
    response no longer contains a 'continue' entry.  Returns None.
    """
    lastContinue = ''
    while True:
        params = {'action': 'query',
                  'list': 'categorymembers',
                  'cmtitle': nextcat,
                  'cmlimit': 'max',
                  'cmnamespace': '0|14',
                  'cmcontinue': lastContinue
                  }
        req = api.APIRequest(site, params)  # Set the API request
        res = req.query(False)  # Send the API request and store the result in res
        members = pagelist.listFromQuery(site, res['query']['categorymembers'])
        for filep in members:
            pagename = filep.unprefixedtitle
            # Queued names are fed back in as `cmtitle`, so a category is
            # recognised by its "Category:" prefix; a plain substring test
            # would also catch articles that merely contain the word.
            if pagename.startswith("Category:"):
                if pagename not in foot.mycatlist2:
                    foot.mycatlist2.append(pagename)
                    pnt("APPENDING "+pagename)
                    print(len(foot.mycatlist2))
                else:
                    pnt("NOT APPENDING "+pagename)
            elif pagename not in foot.excludelist:
                foot.excludelist.append(pagename)
        if 'continue' not in res:
            break
        lastContinue = res['continue']['cmcontinue']
        print("continue")
    return

def TestMainIO():
    """Manual smoke test: run AddCat() on two sandbox pages, then exit.

    Clears foot.donelist, appends the two sandbox titles to
    foot.nomatchlist, calls AddCat() and terminates the process with
    sys.exit().
    """
    # Earlier experiments, kept for reference:
    #foot.nomatchlist=list()
    #foot.nomatchlist.append("User:Ronhjones/Sandbox3")
    #foot.nomatchlist.append("User:Ronhjones/Sandbox4")
    #foot.nomatchlist.append("User:Ronhjones/Sandbox5")
    #pagepage = page.Page(site, 'Sammy Frost', True, False) # dont follow redirects!
    #pagetext = pagepage.getWikiText()
    ##cutplace=splittextpoint(pagetext)
    #pagetext=pagetext[0:cutplace]+"\n"+"[[Category:Association footballers not categorized by position]]"+"\n"+pagetext[cutplace+1:]
    #pnt(repr(pagetext))

    foot.donelist = []
    foot.nomatchlist.extend([
        "User:Ronhjones/Sandbox4",
        "User:Ronhjones/Sandbox5",
    ])
    AddCat()
    sys.exit()
    

def _write_list(path, items):
    """Write each entry of `items` to `path`, one per line, then close it."""
    with open(path, 'w') as myfile:
        for item in items:
            try:
                myfile.write("%s\n" % item)
            except UnicodeEncodeError:
                # Title has characters the default codec cannot write.
                myfile.write("%s\n" % item.encode('utf-8'))


def main():
    """Run the whole task once.

    1. Stop immediately unless startAllowed() reports "run".
    2. Build foot.excludelist by walking the position categories (and
       any subcategories queued along the way) with findexclude().
    3. ProcessDoneCat() on the tracking category; pages it puts in
       foot.donelist get the category removed by RemoveCat().
    4. Build foot.allplayers / foot.nomatchlist by walking the broad
       footballer categories with findpages(); AddCat() then tags the
       pages in foot.nomatchlist.
    5. Each list is dumped to a log file and the counters are printed.
    """
    go = startAllowed()  # Check if task is enabled
    if go != "run":
        # Honour the on-wiki switch instead of ignoring its value.
        print("Task not enabled - stopping")
        return
    #TestMainIO() # - test run was OK.
    #sys.exit()

    #Get the exclude list
    foot.mycatlist2 = [
        "Category:Association football defenders",
        "Category:Association football central defenders",
        "Category:Association football fullbacks",
        "Category:Association football sweepers",
        "Category:Association football forwards",
        "Category:Association football inside forwards",
        "Category:Association football outside forwards",
        "Category:Association football goalkeepers",
        "Category:Association football midfielders",
        "Category:Association football wing halves",
        "Category:Association football wingers",
        "Category:Women's association football defenders",
        "Category:Women's association football forwards",
        "Category:Women's association football goalkeepers",
        "Category:Women's association football midfielders",
        "Category:Association football player non-biographical articles",
        "Category:Association football utility players",
        "Category:Women's association football central defenders",
        "Category:Women's association football fullbacks",
        "Category:Women's association football wingers",
        "Category:Women's association football utility players",
    ]
    foot.excludelist = list()
    foot.stublist = list()
    foot.redirlist = list()
    # Index loop on purpose: findexclude() appends subcategories to the
    # list while it is being walked.
    listnum = 0
    while listnum < len(foot.mycatlist2):
        pnt("CAT " + foot.mycatlist2[listnum])
        findexclude(foot.mycatlist2[listnum])
        listnum += 1
        print("LIST No.  %d" % listnum)
        print(len(foot.excludelist))

    #Get the target cat, if not in exclude then add to that list
    #Otherwise add to donelist - these will need to have the cat removed.
    foot.removed = 0
    foot.added = 0
    print("check the done cat")
    ProcessDoneCat("Category:Association footballers not categorized by position")
    print(len(foot.donelist))
    print(len(foot.excludelist))
    if len(foot.donelist) > 0:
        RemoveCat()

    #write local file
    print("OPEN FILE 1")
    _write_list('C:\\Python27\\bot\\log7\\articlelist1.txt', foot.excludelist)

    #Now ready to process Mainlist
    #Make a list of players that are NOT in the exclude list
    # NOTE(review): the first seed is a position category that is also in
    # the exclude seeds above -- looks harmless, but confirm it is intended.
    foot.mycatlist1 = [
        "Category:Association football defenders",
        "Category:Footballers by city or town",
        "Category:Association football players by club",
        "Category:Association football players by competition",
        "Category:Association football players by country",
        "Category:Association football players by national team",
        "Category:Association football players by nationality",
        "Category:Women's association football players",
        "Category:Expatriate association football players",
    ]
    foot.allplayers = list()
    foot.nomatchlist = list()
    # Index loop on purpose: findpages() appends subcategories to the
    # list while it is being walked.
    listnum = 0
    while listnum < len(foot.mycatlist1):
        pnt("CAT" + foot.mycatlist1[listnum])
        findpages(foot.mycatlist1[listnum])
        listnum += 1
        print("LIST No.  %d" % listnum)
        print(len(foot.allplayers))
        print(len(foot.nomatchlist))

    foot.nomatchlist.sort()

    if len(foot.nomatchlist) > 0:
        AddCat()
        #write local file
        print("OPEN FILE 2")
        _write_list('C:\\Python27\\bot\\log7\\articlelist2.txt', foot.nomatchlist)

    #write local file
    print("OPEN FILE 3")
    _write_list('C:\\Python27\\bot\\log7\\articlelist3.txt', foot.allplayers)

    #write local file
    print("OPEN FILE 4")
    _write_list('C:\\Python27\\bot\\log7\\articlelist4.txt', foot.stublist)

    #write local file
    print("OPEN FILE 5")
    _write_list('C:\\Python27\\bot\\log7\\articlelist5.txt', foot.redirlist)

    print(foot.added)
    print(foot.removed)
    print(time.ctime())
      
# Script entry point: run main() with FutureWarning messages silenced
# for the duration of the run.
if __name__ == "__main__":
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        main()