# AFC.py - shared state module (imported by the main program as "AFC")

# Shared, module-level state for one bot run. Everything starts out empty;
# the main program fills these in through the "AFC" module reference.
OutList = []    # output lines for drafts that were already on the last list
NewList = []    # output lines for drafts not seen on the last list
OldList = []    # plain page names collected this run (saved for next time)
LastList = []   # page names loaded from the previously saved short list
SkipList = []   # page names to ignore (known false positives)
OutHead = ''    # header wikitext placed at the top of the output page

# Main program

from wikitools import *
import time
import datetime
import urllib
import json
import userpassbot #Bot password
import warnings
import re
import mwparserfromhell
import datetime
import sys
import AFC

#Based on https://en.wikipedia.org/wiki/User:Ritchie333/afcbios.py CC-BY-SA-3.0

# Module-level side effect: build the wikitools site object (no URL is passed,
# so the library default is used) and log in with the credentials stored in
# the local userpassbot module. Every function below relies on this "site".
site = wiki.Wiki() #Tell Python to use the English Wikipedia's API
site.login(userpassbot.username, userpassbot.password) #login

# Category whose members are scanned for candidate drafts.
name = 'Category:AfC submissions declined as a non-notable biography'

# Phrases that mark a draft as being about a woman.
reText = re.compile(r'(She is|She was|Her work|Her book)')
# Markup removed before the lead sentence is searched for: <ref>...</ref>
# pairs, {{...}} templates, HTML comments, bold quote runs and ---- rules.
reMarker = re.compile(r"<ref.*?\/ref>|{{.*}}|<!--.*-->|'''|----")
# Parenthesised part of a title, e.g. "(artist)".
reTitle = re.compile(r'\(.*\)')

# Wiki pages read or written by the bot.
OutPage = 'Wikipedia:WikiProject Women in Red/Drafts'   # generated report
OutList = 'User:RonBot/11/ShortList'                    # "|"-separated names from the last run
Header = 'User:RonBot/11/Header'                        # header text for the report
BadOnes = 'User:RonBot/11/FalsePositives'               # "|"-separated names to skip

#routine to autoswitch some of the output - as filenames have accented chars!
def pnt(s):
    """Print *s*; if the output stream cannot encode it, print its UTF-8 form.

    Page titles can contain accented characters, which some consoles reject
    with UnicodeEncodeError.
    """
    try:
        print(s)
    except UnicodeEncodeError:
        fallback = s.encode('utf-8')
        print(fallback)

      
def startAllowed():
    """Report whether the on-wiki run switch is on.

    Reads the wikitext of "User:RonBot/11/Run" and returns the string "run"
    when it is exactly "Run", otherwise the string "no".
    """
    switch_text = page.Page(site, "User:RonBot/11/Run").getWikiText()
    return "run" if switch_text == "Run" else "no"

def lastrevision(page):
    """Return "<date> - <edit summary>" for the newest revision of *page*.

    page: object whose ``unprefixedtitle`` attribute is sent as the
        ``titles`` value of a ``prop=revisions`` query limited to one
        revision. (The parameter shadows the wikitools ``page`` module inside
        this function only; the module is not needed here.)

    Returns the part of the revision timestamp before the "T" separator,
    followed by " - " and the edit summary. When the response carries no
    ``comment`` key for the revision (for example a hidden summary), the
    summary is taken to be empty rather than raising KeyError.
    """
    params = {'action':'query',
              'prop':'revisions',
              'titles':page.unprefixedtitle,
              'rvlimit':'1'
              }
    req = api.APIRequest(site, params)
    res = req.query(False)
    pages = res['query']['pages']
    # A single title was requested, so use the first (only) entry, whatever
    # page id it is keyed under. next(iter(...)) avoids indexing a keys view.
    pageid = next(iter(pages))
    revision = pages[pageid]['revisions'][0]
    timestamp = str(revision['timestamp'])
    # Keep only the date: the text before the first "T".
    date = timestamp.split('T', 1)[0]
    comment = revision.get('comment', '')
    return date + " - " + comment

def getskippages():
    """Load the false-positive list into AFC.SkipList.

    The wikitext of the BadOnes page is split on "|"; each piece is one page
    name that findpages() will ignore.
    """
    raw_names = page.Page(site, BadOnes).getWikiText()
    AFC.SkipList = raw_names.split("|")

def getoldpages():
    """Load the previously saved page names into AFC.LastList.

    The wikitext of the OutList page (the short list) is split on "|".
    """
    raw_names = page.Page(site, OutList).getWikiText()
    AFC.LastList = raw_names.split("|")

def _draft_entry_lines(pagename, pagetext, revinfo):
    """Build the report lines for one draft.

    Returns a list holding the bullet link to *pagename*, optionally the lead
    sentence found in *pagetext*, and finally *revinfo* wrapped in nowiki.
    """
    # Strip references, templates, HTML comments, bold markers and rules,
    # then defuse any leftover <ref / </ref openers.
    shortText = reMarker.sub('', pagetext)
    shortText = shortText.replace('<ref', 'ref')
    shortText = shortText.replace('</ref', 'ref')

    # Remove only a leading "Draft:" (not later occurrences inside the title),
    # then drop any parenthesised disambiguator.
    shortTitle = pagename
    if shortTitle.startswith('Draft:'):
        shortTitle = shortTitle[len('Draft:'):]
    shortTitle = reTitle.sub('', shortTitle)

    lines = ['* [[' + pagename + ']]']
    # The title is literal text, so escape it: titles containing regex
    # metacharacters (+, ?, [, an unbalanced paren, ...) would otherwise
    # raise re.error or match the wrong text.
    match = re.search(re.escape(shortTitle) + r'.*?\.', shortText)
    if match:
        lines.append('::<small>' + match.group(0) + '</small>')
    lines.append(':::<small><nowiki>' + revinfo + '</nowiki></small>')
    return lines


def findpages(cat):
    """Scan category *cat* and collect report lines for matching drafts.

    Category members (restricted to namespace 118) are fetched in batches,
    following the API's ``continue`` value until none is returned. A page is
    kept when its name is not in AFC.SkipList and its wikitext matches reText.
    For each kept page:

    * its name is appended to AFC.OldList;
    * its report lines go to AFC.NewList when the name is absent from
      AFC.LastList, otherwise to AFC.OutList.

    Progress is printed as pages are processed. Returns None.
    """
    # Build the lookup sets once; neither list changes during the scan.
    skip_titles = set(AFC.SkipList)
    previous_titles = set(AFC.LastList)

    lastContinue = ''
    count = 0
    while True:
        params = {'action':'query',
                  'list':'categorymembers',
                  'cmtitle':cat,
                  'cmlimit':'max',
                  'cmnamespace':'118',
                  'cmcontinue':lastContinue
                  }
        req = api.APIRequest(site, params) #Set the API request
        res = req.query(False) #Send the API request and store the result in res
        touse = pagelist.listFromQuery(site, res['query']['categorymembers'])#Make a list
        for filep in touse:
            pagename = filep.unprefixedtitle
            if pagename in skip_titles: #known false positives
                continue
            pagetext = page.Page(site, pagename).getWikiText()
            if not reText.search(pagetext):
                continue
            tmps = lastrevision(filep)
            AFC.OldList.append(pagename) #save page name for next week
            if pagename not in previous_titles:
                print("NewList")
                target = AFC.NewList
            else:
                print("OutList")
                target = AFC.OutList
            target.extend(_draft_entry_lines(pagename, pagetext, tmps))
            count += 1
            print("%d %s" % (count, pagename.encode('utf-8')))
        if 'continue' not in res:
            break
        lastContinue = res['continue']['cmcontinue']
        print("continue")
    return

def getmyheader():
    """Store the wikitext of the Header page in AFC.OutHead."""
    header_page = page.Page(site, Header)
    AFC.OutHead = header_page.getWikiText()
                                   
def _numbered_section(items):
    """Join *items* one per line, adding a range sub-heading per 50 drafts.

    Only lines containing "[[Draft" count as entries; before entry 1, 51,
    101, ... a "=== first - last ===" heading line is inserted.
    """
    pieces = []
    entries = -1
    for line in items:
        if "[[Draft" in line:
            entries += 1
            if entries % 50 == 0:
                pieces.append('=== ' + str(entries + 1) + ' - ' + str(entries + 50) + ' ===\n')
        pieces.append(line + "\n")
    return "".join(pieces)


def writepage(title):
    """Assemble the report and save it to the wiki page *title*.

    The text is AFC.OutHead, a "New Additions" section built from
    AFC.NewList and an "Existing Pages" section built from AFC.OutList.
    The finished text is echoed with pnt() and then written with a bot edit.
    """
    parts = [AFC.OutHead + "\n"]
    print("writing list")
    parts.append("== New Additions ==\n")
    print("%s newlist" % (len(AFC.NewList),))
    parts.append(_numbered_section(AFC.NewList))
    #Second part
    parts.append("== Existing Pages ==\n")
    print("%s outlist" % (len(AFC.OutList),))
    parts.append(_numbered_section(AFC.OutList))
    outtext = "".join(parts)
    print("writing page %s" % (title,))
    pnt(outtext)
    target = page.Page(site, title)
    target.edit(text=outtext, bot=True, skipmd5=True, summary="(Task 11 - userspace trial) re-write page")

def writeold(title):
    """Save the names in AFC.OldList to wiki page *title* for the next run.

    Every name is followed by "|" (so the saved text ends with a "|"),
    which is the separator getoldpages() splits on.
    """
    print("writing list")
    shorttext = ''.join(entry + "|" for entry in AFC.OldList)
    target = page.Page(site, title)
    print("writing page")
    target.edit(text=shorttext, bot=True, skipmd5=True, summary="(Task 11 - userspace trial) re-write page")

def main():
    """Run one pass of the bot: load inputs, scan the category, write the report.

    The run is abandoned when startAllowed() does not return "run", so the
    on-wiki switch page actually controls the task.
    """
    # Python 2 only: make UTF-8 the default codec for implicit str/unicode
    # conversions used when building the page text.
    reload(sys)
    sys.setdefaultencoding('utf8')
    go = startAllowed() #Check if task is enabled
    if go != "run":
        print("Task is not enabled - stopping")
        return
    getskippages()
    getmyheader()
    # Start from empty result lists (NewList was previously misspelled as
    # "Newlist", which created a stray attribute and left NewList untouched).
    AFC.OutList=list()
    AFC.OldList=list()
    AFC.NewList=list()
    getoldpages()
    findpages(name)
    writepage(OutPage)
    # NOTE(review): saving the short list is switched off, so the list that
    # getoldpages() reads is not refreshed by this script - confirm intended.
    #writeold(OutList)

# Script entry point: run main() with FutureWarning messages silenced for the
# duration of the run (the filter is restored when the with-block exits).
if __name__ == "__main__":
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        main()