# User:GalliumBot/proctor/proctor.py

"""
Copyright (c) 2023 theleekycauldron

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

to-do:
* Implement open and close by-month dicts for all users
* Move subpages to proctor/open/, create /close/ and /user/ (by close)
"""
import pywikibot as pwb
import datetime as dt
from datetime import datetime
import re
import json
import math

# Suffix appended to the edit summary of every page save made by this script.
tag = " [[[User:GalliumBot#proctor|proctor]] v0.0.0]"
# Shared site handle passed to every pwb.Page / pwb.Category constructed below.
site = pwb.Site('en','wikipedia')
site.login()

# Build the ordered list of "Month YYYY" labels, starting at July 2011 and
# continuing while the first day of the month lies before the current time.
start = datetime(2011, 7, 1, 0, 0, 0)
monthyears = []
while start < datetime.now():
    monthyears.append(start.strftime("%B %Y"))
    # Advance to the same day of the following month, rolling the year over
    # after December.
    if start.month < 12:
        start = datetime(start.year, start.month + 1, start.day, 0, 0, 0)
    else:
        start = datetime(start.year + 1, 1, start.day, 0, 0, 0)

class User:
    """Per-user record of promotions, bucketed by the labels in ``monthyears``.

    Instance attributes set by ``__init__``:
      * ``name``   -- the username as passed in.
      * ``date``   -- a datetime; ``mu`` keeps the later of the two records.
      * ``opened`` -- one dict per month label, initially empty; callers fill it.
      * ``closed`` -- one integer counter per month label, initially 0.
    """

    def __init__(self,name,date):
        self.name = name
        self.date = date
        self.opened = {}
        self.closed = {}
        for label in monthyears:
            self.opened[label] = {}
            self.closed[label] = 0

    def __repr__(self):
        total = self.count()
        return f"User(name={self.name}, date={self.date}, count={total})"

    def count(self):
        """Return the number of entries across all of the ``opened`` buckets."""
        total = 0
        for entries in self.opened.values():
            total += len(entries)
        return total

    def mu(self,u):
        """Fold another User ``u`` into this one.

        For every month label the two ``opened`` dicts are combined into a new
        dict (entries from ``u`` win on key clashes), and ``date`` becomes the
        later of the two dates.
        """
        for label in monthyears:
            combined = {**self.opened[label], **u.opened[label]}
            self.opened[label] = combined
        if u.date > self.date:
            self.date = u.date

    def out(self):
        """Render this user as one ``{{/Template|...}}`` row of wikitext.

        A few usernames are replaced by a custom display string; ``|a=y`` is
        appended when ``date`` is less than 30 days before now.
        """
        overrides = {
            "Theleekycauldron": "theleekycauldron",
            "Yoninah": "Yoninah|aftername=<small>([[Z\"L]])</small>",
            "PumpkinSky": "PumpkinSky|aftername=<small>(+[[Wikipedia:Sockpuppetry|sockpuppets]])</small>"
        }
        display = overrides.get(self.name, self.name)
        age = datetime.now() - self.date
        suffix = "|a=y" if age.days < 30 else ""
        stamp = self.date.strftime("%Y-%m-%d")
        return f"{{{{/Template|{display}|{self.count():,}|{stamp}{suffix}}}}}"

def get_datetime_from_timestamp(ts):
    """Return a plain ``datetime`` built from the year..second fields of ``ts``.

    Anything finer than whole seconds on ``ts`` is not carried over.
    """
    fields = ("year", "month", "day", "hour", "minute", "second")
    return datetime(*(getattr(ts, field) for field in fields))

def _find_promotion(page):
    """Find the revision of ``page`` whose predecessor still had ``{{DYKsubpage``.

    Revisions are examined in the order ``page.revisions()`` yields them
    (presumably newest first, pywikibot's default -- confirm). The first
    revision whose following (older) neighbour contains ``{{DYKsubpage`` is
    taken to be the promoting edit.

    Returns:
        ``(username, datetime)`` of that revision, or ``None`` if the history
        is exhausted without finding such a neighbour.
    """
    revisions = page.revisions()
    newer = next(revisions, None)
    if newer is None:
        return None
    for older in revisions:
        if "{{DYKsubpage" in page.getOldVersion(older.revid):
            return newer.user, get_datetime_from_timestamp(newer.timestamp)
        newer = older
    return None


def analyze_cat(my,users,unknowns,breaks=None):
    """Rebuild the per-user promotion data for one month's category if it changed.

    Args:
        my: month label used in the category title, e.g. "July 2011".
        users: dict of username -> User already known for this month.
        unknowns: number of pages in this category that previously could not
            be attributed to a user.
        breaks: list collecting pages whose first ``[[User:...`` link does not
            match the detected promoter; a new list is created when omitted.

    Returns:
        ``(users, breaks, unknowns, edited)``. When the category size minus
        ``unknowns`` equals the number of entries already recorded, the input
        ``users`` and ``unknowns`` are returned unchanged with ``edited`` False.
        Otherwise every page is re-analysed and ``edited`` is True.
    """
    if breaks is None:
        breaks = []

    catstring = f"Category:Passed DYK nominations from {my}"
    cat = pwb.Category(site,catstring)

    # Materialise the member list once; it is needed for both the size check
    # and the per-page walk below.
    pages = list(cat.articles())
    catsizelast = sum(len(users[user].opened[my]) for user in users)
    print(my+":",catsizelast,len(pages),unknowns)
    if len(pages)-unknowns==catsizelast:
        # NOTE(review): this returns a fresh empty list rather than the
        # accumulated ``breaks``; kept as in the original -- confirm intent.
        return users, [], unknowns, False

    unknowns = 0
    users = {}

    for page in pages:
        found = _find_promotion(page)
        if found is None:
            unknowns += 1
            continue
        name, date = found

        # A page without any user link is flagged like a mismatch instead of
        # crashing on ``None.group``.
        match = re.search(r"\[\[User:([^\]\|]+)",page.text)
        firstUser = match.group(1) if match else None
        if firstUser != name:
            #print(page.title(),"is suspicious...",name,firstUser)
            breaks.append(page)

        title = page.title()
        short = title[title.index("/")+1:]
        day = date.strftime("%Y-%m-%d")
        print(" || ".join([name,short,day]))

        if name not in users:
            users[name] = User(name,date)

        users[name].opened[my][short] = day
        users[name].date = max(users[name].date,date)

    return users, breaks, unknowns, True
    
def compile_users(users):
    """Return the wikitext of the leaderboard page.

    ``users`` is sorted in place by descending ``count()``; only entries with
    a count of at least 100 contribute a table row (their ``out()`` text).
    """
    users.sort(key=lambda entry: entry.count(), reverse=True)
    rows = [entry.out() for entry in users if entry.count() >= 100]
    lines = [
        "{{Wikipedia:List of Wikipedians by number of DYK promotions/header}}",
        "",
        '{| class="wikitable sortable"',
        "|+",
        "!User",
        "!count",
        "!Date of last promotion",
        "\n".join(rows),
        "|}",
        "If you want, you can add this cool [[Wikipedia:Userboxes|userbox]] to [[Special:MyPage|your userpage]]! (The template can be found at [[Template:User DYK promotions]])",
        "{{User DYK promotions|0}}",
        "[[Category:Wikipedia Did you know administration]]",
    ]
    return "\n".join(lines)

def merge(d1,d2):
    """Fold ``d2`` into ``d1`` in place and return ``d1``.

    Keys only in ``d2`` are copied over; for keys present in both, the value
    in ``d1`` absorbs the one from ``d2`` through its ``mu`` method.
    """
    for key, incoming in d2.items():
        if key not in d1:
            d1[key] = incoming
            continue
        d1[key].mu(incoming)
    return d1
    
def json_eq(j1,j2):
    """Compare two strings while ignoring every space and newline character.

    The characters are removed everywhere, including inside quoted values.
    """
    ignored = str.maketrans("", "", " \n")
    return j1.translate(ignored) == j2.translate(ignored)

def to_open_json(users,my):
    """Save the month's ``opened`` bucket of every user to the open/<month> JSON page."""
    page = pwb.Page(site,f"User:GalliumBot/proctor/open/{my}.json")
    per_user = {}
    for u in users:
        per_user[u.name] = u.opened[my]
    page.text = json.dumps(per_user)
    page.save(summary="updating list"+tag)

def from_json(my):
    """Load the open/<month> JSON page into a dict of username -> User.

    Each User's ``date`` is the latest "YYYY-MM-DD" value stored for them, and
    the stored mapping becomes their ``opened`` bucket for ``my``.
    """
    page = pwb.Page(site,f"User:GalliumBot/proctor/open/{my}.json")
    stored = json.loads(page.text)

    users = {}
    for username in stored:
        latest = max(datetime.strptime(day,"%Y-%m-%d") for day in stored[username].values())
        users[username] = User(username,latest)

    for username in stored:
        users[username].opened[my] = stored[username]
    return users
    
def to_close_json(users):
    """Derive per-month close counts and publish the close/ and user/ JSON pages.

    For every user, each date string stored in ``opened`` is counted under the
    "Month YYYY" label of that date in ``user.closed``. Each user then gets a
    ``usercount`` dict of ``label -> [count, share, rank]`` for the months in
    which they have at least one close. Pages are only saved when their
    content differs (ignoring spaces and newlines) from what is stored.

    Args:
        users: list of User objects; their ``closed`` counters are incremented
            and a ``usercount`` attribute is set on each.

    Raises:
        KeyError: if a stored date falls in a month that is not in
            ``monthyears``.
    """
    # Count every stored date under the month it falls in.
    for user in users:
        for myo in monthyears:
            for day in user.opened[myo].values():
                myc = datetime.strptime(day,"%Y-%m-%d").strftime("%B %Y")
                user.closed[myc] += 1

    for user in users:
        user.usercount = {my: [n,0,0] for my, n in user.closed.items() if n > 0}

    print("Updating close jsons...")
    for my in monthyears:
        json_log = pwb.Page(site,f"User:GalliumBot/proctor/close/{my}.json")

        counts = sorted((u.closed[my] for u in users), reverse=True)
        total = sum(counts)
        for user in users:
            # Look the entry up before dividing: a user without an entry has
            # zero closes this month, and in a month with no closes at all
            # ``total`` is 0, which used to raise ZeroDivisionError.
            stats = user.usercount.get(my)
            if stats is None:
                continue
            stats[1] = f"{user.closed[my]/total:.2%}"
            # Tied users share the rank of the first equal count.
            stats[2] = f"#{counts.index(user.closed[my])+1}"

        close_dump = json.dumps({u.name:u.closed[my] for u in users if u.closed[my]>0})
        if json_eq(json_log.text,close_dump):
            continue
        json_log.text = close_dump
        json_log.save(summary="updating list"+tag)

    print("Updating user jsons...")   
    for user in users:
        user_json = pwb.Page(site,f"User:GalliumBot/proctor/user/{user.name}.json")
        dumps = json.dumps(user.usercount)
        if json_eq(user_json.text,dumps):
            continue
        user_json.text = dumps
        user_json.save(summary="updating list"+tag)
        
def from_unknowns():
    """Return the parsed content of the unknowns.json page."""
    page = pwb.Page(site,"User:GalliumBot/proctor/unknowns.json")
    raw = page.text
    return json.loads(raw)
    
def to_unknowns(unknowns):
    """Save ``unknowns`` as JSON to the unknowns.json page unless it is already up to date."""
    page = pwb.Page(site,"User:GalliumBot/proctor/unknowns.json")
    serialized = json.dumps(unknowns)
    if not json_eq(page.text,serialized):
        page.text = serialized
        page.save(summary="updating list"+tag)
    
def main():
    """Refresh every month's data, then publish the JSON logs and the leaderboard.

    For each label in ``monthyears`` the stored open-JSON is loaded (falling
    back to an empty dict on any failure), the category is re-analysed if its
    size changed, and the result is merged into the overall user table. The
    close/user JSON pages are rewritten only if at least one month changed;
    the leaderboard page is always saved.
    """
    now1 = datetime.now()
    users = {}
    breaks = []
    try:
        unknowns = from_unknowns()
        # Make sure every month has an entry, not only the trailing ones, so
        # the lookup in the loop below cannot raise KeyError.
        for my in monthyears:
            unknowns.setdefault(my,0)
    except Exception:
        # Best effort: a missing or unparsable page means starting from zero.
        unknowns = dict.fromkeys(monthyears,0)
    do_close_json = False

    for my in monthyears:
        try:
            usersmonth = from_json(my)
        except Exception:
            # Best effort: no usable stored data for this month.
            usersmonth = {}
        usersmonth, breaks, unknown, edit = analyze_cat(my, usersmonth, unknowns[my], breaks=breaks)
        unknowns[my] = unknown

        #User-specific things
        # Fold HalfGig into PumpkinSky. Only pop HalfGig when there is a
        # PumpkinSky record to receive it; popping first and then failing the
        # lookup used to discard the HalfGig record.
        if "HalfGig" in users and "PumpkinSky" in usersmonth:
            usersmonth["PumpkinSky"].mu(users.pop("HalfGig"))

        users = merge(users,usersmonth)
        usersmonth = sorted(usersmonth.values(), key = lambda x:x.name)
        if edit:
            to_open_json(usersmonth,my)
            do_close_json = True
    to_unknowns(unknowns)    

    # Catch a HalfGig record still pending after the last month (for example
    # one first seen in that month).
    if "HalfGig" in users and "PumpkinSky" in users:
        users["PumpkinSky"].mu(users.pop("HalfGig"))

    #Finish up
    users = sorted(users.values(), key = lambda x:x.name)
    if do_close_json:
        to_close_json(users)

    now2 = datetime.now()
    print("Time:",now2-now1)

    dykpc = pwb.Page(site,"Wikipedia:List of Wikipedians by number of DYK promotions")
    dykpc.text = compile_users(users)
    dykpc.save(summary="updating list"+tag)
    
# Run the update only when this file is executed as a script.
if __name__ == "__main__":
    main()