# File: littleimagegif.py

#! /usr/bin/env python
from PIL import Image
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
from PIL import ImageSequence
import pyexiv2
import cStringIO
import mwclient
import uuid
import urllib
import cgi
import sys
import urllib2
import requests
import math
import tempfile
import os

# CC-BY-SA Theopolisme


def getSize(filename):
    """Return the size in bytes of the file at *filename*."""
    return os.path.getsize(filename)

def gimme_image(filename,compound_site,pxl,theimage):
    """Download a single-frame GIF from the wiki and save a reduced copy.

    The reduced copy keeps the aspect ratio of the original and is sized
    so that width * height is approximately ``pxl``.

    Arguments:
    filename      -- output path WITHOUT extension; the extension of
                     ``theimage`` is appended to form the output file name.
    compound_site -- host name handed to ``mwclient.Site``.
    pxl           -- target pixel count (width * height) of the reduced image.
    theimage      -- file title on the wiki, without the "File:" prefix.

    Returns one of the following strings:
    "SKIP"   -- ``theimage`` does not have a .gif extension (any case).
    "MANUAL" -- the GIF has more than one frame, or the reduced file came
                out suspiciously small compared with the original.
    "PIXEL"  -- reducing would change the pixel count by 5% or less.
    "ERROR"  -- an IOError occurred while opening or processing the image.
    Any other value is the path of the reduced image that was written
    (``filename`` + extension).

    The downloaded original is written to a scratch file in a hard-coded
    temp directory so its on-disk size can be compared with the reduced
    file; that scratch file is removed again before returning.
    """
    extension = os.path.splitext(theimage)[1]
    # Check the extension first so non-GIF files are skipped without
    # opening a connection to the wiki.
    if extension[1:].upper() != "GIF":
        return "SKIP"

    site = mwclient.Site(compound_site)
    temppath = "c:\\python27\\bot\\temp\\"

    image_url = str(site.Images[theimage].imageinfo['url'])
    image_data = requests.get(image_url).content

    temp_file = temppath+str(uuid.uuid4()) + extension
    out_file = filename + extension
    print(temp_file)
    # "with" closes (and therefore flushes) the scratch file, so the size
    # read back from disk below is the real size of the download.
    with open(temp_file,'wb') as f:
        f.write(image_data)

    try:
        img = Image.open(cStringIO.StringIO(image_data))
        width, height = img.size
        print("img.size[0] %s" % (width,))
        print("img.size[1] %s" % (height,))
        frames = sum(1 for _ in ImageSequence.Iterator(img))
        print("Frames %s" % (frames,))
        if frames > 1:
            print("Animated GIF")
            return "MANUAL"
        Fsize1 = getSize(temp_file)
        print("Size 1 %s" % (Fsize1,))
        # Width that gives ~pxl pixels at the original aspect ratio.
        # Clamp to 1 so an extreme aspect ratio cannot yield a 0-pixel side.
        basewidth = max(1, int(math.sqrt((pxl * float(width))/height)))
        print("basewidth %s" % (basewidth,))
        wpercent = (basewidth/float(width))
        print("wpercent %s" % (wpercent,))
        hsize = max(1, int((float(height)*float(wpercent))))
        print("hsize %s" % (hsize,))
        original_pixel = width * height
        print("original_pixel %s" % (original_pixel,))
        modified_pixel = basewidth * hsize
        print("modified_pixel %s" % (modified_pixel,))
        pct_chg = 100.0 *  (original_pixel - modified_pixel) / float(original_pixel)
        print("pct_chg %s" % (pct_chg,))
        if pct_chg <= 5:
            print("Looks like we'd have a less than 5% change in pixel counts. Skipping.")
            return "PIXEL"

        png_info = img.info
        img = img.resize((basewidth,hsize), Image.LANCZOS)
        img.save(out_file, **png_info)
        print(out_file)
        #Sometimes convert goes belly up and we get a black picture
        #Compare the sizes of the original download and the reduced file
        print("Size 1 %s" % (Fsize1,))
        Fsize2 = getSize(out_file)
        print("Size 2 %s" % (Fsize2,))
        Fsizer = Fsize1*(100-pct_chg)/200 #Orig size * % reduced and div by 2 for good measure
        print("Size R min %s" % (Fsizer,))
        if Fsize2 < Fsizer:
            print("TOO SMALL")
            return "MANUAL"
    except (IOError):
        print("Unable to open image " + theimage.encode('ascii', 'ignore') + " (aborting)")
        return "ERROR"
    finally:
        # Remove the scratch copy of the download on every exit path.
        try:
            if os.path.exists(temp_file):
                os.remove(temp_file)
        except OSError as e:
            print("Could not remove temp file %s: %s" % (temp_file, e))

    print("Image saved to disk at " + out_file)
    return out_file
from PIL import Image
from xml.dom import minidom
import cStringIO
import mwclient
import uuid
import urllib
import os.path
import cgi
import littleimagegif
import sys
import urllib2
import re
import time
import random
import logging
import userpassbot
import requests.packages.urllib3
requests.packages.urllib3.disable_warnings()
import mwparserfromhell


# CC-BY-SA Theopolisme


#routine to autoswitch some of the output - as filenames in, say, filep.unprefixedtitle have accented chars!
def pnt(s):
    """Print *s*, falling back to its UTF-8 encoding.

    The fallback is used when printing *s* directly raises
    UnicodeEncodeError (e.g. titles containing accented characters).
    """
    try:
        print(s)
    except UnicodeEncodeError:
        encoded = s.encode('utf-8')
        print(encoded)


def startAllowed():
    """Return "yes" if the on-wiki run page says the bot may run, else "no".

    Reads the page "User:RonBot/2/Run" through the module-level ``site``
    and answers "yes" only when its text is exactly "Run".
    """
    run_page = site.Pages["User:RonBot/2/Run"]
    return "yes" if run_page.text() == "Run" else "no"
        
def allow_bots(pagetext, username):
    """Return True if *username* may edit a page, per {{bots}}/{{nobots}}.

    Arguments:
    pagetext -- wikitext of the page to check.
    username -- name of the bot account; compared case-insensitively.

    Only the first {{bots}} or {{nobots}} template on the page is
    considered. Its ``allow`` and ``deny`` parameters are comma-separated
    lists of user names, where "all" and "none" are special values.
    A {{nobots}} template without any parameters excludes every bot.
    If the page has neither template, the bot is allowed.
    """
    user = username.lower().strip()
    text = mwparserfromhell.parse(pagetext)
    for tl in text.filter_templates():
        # Ignore surrounding whitespace and the case of the first letter,
        # so "{{Nobots}}" and "{{ bots |...}}" are recognised as well.
        name = tl.name.strip()
        name = name[:1].lower() + name[1:]
        if name in ('bots', 'nobots'):
            break
    else:
        return True
    for param in tl.params:
        bots = [x.lower().strip() for x in param.value.split(",")]
        param_name = param.name.strip()
        if param_name == 'allow':
            if ''.join(bots) == 'none': return False
            for bot in bots:
                if bot in (user, 'all'):
                    return True
        elif param_name == 'deny':
            if ''.join(bots) == 'none': return True
            for bot in bots:
                if bot in (user, 'all'):
                    return False
    # A bare {{nobots}} means "no bots at all".
    if name == 'nobots' and len(tl.params) == 0:
        return False
    return True

def are_you_still_there(theimage):
    """Return True if File:<theimage> still carries a reduce-request tag.

    The page text is fetched through the module-level ``site`` and
    searched for any of the template spellings listed below; the result
    is False when none of them is found.
    """
    tag_patterns = (
        r'\{\{[Nn]on.?free-?\s*[Rr]educe.*?\}\}',
        r'\{\{[Rr]educe.*?\}\}',
        r'\{\{[Cc]omic-ovrsize-img.*?\}\}',
        r'\{\{[Ff]air.?[Uu]se.?[Rr]educe.*?\}\}',
        r'\{\{[Ii]mage-toobig.*?\}\}',
        r'\{\{[Nn]fr.*?\}\}',
        r'\{\{[Ss]maller image.*?\}\}',
    )
    page_text = site.Pages["File:" + theimage].text()
    return any(re.search(pattern, page_text) is not None
               for pattern in tag_patterns)

def _replace_reduce_tags(text, replacement):
    """Return *text* with every reduce-request template replaced by *replacement*."""
    patterns = (
        r'\{\{[Nn]on.?free-?\s*[Rr]educe.*?\}\}',
        r'\{\{[Rr]educe.*?\}\}',
        r'\{\{[Cc]omic-ovrsize-img.*?\}\}',
        r'\{\{[Ff]air.?[Uu]se.?[Rr]educe.*?\}\}',
        r'\{\{[Ii]mage-toobig.*?\}\}',
        r'\{\{[Nn]fr.*?\}\}',
        r'\{\{[Ss]maller image.*?\}\}',
    )
    for pattern in patterns:
        text = re.sub(pattern, replacement, text)
    return text


def _remove_file(path):
    """Delete *path* if it exists; report (but do not raise) on failure."""
    try:
        if os.path.exists(path):
            os.remove(path)
    except OSError as e:
        print("Could not remove %s: %s" % (path, e))


def image_routine(images):
    """Process each image title in *images*.

    For every title this function:
    * checks the on-wiki run page using startAllowed();
    * makes sure the file page is still tagged, using are_you_still_there();
    * honours {{bots}}/{{nobots}} on the file page, using allow_bots();
    * asks littleimagegif.gimme_image() for a reduced copy;
    * depending on the result, removes the tag ("PIXEL"), swaps it for
      {{Non-free manual gif reduce}} ("MANUAL"), skips the file
      ("SKIP"/"ERROR"), or uploads the reduced file and tags the page
      with {{subst:orfurrev}}.

    Uses the module-level ``site`` set up by main(). Returns None.
    """
    temppath = "c:\\python27\\bot\\temp\\"
    gifdone = 0
    for theimage in images:
        print("Working on " + theimage.encode('ascii', 'ignore'))
        if startAllowed() != "yes":
            print("Ah, darn - looks like the bot was disabled.")
            continue
        if not are_you_still_there(theimage):
            print("Gah, looks like someone removed the tag.")
            continue

        img_name = "File:" + theimage
        page = site.Pages[img_name]
        #Stop if there's nobots
        if not allow_bots(page.text(), "RonBot"):
            print("Bot excluded from this page by {{bots}}/{{nobots}}. Image skipped.")
            continue
        print("main.allowbots")
        desired_megapixel = float(0.1)
        print("desired_megapixel %s" % (desired_megapixel,))
        pxl = desired_megapixel * 1000000
        print("pxl %s" % (pxl,))
        compound_site = 'en.wikipedia.org'
        filename = temppath+str(uuid.uuid4())
        # gimme_image writes its output to filename + the image's extension.
        out_path = filename + os.path.splitext(theimage)[1]
        try:
            result = littleimagegif.gimme_image(filename,compound_site,pxl,theimage)

            if result == "MANUAL":
                print("Changing tag...cannot reduced...")
                page = site.Pages[img_name]
                text = page.text()
                if "{{Non-free manual gif reduce}}" not in text:
                    text = _replace_reduce_tags(text, '{{Non-free manual gif reduce}}')
                    page.save(text, bot=True, summary = "(Task 2) Tagging with [[Template:Non-free manual gif reduce]] - bot cannot reduce")

            elif result in ("ERROR", "PIXEL", "SKIP"):
                if result == "SKIP":
                    print("Skipping File.")
                elif result == "PIXEL":
                    print("Removing tag...already reduced...")
                    page = site.Pages[img_name]
                    text = _replace_reduce_tags(page.text(), '')
                    page.save(text, bot=True, summary = "(Task 2) Removing {{[[Template:Non-free reduce|Non-free reduce]]}} since file is already adequately reduced")
                print("Image skipped.")

            else:
                # Any other value is the path of the reduced image.
                try:
                    Fcomment="Reduce size of non-free image to NFCC guideline"
                    # Close the handle after upload so the file can be deleted.
                    with open(result,"rb") as upload_handle:
                        site.upload(upload_handle, filename=theimage, ignore=True, description=Fcomment, comment=Fcomment)
                    gifdone += 1
                    print("Uploaded!")

                    page = site.Pages[img_name]
                    text = _replace_reduce_tags(page.text(), '{{subst:orfurrev}}')
                    page.save(text, bot=True, summary = "(Task 2) Tagging with [[Template:orfurrev]]")

                    print("Tagged! %s" % (gifdone,))
                    #if gifdone==200: # Batches of 200 are nice number to check up
                        #break
                except Exception as e:
                    # Keep going with the next image, but say what went wrong.
                    print("Unknown error. Image skipped.")
                    print("Error detail: %r" % (e,))
        finally:
            # Remove the reduced file (if one was written) on every path.
            _remove_file(out_path)
    return

def main():
    """Log in, collect the GIF titles to work on, and process them.

    Defines and fills the global ``site``, walks the members of the
    file-size-reduction request category, keeps the titles that look
    like GIFs, and runs image_routine() on that selection.
    """
    global site
    site = mwclient.Site('en.wikipedia.org')
    site.login(userpassbot.username, userpassbot.password)

    category = mwclient.listing.Category(site, "Category:Wikipedia non-free file size reduction requests")
    gif_titles = []
    for member in category.members():
        title = member.page_title
        pnt(title)
        # Rough and ready filter for GIF images - saves a lot of processing
        # time. It might let some wrong ones through; that is not an issue.
        if "gif" not in title.lower():
            continue
        gif_titles.append(title)
    image_routine(gif_titles)
    print("We're DONE!")
 
# Run the bot only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()