Wikipedia:Database reports/Tagged redirect misspellings/Configuration

taggedredirectmisspellings.py edit

#! /usr/bin/env python
# Public domain; MZMcBride; 2018

import pymysql
import wikitools

import settings

report_title = settings.rootpage + 'Tagged redirect misspellings'

report_template = u'''\
Tagged redirect misspellings, limited to more than five redirect misspellings;
data as of <onlyinclude>~~~~~</onlyinclude>.

{| class="wikitable sortable" style="width:100%%; margin:auto;"
|- style="white-space:nowrap;"
! No.
! Count
! Target
! Redirects
|-
%s
|}
'''

wiki = wikitools.Wiki(settings.apiurl)
wiki.login(settings.username, settings.password)

conn = pymysql.connect(
    host=settings.host,
    db=settings.dbname,
    read_default_file='~/.my.cnf',
    charset='utf8mb4',
)
cursor = conn.cursor()
cursor.execute('SET group_concat_max_len = 100000000000;')
cursor.execute('''
/* taggedredirectmisspellings.py */
SELECT
  COUNT(rd_from),
  rd_title,
  GROUP_CONCAT(page_title SEPARATOR '|')
FROM page
JOIN redirect
ON rd_from = page_id
JOIN categorylinks
ON cl_from = page_id
WHERE cl_to = 'Redirects_from_misspellings'
AND page_namespace = 0
AND rd_namespace = 0
GROUP BY rd_namespace, rd_title
HAVING COUNT(rd_from) > 4
ORDER BY COUNT(rd_from) DESC;
''')

i = 1
output = []
for row in cursor.fetchall():
    count = '{0:,}'.format(int(row[0]))
    target_title = row[1].decode('utf-8').replace('_', ' ')
    target_title = u'[[%s]]' % (target_title)
    redirects = []
    for r in row[2].decode('utf-8').replace('_', ' ').split('|'):
        redirects.append('* [[:%s]]' % r)
    table_row = u'''\
| %d
| %s
| %s
| %s
|-''' % (i, count, target_title, '\n' + '\n'.join(redirects))
    output.append(table_row)
    i += 1

report = wikitools.Page(wiki, report_title)
report_text = report_template % ('\n'.join(output))
report_text = report_text.encode('utf-8')
report.edit(report_text, summary=settings.editsumm, bot=1)

cursor.close()
conn.close()