#! /usr/bin/env python
# Public domain; MZMcBride; 2018
import pymysql
import wikitools
import settings
report_title = settings.rootpage + 'Tagged redirect misspellings'
report_template = u'''\
Tagged redirect misspellings, limited to more than five redirect misspellings;
data as of <onlyinclude>~~~~~</onlyinclude>.
{| class="wikitable sortable" style="width:100%%; margin:auto;"
|- style="white-space:nowrap;"
! No.
! Count
! Target
! Redirects
|-
%s
|}
'''
wiki = wikitools.Wiki(settings.apiurl)
wiki.login(settings.username, settings.password)
conn = pymysql.connect(
host=settings.host,
db=settings.dbname,
read_default_file='~/.my.cnf',
charset='utf8mb4',
)
cursor = conn.cursor()
cursor.execute('SET group_concat_max_len = 100000000000;')
cursor.execute('''
/* taggedredirectmisspellings.py */
SELECT
COUNT(rd_from),
rd_title,
GROUP_CONCAT(page_title SEPARATOR '|')
FROM page
JOIN redirect
ON rd_from = page_id
JOIN categorylinks
ON cl_from = page_id
WHERE cl_to = 'Redirects_from_misspellings'
AND page_namespace = 0
AND rd_namespace = 0
GROUP BY rd_namespace, rd_title
HAVING COUNT(rd_from) > 4
ORDER BY COUNT(rd_from) DESC;
''')
i = 1
output = []
for row in cursor.fetchall():
count = '{0:,}'.format(int(row[0]))
target_title = row[1].decode('utf-8').replace('_', ' ')
target_title = u'[[%s]]' % (target_title)
redirects = []
for r in row[2].decode('utf-8').replace('_', ' ').split('|'):
redirects.append('* [[:%s]]' % r)
table_row = u'''\
| %d
| %s
| %s
| %s
|-''' % (i, count, target_title, '\n' + '\n'.join(redirects))
output.append(table_row)
i += 1
report = wikitools.Page(wiki, report_title)
report_text = report_template % ('\n'.join(output))
report_text = report_text.encode('utf-8')
report.edit(report_text, summary=settings.editsumm, bot=1)
cursor.close()
conn.close()