Wikidata:Database reports/Abuse filter effectiveness/Configuration
The most recent version of this script can be found at Github: abuse_filter_effectiveness. The script is run on Toolforge in the deltabot
tool account; Toolforge users usually have read-access to all scripts in that tool account.
A potentially outdated version is stored onwiki at User:DeltaBot/source/abuse_filter_effectiveness in order to be permanently available and conveniently accessible; it is being displayed on this page with code formatting. Mind that the onwiki source code might have been slightly altered in order to prevent onlyinclude
directives from being effective, and that the wikitext parser im some situations messes up the formatted output of the transcluding page content including the source code.
#!/usr/bin/python
# -*- coding: UTF-8 -*-
#licensed under CC-Zero: https://creativecommons.org/publicdomain/zero/1.0
from datetime import datetime
from os.path import expanduser
from time import strftime
import mariadb
import pywikibot as pwb
HEADER = """Update: <only""" + """include>{update_timestamp}</onlyinclude>
{{| class="wikitable sortable plainlinks"
|-
!Id !! Description !! Active Since !! Just Warned !! Edited !! Warning Deterred
"""
FOOTER = """|}
[[Category:Wikidata statistics]]"""
TABLE_ROW = """|-
| [//wikidata.org/wiki/Special:AbuseLog?wpSearchFilter={af_id} {af_id}] || {title} || {start_date} || {warned} || {edited} || {deterred}%
"""
TS_FORMAT_MW = '%Y%m%d%H%M%S'
def make_report() -> str:
query1 = 'SELECT af_id, CONVERT(af_public_comments USING utf8) AS af_public_comments FROM abuse_filter WHERE af_actions="warn,tag"'
query2 = 'SELECT CONVERT(afh_timestamp USING utf8) AS afh_timestamp FROM abuse_filter_history WHERE afh_changed_fields LIKE "%actions%" AND afh_filter=%(afh_filter)s ORDER BY afh_timestamp DESC LIMIT 1'
query3 = 'SELECT COUNT(*) AS cnt FROM abuse_filter_log WHERE afl_filter_id=%(afl_filter)s AND afl_actions=%(afl_actions)s AND afl_timestamp>%(afl_timestamp)s'
db = mariadb.connect(
host='wikidatawiki.analytics.db.svc.wikimedia.cloud',
database='wikidatawiki_p',
default_file=f'{expanduser("~")}/replica.my.cnf',
)
cur = db.cursor(dictionary=True)
cur.execute(query1)
result1 = cur.fetchall()
text = ''
for row in result1:
af_id = row.get('af_id')
comment = row.get('af_public_comments')
if af_id is None or comment is None:
continue
cur.execute(query2, { 'afh_filter' : af_id })
result2 = cur.fetchall()
if len(result2) == 0:
continue
start = result2[0].get('afh_timestamp')
if start is None:
continue
start_date_ts = datetime.strptime(start, TS_FORMAT_MW)
cur.execute(query3, { 'afl_filter' : af_id, 'afl_actions' : 'warn', 'afl_timestamp' : start_date_ts.strftime(TS_FORMAT_MW) })
result3a = cur.fetchall()
warn = result3a[0].get('cnt')
if warn is None:
continue
cur.execute(query3, { 'afl_filter' : af_id, 'afl_actions' : 'tag', 'afl_timestamp' : start_date_ts.strftime(TS_FORMAT_MW) })
result3b = cur.fetchall()
tag = result3b[0].get('cnt')
if tag is None:
continue
if warn > 0:
deterred = f'{(warn-tag)/warn*100:.1f}'
else:
deterred = '0'
text += TABLE_ROW.format(
af_id=af_id,
title=comment,
start_date=start_date_ts.strftime('%Y-%m-%d'),
warned=warn-tag,
edited=tag,
deterred=deterred,
)
cur.close()
db.close()
return text
def main() -> None:
text = HEADER.format(update_timestamp=strftime('%Y-%m-%d %H:%M (%Z)')) + make_report() + FOOTER
page = pwb.Page(pwb.Site('wikidata', 'wikidata'), 'Wikidata:Database reports/Abuse filter effectiveness')
page.text = text
page.save(summary='Bot:Updating database report', minor=False)
if __name__=='__main__':
main()