Wikidata:Database reports/removed sitelinks/Configuration
The most recent version of this script can be found on GitHub: removed_sitelinks. The script is run on Toolforge in the deltabot
tool account; Toolforge users usually have read access to all scripts in that tool account.
A potentially outdated version is stored onwiki at User:DeltaBot/source/removed_sitelinks in order to be permanently available and conveniently accessible; it is being displayed on this page with code formatting. Mind that the onwiki source code might have been slightly altered in order to prevent onlyinclude
directives from being effective, and that the wikitext parser in some situations messes up the formatted output of the transcluding page content including the source code.
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# licensed under CC-Zero: https://creativecommons.org/publicdomain/zero/1.0
from datetime import datetime
from os.path import expanduser
import re
from time import strftime
import mariadb
import pywikibot as pwb
# Client wikis for which a "removed sitelinks" report page is generated.
DBNAMES = [
    'dewiki',
    'fawiki',
    'frwiki',
    'nlwiki',
    'ptwiki',
    'ruwiki',
]
# Report intro plus wikitable header. The "<only" + "include>" literal is split
# across a string concatenation so that the onwiki copy of this source code does
# not itself act as an onlyinclude directive when transcluded.
HEADER = """A list of {dbname} pages which were disconnected from Wikidata. Data as of <only""" + """include>{update_timestamp}</onlyinclude>.
{{| class="wikitable sortable" style="width:100%; margin:auto;"
|-
! Page !! Item !! Comment !! User !! Time
"""
# One wikitable row per disconnected page (filled in by make_report).
TABLE_ROW = """|-
| [[:{iw_prefix}:{page_title}]] || [[{qid}]] || {comment} || [[User:{user_name}|{user_name}]] || {timestamp}
"""
# Table terminator and report category.
FOOTER = """|}
[[Category:Database reports|removed sitelinks]]"""
def make_report(dbname: str) -> str:
    """Build the wikitext table rows for one client wiki's report.

    Queries the wiki replica for mainspace, non-redirect pages that have a
    recent Wikidata-sourced change (rc_type=5, rc_source='wb') but no longer
    carry a 'wikibase_item' page prop — i.e. pages disconnected from Wikidata.

    :param dbname: database name of the client wiki, e.g. 'dewiki'
    :return: concatenated TABLE_ROW snippets, possibly the empty string
    """
    db = mariadb.connect(
        host=f'{dbname}.analytics.db.svc.wikimedia.cloud',
        database=f'{dbname}_p',
        default_file=f'{expanduser("~")}/replica.my.cnf',
    )
    cur = db.cursor(dictionary=True)
    query = """SELECT
  CONVERT(page_title USING utf8) AS page_title,
  CONVERT(comment_text USING utf8) AS comment_text,
  CONVERT(actor_name USING utf8) AS actor_name,
  CONVERT(rc_timestamp USING utf8) AS rc_timestamp,
  CONVERT(rc_params USING utf8) AS rc_params
FROM
  page
    JOIN recentchanges ON rc_cur_id=page_id
    JOIN comment_recentchanges ON rc_comment_id=comment_id
    JOIN actor_recentchanges ON rc_actor=actor_id
    LEFT JOIN page_props ON page_id=pp_page AND pp_propname='wikibase_item'
WHERE
  rc_type=5
  AND rc_source='wb'
  AND page_namespace=0
  AND page_is_redirect=0
  AND pp_page IS NULL
ORDER BY
  page_title ASC"""
    # Ensure cursor and connection are released even if the query fails.
    try:
        cur.execute(query)
        result = cur.fetchall()
    finally:
        cur.close()
        db.close()
    # Raw string: '\d' in a non-raw literal is an invalid escape sequence
    # (SyntaxWarning on modern Python). Compiled once, used per row.
    qid_pattern = re.compile(r'"(Q\d+)"')
    text = ''
    for row in result:
        page_title = row.get('page_title')
        rc_comment = row.get('comment_text')
        rc_user_text = row.get('actor_name')
        rc_timestamp = row.get('rc_timestamp')
        rc_params = row.get('rc_params')
        # Skip incomplete rows; CONVERT can yield NULL for undecodable values.
        if page_title is None or rc_comment is None or rc_user_text is None or rc_timestamp is None or rc_params is None:
            continue
        # Only keep changes whose serialized params mention this wiki.
        if dbname not in rc_params:
            continue
        timestamp = datetime.strptime(rc_timestamp, '%Y%m%d%H%M%S')
        res = qid_pattern.search(rc_params)
        qid = res.group(1) if res else ''
        # NOTE(review): rc_comment is inserted into table markup unescaped; a
        # comment containing '|' or '}' could break the wikitable — confirm
        # whether upstream comments are already sanitized.
        text += TABLE_ROW.format(
            iw_prefix=dbname[:-4],  # 'dewiki' -> 'de' interwiki prefix
            page_title=page_title.replace('_', ' '),
            qid=qid,
            comment=rc_comment,
            user_name=rc_user_text.replace('wikidata>', ''),
            timestamp=timestamp.strftime('%Y-%m-%d %H:%M'),
        )
    return text
def main() -> None:
    """Regenerate and save the report page for every configured wiki."""
    wikidata = pwb.Site('wikidata', 'wikidata')
    for dbname in DBNAMES:
        intro = HEADER.format(
            dbname=dbname,
            update_timestamp=strftime('%Y-%m-%d %H:%M (%Z)'),
        )
        report = pwb.Page(
            wikidata,
            f'Wikidata:Database reports/removed sitelinks/{dbname}',
        )
        report.text = intro + make_report(dbname) + FOOTER
        report.save(summary='Bot:Updating database report', minor=False)


if __name__ == '__main__':
    main()