Wikidata:Database reports/Deleted Wikidata entities that are still linked/Configuration

The most recent version of this script can be found at Github: linked_deleted_entities. The script is run on Toolforge in the deltabot tool account; Toolforge users usually have read-access to all scripts in that tool account.

A potentially outdated version is stored onwiki at User:DeltaBot/source/linked_deleted_entities in order to be permanently available and conveniently accessible; it is being displayed on this page with code formatting. Mind that the onwiki source code might have been slightly altered in order to prevent onlyinclude directives from being effective, and that the wikitext parser im some situations messes up the formatted output of the transcluding page content including the source code.

#!/usr/bin/python
# -*- coding: UTF-8 -*-
#licensed under CC-Zero: https://creativecommons.org/publicdomain/zero/1.0

import mariadb
import pywikibot
import time
import requests
from os.path import expanduser


SITE = pywikibot.Site('wikidata','wikidata')

HEADER = """Update: <only""" + """include>{update_timestamp}</onlyinclude>
{{| class="wikitable sortable plainlinks" style="width:100%; margin:auto;"
|-
! Entity !! count !! deleted by
"""

FOOTER = """|}

[[Category:Database reports|Deleted Wikidata entities that are still linked]]"""

TABLE_ROW = """|-
| {{{title}} || [{{{{fullurl:Special:WhatLinksHere/{title}}}}} {cnt}] || [[User:{user}]]
"""


def make_report() -> str:
    db = mariadb.connect(
        host='wikidatawiki.analytics.db.svc.wikimedia.cloud',
        database='wikidatawiki_p',
        default_file=f'{expanduser("~")}/replica.my.cnf',
    )
    cur = db.cursor(dictionary=True)

    query = """SELECT
  CONVERT(pl_title USING utf8) AS pl_title,
  COUNT(*) AS cnt
FROM
  pagelinks
    LEFT JOIN page ON pl_title=page_title AND pl_namespace=page_namespace
WHERE
  (pl_from_namespace=0 OR pl_from_namespace=120)
  AND (pl_namespace=0 OR pl_namespace=120)
  AND page_id IS NULL
GROUP BY
  pl_title
ORDER BY
  COUNT(*) DESC, pl_namespace, pl_title"""

    cur.execute(query)

    text = ''
    for row in cur.fetchall():
        title = row.get('pl_title')
        if title is None:
            continue

        response = requests.get(
            url='https://www.wikidata.org/w/api.php',
            params={
                'action' : 'query',
                'list' : 'logevents',
                'leprop' : 'user|type',
                'letitle' : title,
                'format' : 'json',
            }
        )
        data = response.json()
        user = 'unknown'

        for logevent in data.get('query', {}).get('logevents', []):
            if logevent['type']=='delete' and logevent['action']=='delete':
                user = logevent['user']
                break

        text += TABLE_ROW.format(pql=title[0], title=title, cnt=f'{row.get("cnt", 0):,}', user=user)

    cur.close()
    db.close()

    return text


def main() -> None:
    report = make_report()
    text = HEADER.format(update_timestamp=time.strftime('%Y-%m-%d %H:%M (%Z)')) + report + FOOTER

    page = pywikibot.Page(SITE, 'Wikidata:Database reports/Deleted Wikidata entities that are still linked')
    page.text = text
    page.save(summary='Bot:Updating database report', minor=False)


if __name__=='__main__':
    main()