Wikidata:WikiProject Association football/Wanted footballers/Configuration

The most recent version of this script can be found on GitHub: wanted_footballer. The script is run on Toolforge in the deltabot tool account; Toolforge users usually have read access to all scripts in that tool account.

A potentially outdated version is stored on-wiki at User:DeltaBot/source/wanted_footballer in order to be permanently available and conveniently accessible; it is displayed on this page with code formatting. Note that the on-wiki source code might have been slightly altered in order to prevent onlyinclude directives from taking effect, and that the wikitext parser in some situations garbles the formatted output of the transcluding page, including the source code.

#!/usr/bin/python
# -*- coding: UTF-8 -*-
#licensed under CC-Zero: https://creativecommons.org/publicdomain/zero/1.0

from time import strftime
from typing import Generator

import pywikibot as pwb
import requests


# Wikidata site handle; the report pages are created on this site.
SITE = pwb.Site('wikidata', 'wikidata')

# Language codes of the Wikipedia editions that each get their own report.
PROJECTS = [
    'en', 'sv', 'nl', 'de', 'fr', 'war', 'ceb', 'ru', 'it', 'es',
    'vi', 'pl', 'ja', 'pt', 'zh', 'uk', 'ca', 'fa', 'no', 'fi',
    'id', 'ar', 'sr', 'cs', 'ko', 'sh', 'hu', 'ms', 'ro', 'tr',
]

# SPARQL endpoint that is queried, and the User-Agent header sent with each request.
WDQS_ENDPOINT = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql'
WDQS_USERAGENT = (
    f'{requests.utils.default_user_agent()} '
    '(wanted_footballer.py via User:DeltaBot at Wikidata; mailto:tools.deltabot@toolforge.org)'
)

# Entity URI prefix; removed from query results to obtain the bare item ID.
WD = 'http://www.wikidata.org/entity/'


def query_wdqs(query: str, timeout: float = 120.0) -> Generator[dict, None, None]:
    """Run a SPARQL query against WDQS_ENDPOINT and yield the result rows.

    This is a generator function: nothing is sent over the network until the
    returned generator is iterated for the first time. Consequently every
    error listed below is raised on first iteration, NOT at call time, so a
    caller that wants to handle it must consume the generator inside its
    ``try`` block (e.g. ``list(query_wdqs(query))``).

    Args:
        query: SPARQL query text, posted as the ``query`` form field.
        timeout: Seconds passed to ``requests.post`` as its ``timeout``.
            Without a timeout a stalled connection would block the bot
            forever.

    Yields:
        One dict per element of ``results.bindings`` in the JSON response;
        nothing if those keys are missing.

    Raises:
        RuntimeError: If the endpoint does not answer within ``timeout`` or
            the response body cannot be parsed as JSON.
    """
    try:
        response = requests.post(
            url=WDQS_ENDPOINT,
            data={
                'query': query,
                'format': 'json',
            },
            headers={
                'Accept': 'application/sparql-results+json',
                'User-Agent': WDQS_USERAGENT,
            },
            timeout=timeout,
        )
    except requests.exceptions.Timeout as exception:
        # Report timeouts through the same exception type as unparsable
        # responses, so callers need a single ``except RuntimeError``.
        raise RuntimeError('Timeout while waiting for SPARQL endpoint') from exception

    try:
        data = response.json()
    except requests.exceptions.JSONDecodeError as exception:
        raise RuntimeError('Cannot parse result from SPARQL endpoint') from exception

    yield from data.get('results', {}).get('bindings', [])


def make_report(project: str) -> None:
    """Build the "wanted footballers" report for one Wikipedia and save it.

    Queries for up to 100 items with occupation (P106) Q937857 that have at
    least five sitelinked articles but none on ``{project}.wikipedia.org``,
    groups them under one heading per article count, and writes the result
    to ``Wikidata:WikiProject Association football/Wanted footballers/{project}``.

    If the query fails with ``RuntimeError`` a message is printed and the
    page is left untouched.

    Args:
        project: Language code of the Wikipedia edition, used both in the
            query filter and in the title of the report page.
    """
    # '<only' + 'include>' is split on purpose so that the on-wiki copy of
    # this source does not contain an effective onlyinclude directive.
    header = f'Many wikipedia have these articles. Please create these articles in [[:{project}:|{project} wikipedia]]. Update: <only' + f'include>{strftime("%Y-%m-%d %H:%M (%Z)")}</onlyinclude>\n'

    # Doubled braces are f-string escapes for the literal SPARQL group braces.
    query = f"""SELECT ?item ?cnt WHERE {{
    {{
        SELECT ?item (COUNT(*) AS ?cnt) WHERE {{ ?item wdt:P106 wd:Q937857; ^schema:about ?article }} GROUP BY ?item HAVING(?cnt>=5) ORDER BY DESC(?cnt)
    }}
    FILTER NOT EXISTS {{ ?item ^schema:about/schema:isPartOf <https://{project}.wikipedia.org/> }}
}} ORDER BY DESC(?cnt) LIMIT 100"""

    try:
        # query_wdqs is a generator function: its body (and therefore its
        # RuntimeError) only runs on iteration, so the generator has to be
        # consumed inside the try block for the handler to be effective.
        rows = list(query_wdqs(query))
    except RuntimeError:
        print(f'{strftime("%Y-%m-%d, %H:%M:%S")}: omit project "{project}" due to query timeout')
        return

    parts = [header]
    cnt = 0  # article count of the section currently being written
    for row in rows:
        qid = row.get('item', {}).get('value', '').replace(WD, '')
        if not qid:
            # A row without an item cannot be linked; skip it.
            continue

        row_cnt = row.get('cnt', {}).get('value', 0)
        if row_cnt != cnt:
            # Rows arrive ordered by count, so a change starts a new section.
            cnt = row_cnt
            parts.append(f'\n== {cnt} wikipedia ==\n')
        # Emit the item through the {{Q|...}} template; plain concatenation
        # avoids a hard-to-read run of escaped braces in an f-string.
        parts.append('*{{Q|' + qid + '}}\n')
    parts.append('\n[[Category:WikiProject Association football/Wanted footballers]]')

    # write to wikidata
    page = pwb.Page(SITE, f'Wikidata:WikiProject Association football/Wanted footballers/{project}')
    page.text = ''.join(parts)
    page.save(summary='Bot:Updating database report', minor=False)


def main():
    """Create the report for each entry of PROJECTS, one after another."""
    for wiki in PROJECTS:
        make_report(wiki)


# Run the bot only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()