Open main menu

Wikidata:Database reports/Humans with missing claims/Configuration

< Wikidata:Database reports‎ | Humans with missing claims
#!/usr/bin/python
# -*- coding: UTF-8 -*-
#licensed under CC-Zero: https://creativecommons.org/publicdomain/zero/1.0

import pywikibot
import requests
import json
import re
import time

# Wikidata site handle; every report page in this script is read or written
# through it.
site = pywikibot.Site('wikidata', 'wikidata')
# Data repository of that site, used when loading property entities.
repo = site.data_repository()

# Properties whose absence is reported for each requested property: each entry
# becomes one column of the summary table and one section of the per-property
# report page.
missingProps = ['P21', 'P19', 'P569', 'P734', 'P735']

def createStatistics(counts, countsAll):
    """Rebuild the statistics subpage with one template row per property.

    counts    -- dict: property id -> dict of {missing-property id: count}
    countsAll -- dict: property id -> total count for that property

    The page starts with the header template (stamped with the current local
    time) followed by one ``{{tr missingclaims|...}}`` call per key of
    ``counts``.  Each call passes, positionally: the property id, its English
    label, the total from ``countsAll`` and one count per entry of
    ``missingProps``.  A value that is not available is passed as an empty
    parameter.
    """
    rows = ['{{Wikidata:Database reports/Humans with missing claims/header|'
            + time.strftime("%Y-%m-%d %H:%M (%Z)") + '}}\n']
    for p1 in counts:
        entity = pywikibot.PropertyPage(repo, p1)
        # NOTE(review): get() presumably raises for a property that cannot be
        # loaded, which aborts the whole update - confirm that is intended.
        entity.get()
        cells = [p1]
        cells.append(entity.labels['en'] if 'en' in entity.labels else '')
        # str() keeps this working whether the total arrives as text or as a
        # number.
        cells.append(str(countsAll[p1]) if p1 in countsAll else '')
        for p2 in missingProps:
            # Template parameters are positional: a missing count must still
            # occupy its slot, otherwise every later column shifts left.
            cells.append(str(counts[p1][p2]) if p2 in counts[p1] else '')
        rows.append('{{tr missingclaims|' + '|'.join(cells) + '|}}\n')
    page = pywikibot.Page(site, 'Wikidata:Database reports/Humans with missing claims/statistics')
    page.put(''.join(rows), comment='upd', minorEdit=False)


def createSummay(counts):
    """Rebuild the overview page: one sortable table row per property.

    counts -- dict: property id -> dict of {missing-property id: count}

    The table has an id column, a label column linking to the per-property
    subpage, and one column per entry of ``missingProps``.  A cell links to
    the matching section of the subpage when a count is available and shows
    ``-`` otherwise.
    """
    parts = ['Update: <onlyinclude>' + time.strftime("%Y-%m-%d %H:%M (%Z)")
             + '</onlyinclude>\n\n{| class="wikitable sortable"\n! Id !! Property ']
    for p2 in missingProps:
        parts.append('!! {{P|' + p2 + '}} ')
    for p1 in counts:
        # data-sort-value is the id without its leading letter (p1[1:]).
        parts.append('\n|-\n|data-sort-value=' + p1[1:] + '| [[Property:' + p1 + '|' + p1 + ']] || [[/' + p1 + '|{{label|' + p1 + '}}]]')
        for p2 in missingProps:
            if p2 in counts[p1]:
                parts.append(' || [[/' + p1 + '#' + p2 + '|' + str(counts[p1][p2]) + ']]')
            else:
                parts.append(' || -')
    parts.append('\n|}\n\nNew reports can be requested on [[/input]].\n[[Category:Database reports]]')
    text = ''.join(parts)
    # Only a byte string needs decoding.  On Python 3 the text is already a
    # str, which has no decode() method, so an unconditional call would raise
    # AttributeError.
    if isinstance(text, bytes):
        text = text.decode('UTF-8')
    page = pywikibot.Page(site, 'Wikidata:Database reports/Humans with missing claims')
    page.put(text, comment='upd', minorEdit=False)


def createReport(results, counts):
    """Write one report subpage per property listing the affected items.

    results -- dict: property id -> dict of {missing-property id: [item ids]}
    counts  -- dict: property id -> dict of {missing-property id: count}

    For every entry of ``missingProps`` that has a count, the page gets a
    section with the count, the listed items and, when the count exceeds
    1000, a note saying how many records are not listed.  Properties with no
    section at all are skipped.  A failure for one property is logged and
    does not stop the remaining properties.
    """
    for p1 in results:
        try:
            parts = []
            for p2 in missingProps:
                if p2 not in counts[p1]:
                    continue
                count = counts[p1][p2]
                parts.append('== <span id="' + p2 + '"></span> Missing {{P|' + p2 + '}} ==\n')
                parts.append('count: ' + str(count) + '\n\n')
                for q in results[p1][p2]:
                    parts.append('*{{Q|' + q + '}}\n')
                # 1000 mirrors the LIMIT of the list query in createLists.
                if count > 1000:
                    parts.append(str(count - 1000) + ' records skipped\n')
            if not parts:
                continue
            parts.append('__FORCETOC__')
            text = ''.join(parts)
            # Only a byte string needs decoding; Python 3 str has no decode().
            if isinstance(text, bytes):
                text = text.decode('UTF-8')
            page = pywikibot.Page(site, 'Wikidata:Database reports/Humans with missing claims/' + p1)
            page.put(text, comment='report update for [[Property:' + p1 + ']]', minorEdit=False)
        except Exception as err:
            # Stay best-effort per property, but report the failure and let
            # KeyboardInterrupt/SystemExit through.
            pywikibot.error('Report for %s could not be written: %r' % (p1, err))

    
def _runQuery(url, query, timeout=120):
    """Send one SPARQL query to ``url`` and return its list of result bindings.

    Raises a requests exception on connection problems, timeouts or a non-2xx
    response, and ValueError/KeyError when the body is not the expected JSON.
    """
    # The timeout keeps a stalled connection from hanging the whole run.
    response = requests.get(url, params={'query': query, 'format': 'json'}, timeout=timeout)
    response.raise_for_status()
    return response.json()['results']['bindings']


def createLists(props):
    """Query the data for every requested property and publish the reports.

    props -- iterable of property ids (e.g. 'P106')

    For each property p1 and each entry p2 of ``missingProps`` this fetches
    up to 1000 items that have p1 and ``wdt:P31 wd:Q5`` but no p2, plus the
    distinct count of all such items.  It also fetches the distinct count of
    items having p1 and ``wdt:P31 wd:Q5``.  A failure for one property is
    logged; whatever was collected for it so far is kept and the remaining
    properties are still processed.  Finally the per-property reports and
    the summary page are written.
    """
    url = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql'
    sparql = 'SELECT ?item WHERE {{ ?item  wdt:{0}?foo1 . ?item wdt:P31 wd:Q5 . OPTIONAL {{ ?item wdt:{1} ?novalue }} FILTER (!BOUND(?novalue))}} ORDER BY ?item LIMIT 1000'
    sparql_count = 'SELECT (count(DISTINCT ?item) as ?cnt) WHERE {{ ?item  wdt:{0}?foo1 . ?item wdt:P31 wd:Q5 . OPTIONAL {{ ?item wdt:{1} ?novalue }} FILTER (!BOUND(?novalue))}}'
    sparql_countAll = 'SELECT (count(DISTINCT ?item) as ?cnt) WHERE {{ ?item  wdt:{0}?foo1 . ?item wdt:P31 wd:Q5}}'
    entityPrefix = 'http://www.wikidata.org/entity/'
    results = {}
    counts = {}
    countsAll = {}
    for p1 in props:
        results[p1] = {}
        counts[p1] = {}
        try:
            for p2 in missingProps:
                results[p1][p2] = []
                # Fetch both answers before storing anything, so a failed
                # count query leaves this pair's item list empty.
                itemRows = _runQuery(url, sparql.format(p1, p2))
                countRows = _runQuery(url, sparql_count.format(p1, p2))
                for row in itemRows:
                    # Keep only the id that follows the entity URI prefix.
                    results[p1][p2].append(row['item']['value'].split(entityPrefix)[1])
                counts[p1][p2] = int(countRows[0]['cnt']['value'])
            # Stored as returned (text), not converted to int.
            countsAll[p1] = _runQuery(url, sparql_countAll.format(p1))[0]['cnt']['value']
        except Exception as err:
            # Stay best-effort per property, but report the failure and let
            # KeyboardInterrupt/SystemExit through.
            pywikibot.error('Queries for %s failed: %r' % (p1, err))
    createReport(results, counts)
    createSummay(counts)
    # The statistics page update is currently disabled; countsAll is still
    # collected for it.
    #createStatistics(counts, countsAll)
    
def main():
    """Read the requested property ids from the input page and build the reports.

    Every line of the input page that consists solely of ``P`` followed by
    digits is taken as a property id; all other lines (blank lines, lines
    starting with ``#``, free text) are ignored.
    """
    page = pywikibot.Page(site, 'Wikidata:Database reports/Humans with missing claims/input')
    requested = page.get()
    props = []
    for line in requested.split('\n'):
        # The pattern alone rejects '#' lines and empty lines, so no
        # first-character test is needed (indexing line[0] raised IndexError
        # on a blank line).
        if re.match(r'^P\d+$', line) is not None:
            props.append(line)
    createLists(props)

# Build the reports only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()