Open main menu

Wikidata:Database reports/without claims by site/Configuration

< Wikidata:Database reports‎ | without claims by site
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# licensed under CC-Zero: https://creativecommons.org/publicdomain/zero/1.0

import MySQLdb
import pywikibot
import time

# Wiki that the report pages are written to (used for every pywikibot.Page below).
site = pywikibot.Site('wikidata', 'wikidata')
# Data repository of that site; not referenced elsewhere in the visible code.
repo = site.data_repository()

# --- per-wiki report -------------------------------------------------------
# Intro line of a per-wiki report page: {0} = language code, {1} = timestamp.
header = 'A list of items with a sitelink to {0} but without any statements. Data as of <onlyinclude>{1}</onlyinclude>.\n\n'
# One list entry: {0} = numeric item id, {1} = language code used as interwiki
# prefix, {2} = title of the linked page.
table_row = '* [[Q{0}]] - [[:{1}:{2}]]\n'
# Items linked from site {0} (a site id such as "dewiki") that are not
# redirects and whose "wb-claims" page prop has pp_value=0; at most 1000 rows,
# highest item ids first.
query1 = 'SELECT epp_entity_id, ips_site_page FROM wb_items_per_site JOIN wb_entity_per_page ON ips_item_id=epp_entity_id AND epp_entity_type="item" AND epp_redirect_target IS NULL JOIN page_props ON epp_page_id=pp_page AND pp_propname="wb-claims" AND pp_value=0 WHERE ips_site_id="{0}" ORDER BY epp_entity_id DESC LIMIT 1000;'

# --- overview page ---------------------------------------------------------
# The doubled braces below are str.format escapes and render as literal
# "{{" / "}}" template delimiters.
# One overview row: {0} = site id, {1} = subdomain, {2} = project group,
# {3} = count from query3, {4} = row number, {5} = count from query2.
table_row_overview = '{{{{TR otherreport|{0}|{1}|{2}|{3}|{4}|{5}}}}}\n'
# Overview header template call: {0} = timestamp.
header_overview = '{{{{Wikidata:Database reports/without claims by site/header|{0}}}}}\n'
# Used verbatim (never passed through str.format), hence the single brace pairs.
footer_overview = '{{Wikidata:Database reports/without claims by site/footer}} __NOINDEX__'
# Number of wb_items_per_site rows (sitelinks) per site id.
query2 = 'SELECT ips_site_id AS sit, COUNT(*) AS ct FROM wb_items_per_site GROUP BY ips_site_id'
# Per site id, the number of sitelinked items whose "wb-claims" page prop has
# pp_sortkey = 0, largest count first.
# NOTE(review): unlike query1 this tests pp_sortkey (not pp_value) and does not
# exclude redirects (epp_redirect_target) -- confirm this difference is intended.
query3 = 'SELECT ips_site_id AS sit, COUNT(*) AS ct FROM page_props, wb_items_per_site, wb_entity_per_page WHERE pp_sortkey = 0 AND pp_propname="wb-claims" AND pp_page = epp_page_id AND ips_item_id = epp_entity_id AND epp_entity_type = "item" GROUP BY ips_site_id ORDER BY ct DESC'


def wikisite(sit):
    """Split a site id into a ``(subdomain, project group)`` pair.

    Examples::

        'dewiki'        -> ('de', 'wikipedia')
        'frwikisource'  -> ('fr', 'wikisource')
        'commonswiki'   -> ('commons', 'wikimedia')
        'wikidatawiki'  -> ('www', 'wikidata')

    :param sit: site id as stored in ``ips_site_id`` (e.g. ``'zh_min_nanwiki'``)
    :return: tuple ``(sdomain, group)``
    :raises ValueError: if ``sit`` does not contain ``'wik'`` at all
    """
    suffix_len = len('wiki')

    if sit in ('commonswiki', 'specieswiki', 'metawiki'):
        # Project name becomes the subdomain, group is always 'wikimedia'.
        return sit[:-suffix_len], 'wikimedia'

    if sit in ('wikidatawiki', 'mediawikiwiki'):
        # Strip only the trailing 'wiki'.  Splitting on 'wiki' and taking the
        # first piece yields '' for 'wikidatawiki' and 'media' for
        # 'mediawikiwiki', because the project name itself contains 'wiki'.
        return 'www', sit[:-suffix_len]

    # The project suffix ('wiki', 'wikibooks', 'wiktionary', ...) is at the
    # end of the id, so split on the LAST 'wik'.  A plain split() would return
    # more than two pieces for ids such as 'wikimaniawiki' and break the
    # two-name unpacking.
    sdomain, sep, rest = sit.rpartition('wik')
    if not sep:
        raise ValueError('cannot derive project from site id {0!r}'.format(sit))

    sdomain = sdomain.replace('_', '-')
    if sdomain == 'nb':
        sdomain = 'no'

    group = 'wik' + rest
    if group == 'wiki':
        group = 'wikipedia'
    return sdomain, group


def makeReport(db, s):
    """Render the list body of the report for one wiki.

    Executes ``query1`` for the site id ``s + 'wiki'`` on ``db`` and formats
    every returned (item id, page title) row with ``table_row``.

    :param db: open database connection providing ``cursor()``
    :param s: language code, also used as the interwiki prefix in each row
    :return: the concatenated rows ('' when the query returns nothing)
    """
    site_id = s + 'wiki'
    cur = db.cursor()
    cur.execute(query1.format(site_id))
    lines = [table_row.format(item_id, s, title) for item_id, title in cur]
    return ''.join(lines)


def makeOverview():
    """Build the rows of the overview table across all sites.

    Runs ``query2`` (total sitelinks per site) and ``query3`` (per-site count
    of items without claims, largest first) and renders one
    ``table_row_overview`` line per ``query3`` row.

    :return: tuple ``(text, reportlength, maxvalue)`` where ``text`` is the
        concatenated rows, ``reportlength`` the number of rows and
        ``maxvalue`` the count of the first row (the largest, since query3
        is ordered by count descending); ``('', 0, 0)`` if query3 returns
        no rows.
    """
    db = MySQLdb.connect(host='wikidatawiki.labsdb', db='wikidatawiki_p', read_default_file='~/replica.my.cnf')
    try:
        cursor2 = db.cursor()
        cursor2.execute(query2)
        # site id -> total number of sitelinks
        allsit = dict(cursor2.fetchall())

        cursor = db.cursor()
        cursor.execute(query3)

        rows = []
        reportlength = 0
        # Pre-set so an empty result does not leave the name unbound at the
        # return statement.
        maxvalue = 0
        for rank, (sit, cnt) in enumerate(cursor, 1):
            if rank == 1:
                maxvalue = cnt
            lang, group = wikisite(sit)
            allsite = allsit.get(sit, 0)
            rows.append(table_row_overview.format(sit, lang, group, cnt, rank, allsite))
            reportlength = rank
    finally:
        # Release the connection even if a query or row fails.
        db.close()
    return ''.join(rows), reportlength, maxvalue


def main():
    """Update the per-wiki report pages, then the overview page.

    For each listed language code a report is generated from the database
    and saved to ``Wikidata:Database reports/without claims by site/<code>wiki``.
    Afterwards the overview table is written to the parent page.
    """
    for s in ['de', 'en', 'fr', 'ja', 'nl', 'pt', 'ru', 'sv']:
        page = pywikibot.Page(site, 'Wikidata:Database reports/without claims by site/'+s+'wiki')
        # A fresh connection per wiki; close it as soon as the rows are read
        # so it is not left open while the page is being saved.
        db = MySQLdb.connect(host="wikidatawiki.labsdb", db="wikidatawiki_p", read_default_file="~/replica.my.cnf")
        try:
            report = makeReport(db, s)
        finally:
            db.close()
        text = header.format(s, time.strftime("%Y-%m-%d %H:%M (%Z)")) + report
        summary = 'Bot: Updating database report'
        # NOTE: str.decode() exists only on Python 2 byte strings.
        page.put(text.decode('UTF-8'), comment=summary, minorEdit=False)

    page = pywikibot.Page(site, 'Wikidata:Database reports/without claims by site')
    report, reportlength, maxvalue = makeOverview()
    # Doubled braces are format escapes; the result is a "{{DR otherreport|...}}" call.
    stat = '{{{{DR otherreport|max={}|reportlength={}}}}}\n'.format(maxvalue, reportlength)
    text = stat + header_overview.format(time.strftime("%Y-%m-%d %H:%M (%Z)")) + report + footer_overview
    summary = 'Bot: Updating database report: reportlength: {}; max: {}'.format(reportlength, maxvalue)
    page.put(text.decode('UTF-8'), comment=summary, minorEdit=False)


if __name__ == "__main__":
    main()