# User:EpidòseosBot/GND P21.py

#Thanks to users Horcrux (cf. [[Special:PermanentLink/2170822656]]) and Matěj Suchánek (cf. [[Topic:Y5t8uibmrxfns1bx]]) for the great help with the code

import urllib
import xml.etree.ElementTree as ET
from collections import OrderedDict
from datetime import date

import marcalyx
import pywikibot
from pywikibot import pagegenerators

def attachSources(claim, sourcestoadd):
    """Append one new reference, built from ``sourcestoadd``, to ``claim``.

    The reference claims are grouped by the property ID returned by their
    ``getID()`` method, in first-seen order, into an ``OrderedDict`` that maps
    each ID to the list of claims carrying it. That mapping is appended to
    ``claim.sources``. Nothing is saved here.
    """
    grouped = OrderedDict()
    for reference_claim in sourcestoadd:
        grouped.setdefault(reference_claim.getID(), []).append(reference_claim)
    claim.sources.append(grouped)

def detachSources(claim, sourcestorem):
    """Drop from ``claim.sources`` every reference found in ``sourcestorem``.

    ``claim.sources`` is rebound to a new list holding the remaining
    references in their original order. Membership is tested with ``in``,
    i.e. by equality. Nothing is saved here.
    """
    kept = []
    for reference in claim.sources:
        if reference in sourcestorem:
            continue
        kept.append(reference)
    claim.sources = kept

def saveClaim(claim):
    """Save ``claim`` through the module-level ``repo``.

    If the claim has no owning item yet (``claim.on_item`` is falsy), it is
    first bound to the module-level ``item``, i.e. the page currently being
    processed by the main loop of this script.
    """
    needs_owner = not claim.on_item
    if needs_owner:
        claim.on_item = item
    repo.save_claim(claim)

def wd_sparql_query(spq):
    """Yield the pages returned by the SPARQL query ``spq``, already loaded.

    The query is run through ``pagegenerators.WikidataSPARQLPageGenerator``
    on the module-level ``repo``. Each resulting page is loaded with
    ``get(get_redirect=True)`` before being yielded.

    Loading is best effort: a page whose ``get()`` raises is skipped, but the
    failure is reported instead of being swallowed silently, so that skipped
    items can be investigated afterwards.
    """
    generator = pagegenerators.WikidataSPARQLPageGenerator(spq, site=repo)
    for wd in generator:
        # Only the load itself is guarded; the yield stays outside the try.
        try:
            wd.get(get_redirect=True)
        except Exception as error:
            pywikibot.warning(f'Skipping {wd}: {error!r}')
            continue
        yield wd

def get_gnd_xml(gndID):
    """Return the subfield ``a`` value(s) of MARC field 375 for a GND record.

    The MARCXML record for ``gndID`` is downloaded from d-nb.info, parsed and
    wrapped in a ``marcalyx.Record``. The ``a`` subfields of the first 375
    field are converted with ``str()`` and joined with commas.

    Returns:
        The joined string (callers in this file compare it with ``'$a1'`` and
        ``'$a2'``), or ``'$a0'`` when the lookup raises ``IndexError``
        (presumably because the record has no 375 field).

    Raises:
        Whatever ``urlopen`` or ``ET.parse`` raise on network or parse errors;
        these are not handled here.
    """
    # A bare ``import urllib`` does not load the ``request`` submodule, so the
    # name is imported explicitly here instead of relying on another module
    # having loaded it as a side effect.
    from urllib.request import urlopen

    url = 'http://d-nb.info/gnd/%s/about/marcxml' % gndID
    # The context manager closes the HTTP response once parsing is done.
    with urlopen(url) as response:
        root = ET.parse(response).getroot()
    marc = marcalyx.Record(root)
    try:
        marcvalue = marc["375"][0]['a']
        fixedmarcvalue = ",".join(str(element) for element in marcvalue)
    except IndexError:
        fixedmarcvalue = '$a0'
    return fixedmarcvalue

def sourcetoadd(gnditem, gndID, timestamp):
    """Build the three reference claims that make up one GND reference.

    Returns a list ``[P248, P227, P813]`` of ``pywikibot.Claim`` objects on
    the module-level ``repo``, each flagged with ``isReference = True``:

    * P248 ("stated in") targets ``gnditem``
    * P227 (the ID used as source) targets ``gndID``
    * P813 ("retrieved") targets ``timestamp``
    """
    reference = []
    for pid, target in (('P248', gnditem), ('P227', gndID), ('P813', timestamp)):
        snak = pywikibot.Claim(repo, pid)
        snak.isReference = True
        snak.setTarget(target)
        reference.append(snak)
    return reference

# Module-level setup: the site object used by every helper above, and the
# constant parts of the reference that the main loop attaches.
repo = pywikibot.Site('wikidata', 'wikidata')
repo.login()

# Today's date, used as the target of the P813 reference claim.
today = date.today()
timestamp = pywikibot.WbTime(year=today.year, month=today.month, day=today.day)
# Item used as the P248 target of the new references and to recognise
# existing references that should be replaced.
gnditem = pywikibot.ItemPage(repo, "Q36578")

# The first task will be emptying query1.
# The second task will be emptying (as much as possible) query2.
#
# query1: items with a P227 value and a P21 statement having a reference whose
# P248 is Q36578 but which carries no P227; items with a deprecated-rank P227
# statement are excluded. At most 5000 results.
query1='''SELECT DISTINCT ?item WHERE
    { ?item p:P21 ?st ; wdt:P227 ?gnd . ?st prov:wasDerivedFrom ?ref .
    MINUS { ?item p:P227 ?std . ?std wikibase:rank wikibase:DeprecatedRank } .
    ?ref pr:P248 wd:Q36578 . MINUS { ?ref pr:P227 ?id } . } LIMIT 5000
'''
# query2: items with a P227 value and a P21 statement that has a reference
# using P143 and a reference without P248; items with a deprecated-rank P227
# statement are excluded. At most 5000 results.
# NOTE(review): query2 is defined here but not used by the loop in this file.
query2='''SELECT DISTINCT ?item WHERE
    { ?item p:P21 ?st ; wdt:P227 ?gnd . ?st prov:wasDerivedFrom ?ref, ?ref2 .
    MINUS { ?item p:P227 ?std . ?std wikibase:rank wikibase:DeprecatedRank } .
    ?ref pr:P143 ?v . MINUS { ?ref2 pr:P248 ?s } . } LIMIT 5000
'''

# Main task: for every item returned by query1, compare the first P21 value
# with the value read from the GND record and, when they agree, replace the
# weaker references of the matching P21 claim(s) with a full GND reference.
for item in wd_sparql_query(query1):
    gndID = item.claims['P227'][0].getTarget()
    iso5218 = get_gnd_xml(gndID)
    print(f'{gndID = }')
    print(f'{iso5218 = }')
    genderitem = item.claims['P21'][0].getTarget()
    try:
        gendervalue = genderitem.getID()
    except AttributeError:
        # The target has no getID() (e.g. it is not an item). This placeholder
        # matches neither comparison below, so the item is skipped.
        gendervalue = "Q113124952"
    print(f'{gendervalue = }')
    if (gendervalue == "Q6581097" and iso5218 == "$a1") or (gendervalue == "Q6581072" and iso5218 == "$a2"):
        print("gendervalue and iso5218 coincide: go on")
    else:
        print("invalid or divergent iso5218: restart")
        continue

    for claim in item.claims['P21']:
        # Only the first P21 value was checked against GND above. A claim with
        # a different value (or without an item target) is not supported by
        # the GND record and must not receive the GND reference.
        try:
            claimvalue = claim.getTarget().getID()
        except AttributeError:
            continue
        if claimvalue != gendervalue:
            continue

        # Collect each reference to be replaced exactly once: those using
        # P143, P214 or P887, and those whose P248 points to the GND item.
        removablesources = []
        for source in claim.sources:
            if any(pid in source for pid in ("P143", "P214", "P887")):
                removablesources.append(source)
            elif any(snak.getTarget() == gnditem for snak in source.get("P248", ())):
                removablesources.append(source)
        detachSources(claim, removablesources)
        attachSources(claim, sourcetoadd(gnditem, gndID, timestamp))
        saveClaim(claim)
        print("All saved!")