#Thanks to users Horcrux (cf. [[Special:PermanentLink/2170822656]]) and Matěj Suchánek (cf. [[Topic:Y5t8uibmrxfns1bx]]) for the great help with the code
import urllib
import urllib.request
import xml.etree.ElementTree as ET
from collections import OrderedDict
from datetime import date

import marcalyx
import pywikibot
from pywikibot import pagegenerators
def attachSources(claim, sourcestoadd):
    """Append one new reference block built from *sourcestoadd* to *claim*.

    The reference snaks are bucketed by the property ID that each snak's
    ``getID()`` returns, keeping first-seen order of the properties, and the
    resulting mapping is appended to ``claim.sources``.  Nothing is saved
    here; the claim is only modified in memory.
    """
    grouped = OrderedDict()
    for snak in sourcestoadd:
        # One list of snaks per property, created on first sight.
        grouped.setdefault(snak.getID(), []).append(snak)
    claim.sources.append(grouped)
def detachSources(claim, sourcestorem):
    """Drop from *claim* every reference block listed in *sourcestorem*.

    ``claim.sources`` is rebound to a new list holding only the reference
    blocks that are not members of *sourcestorem* (membership is tested with
    ``in``, i.e. by equality).  Nothing is saved here.
    """
    kept = []
    for reference in claim.sources:
        if reference in sourcestorem:
            continue
        kept.append(reference)
    claim.sources = kept
def saveClaim(claim):
    """Hand *claim* to ``repo.save_claim``.

    If the claim has no owning item yet (``claim.on_item`` is falsy) it is
    first bound to ``item``.  Both ``item`` and ``repo`` are module-level
    names: ``item`` is the loop variable of the main loop of this script, so
    this function is only meaningful while that loop is running.
    """
    lacks_owner = not claim.on_item
    if lacks_owner:
        claim.on_item = item
    repo.save_claim(claim)
def wd_sparql_query(spq):
    """Yield the loaded pages matched by the SPARQL query *spq*.

    The query is run through ``pagegenerators.WikidataSPARQLPageGenerator``
    against the module-level ``repo``.  Each page is loaded with
    ``get(get_redirect=True)`` before being yielded.

    Loading is best effort: a page whose ``get()`` raises is reported on
    standard output and skipped, so one broken page does not stop the run.
    """
    generator = pagegenerators.WikidataSPARQLPageGenerator(spq, site=repo)
    for wd in generator:
        try:
            wd.get(get_redirect=True)
        except Exception as err:
            # Deliberately broad: any load failure means "skip this page",
            # but say so instead of dropping it silently.
            print(f'skipping {wd}: {err!r}')
            continue
        # Yield outside the try block so that only the load is guarded.
        yield wd
def get_gnd_xml(gndID):
    """Return the ``a`` subfield(s) of MARC field 375 of a GND record.

    The MARCXML of the record is downloaded from
    ``http://d-nb.info/gnd/<gndID>/about/marcxml`` and parsed with
    ``marcalyx``.  The ``a`` subfields of the first 375 field are converted
    with ``str()`` and joined with commas.  If the record has no 375 field,
    the string ``'$a0'`` is returned instead.

    Errors raised while downloading or parsing the record are not handled
    here and propagate to the caller.
    """
    url = 'http://d-nb.info/gnd/%s/about/marcxml' % gndID
    # Parse while the response is open, then close it deterministically
    # instead of leaving the connection to the garbage collector.
    with urllib.request.urlopen(url) as response:
        root = ET.parse(response).getroot()
    marc = marcalyx.Record(root)
    try:
        marcvalue = marc["375"][0]['a']
    except IndexError:
        # No 375 field in this record.
        return '$a0'
    return ",".join(str(element) for element in marcvalue)
def sourcetoadd(gnditem, gndID, timestamp):
    """Build the three reference snaks that make up one reference block.

    Returns a list of ``pywikibot.Claim`` objects, each flagged as a
    reference, in this order: P248 targeting *gnditem*, P227 targeting
    *gndID*, P813 targeting *timestamp*.  The claims are created on the
    module-level ``repo``.
    """
    snaks = []
    for pid, target in (('P248', gnditem), ('P227', gndID), ('P813', timestamp)):
        snak = pywikibot.Claim(repo, pid)
        snak.isReference = True
        snak.setTarget(target)
        snaks.append(snak)
    return snaks
# Shared Wikidata site object; the helper functions of this script read it
# as a module-level global.
repo = pywikibot.Site('wikidata', 'wikidata')
repo.login()
# Today's date as a WbTime; it becomes the target of the P813 reference snak.
today = date.today()
timestamp = pywikibot.WbTime(year=today.year, month=today.month, day=today.day)
# Item used as the P248 target of the references this script writes and
# matched against existing P248 references (presumably the Wikidata item
# for the GND itself -- verify).
gnditem = pywikibot.ItemPage(repo, "Q36578")
# Task 1: run the script on query1 until that query returns no more items.
# Task 2: then do the same, as far as possible, with query2.
# NOTE(review): only query1 is consumed in the visible part of this script;
# query2 apparently has to be swapped in by hand.
#
# query1: items with a P21 statement and a P227 value, where the P21
# statement has a reference containing P248 = Q36578 but no P227.  Items
# having a deprecated-rank P227 statement are excluded.  At most 5000 items.
query1='''SELECT DISTINCT ?item WHERE
{ ?item p:P21 ?st ; wdt:P227 ?gnd . ?st prov:wasDerivedFrom ?ref .
MINUS { ?item p:P227 ?std . ?std wikibase:rank wikibase:DeprecatedRank } .
?ref pr:P248 wd:Q36578 . MINUS { ?ref pr:P227 ?id } . } LIMIT 5000
'''
# query2: items with a P21 statement and a P227 value, where the P21
# statement has a reference containing P143 and a reference (possibly the
# same one) containing no P248.  Same exclusion and limit as query1.
query2='''SELECT DISTINCT ?item WHERE
{ ?item p:P21 ?st ; wdt:P227 ?gnd . ?st prov:wasDerivedFrom ?ref, ?ref2 .
MINUS { ?item p:P227 ?std . ?std wikibase:rank wikibase:DeprecatedRank } .
?ref pr:P143 ?v . MINUS { ?ref2 pr:P248 ?s } . } LIMIT 5000
'''
# Value that get_gnd_xml() must return for each P21 value this script is
# willing to confirm (Q6581097 = male -> code 1, Q6581072 = female -> code 2;
# the variable naming in this script refers to these codes as ISO 5218).
_GND_CODE_BY_GENDER = {"Q6581097": "$a1", "Q6581072": "$a2"}

# Reference properties whose mere presence makes a reference block replaceable.
_REPLACEABLE_REFERENCE_PROPS = ("P143", "P214", "P887")


def _is_removable_source(source):
    """Tell whether the reference block *source* should be replaced.

    A block is replaceable when it contains any of the properties in
    ``_REPLACEABLE_REFERENCE_PROPS``, or when one of its P248 snaks targets
    the module-level ``gnditem``.
    """
    if any(prop in source for prop in _REPLACEABLE_REFERENCE_PROPS):
        return True
    # Look at the P248 snaks themselves rather than at the last snak of
    # whichever property group happens to be iterated.
    return any(snak.getTarget() == gnditem for snak in source.get("P248", ()))


# NOTE: the loop variable must remain a module-level name called ``item``,
# because saveClaim() reads it as a global.
for item in wd_sparql_query(query1):
    gndID = item.claims['P227'][0].getTarget()
    try:
        iso5218 = get_gnd_xml(gndID)
    except (OSError, ET.ParseError) as err:
        # urllib's URLError/HTTPError are OSError subclasses.  One record
        # that cannot be fetched or parsed must not abort the whole batch.
        print(f'could not read GND record {gndID}: {err!r}: restart')
        continue
    print(f'{gndID = }')
    print(f'{iso5218 = }')
    genderitem = item.claims['P21'][0].getTarget()
    try:
        gendervalue = genderitem.getID()
    except AttributeError:
        # The target has no getID() (e.g. it is None): fall back to an ID
        # that is not in _GND_CODE_BY_GENDER, so the item is skipped below.
        gendervalue = "Q113124952"
    print(f'{gendervalue = }')
    if _GND_CODE_BY_GENDER.get(gendervalue) == iso5218:
        print("gendervalue and iso5218 coincide: go on")
    else:
        print("invalid or divergent iso5218: restart")
        continue
    for claim in item.claims['P21']:
        # Only the value that was actually compared with the GND record may
        # cite it; other P21 values of the same item are left untouched.
        if claim.getTarget() != genderitem:
            continue
        removablesources = [
            source for source in claim.sources if _is_removable_source(source)
        ]
        detachSources(claim, removablesources)
        attachSources(claim, sourcetoadd(gnditem, gndID, timestamp))
        saveClaim(claim)
print("All saved!")