import pywikibot
from pywikibot import pagegenerators
import re
# Site handle for English Wikipedia; the category of pages to process is
# looked up on this site.
enwiki = pywikibot.Site('en', 'wikipedia')
# Data repository linked to that site; new claims are created against it.
enwiki_repo = enwiki.data_repository()
# Template spellings seen in article wikitext, tried in this order; the first
# pattern that matches anywhere in the page wins.
_UEFA_ID_PATTERNS = (
    re.compile(r'UEFA\splayer\|(\d+)'),
    re.compile(r'UEFA\|(\d+)'),
    re.compile(r'UEFA\splayer\|id\=(\d+)'),
)


def _find_uefa_id(text):
    """Return the first UEFA player ID found in *text*, or None if absent."""
    for pattern in _UEFA_ID_PATTERNS:
        match = pattern.search(text)
        if match:
            return match.group(1)
    return None


def addQid(page, itempage):
    """Add the UEFA player ID found on *page* to *itempage* as a P2276 claim.

    The ID is extracted from the page's wikitext. The claim is printed and
    only saved after the operator answers ``y`` or ``Y`` at the prompt.

    Args:
        page: object exposing the article wikitext as ``page.text``.
        itempage: item that receives the claim via ``addClaim``.

    Returns:
        None. If no ID can be extracted, a message is printed and nothing
        is created or saved.
    """
    uefa_id = _find_uefa_id(page.text)
    if uefa_id is None:
        # Previously this case crashed with IndexError on found[0].
        print('No UEFA player ID found in page text; skipping')
        return

    claim = pywikibot.Claim(enwiki_repo, u'P2276')
    claim.setTarget(uefa_id)
    print(claim)

    # Manual confirmation so every write is reviewed before it is saved.
    answer = input("Save? ")
    if answer in ('y', 'Y'):
        itempage.addClaim(claim, summary=u'Adding UEFA player ID')
def checkqids(page, itempage):
    """Add the UEFA player ID to *itempage* unless it already has one.

    Args:
        page: article page, passed through to ``addQid`` for ID extraction.
        itempage: item whose claims are inspected; ``itempage.get()`` must
            return a mapping with a ``'claims'`` entry keyed by property ID.

    Returns:
        None.
    """
    qid = 'P2276'
    # Use the parameter, not a module-level variable, so the function works
    # for whichever item the caller passes in.
    item_dict = itempage.get()
    claims = item_dict.get('claims', {})
    # Explicit membership test instead of a bare ``except:``, which also
    # hid unrelated failures (and Ctrl-C) behind an attempt to add a claim.
    if qid not in claims:
        addQid(page, itempage)
# Maintenance category listing the pages to process.
targetcats = "Category:UEFA_player_ID_not_in_Wikidata"
cat = pywikibot.Category(enwiki, targetcats)
pages = pagegenerators.CategorizedPageGenerator(cat, recurse=False)

# Loop through the pages listed in the category.
for page in pages:
    print(page)
    try:
        item = pywikibot.ItemPage.fromPage(page)
    except pywikibot.exceptions.Error as err:
        # A page without a linked item must not abort the whole run.
        # The base Error class is caught because the concrete
        # "no page" exception name differs between pywikibot releases.
        print('Skipping: no usable item for this page ({})'.format(err))
        continue
    checkqids(page, item)