User:AkkakkBot/code/05-links-to-labels
< User:AkkakkBot | code
Python code:
site = pywikibot.Site("wikidata", "wikidata")
repo = site.data_repository()
db = MySQLdb.connect(host="wikidatawiki.labsdb", db="wikidatawiki_p", read_default_file="~/replica.my.cnf")
editmax = 3000
log = ""
exit_reason = "end of data"
summary_max_len = "100"
language_map = {}
cur = db.cursor()
cur.execute('select * from s51880_akkakkbot.languages;')
lines = cur.fetchall()
for line in lines:
language_map[line[0]] = line[1]
#get items
print("get items")
sys.stdout.flush()
items = []
cur = db.cursor()
#cur.execute('select ips_item_id from wb_items_per_site join s51880_akkakkbot.languages on lang_db = ips_site_id and ips_site_page not like "%, %" and ips_site_page not like "% (%)" and not (lang_db = "alswiki" or lang_db = "crhwiki" or lang_db = "nowiki" or lang_db = "bat_smgwiki" or lang_db = "be_x_oldwiki" or lang_db = "fiu_vrowiki" or lang_db = "roa_rupwiki" or lang_db = "zh_classicalwiki" or lang_db = "zh_min_nanwiki" or lang_db = "zh_yuewiki") and not exists (select term_language from wb_terms where ips_item_id = term_entity_id and lang_wd = term_language and term_type = "label") limit 50000;')
cur.execute('select ips_item_id from wb_items_per_site join s51880_akkakkbot.languages on lang_db = ips_site_id and ips_site_page not like "%, %" and ips_site_page not like "% (%)" and not exists (select term_language from wb_terms where ips_item_id = term_entity_id and lang_wd = term_language and term_type = "label") limit 50000;')
lines = cur.fetchall()
for line in lines:
for cell in line:
if(not cell in items):
items.append("Q{}".format(cell))
#iterate items
print("iterate items")
editcnt = 0
try:
for i, q in enumerate(items):
sys.stdout.flush()
print("checking {}:".format(q)),
if(os.path.isfile("bot-05-links-to-labels.stop")):
print("stop file")
exit_reason = "stop file"
break
item = pywikibot.ItemPage(repo, q)
try:
content = item.get()
links = content['sitelinks']
labels = content['labels']
descriptions = content['descriptions']
changed = False
summary_text = ""
for langdb in links:
langwd = langdbtowd(language_map, langdb)
if langwd != "" and not langwd in labels:
link = links[langdb]
description = ""
if langwd in descriptions:
description = descriptions[langwd]
if (re.match("(.+ \(.+\)|.+, .+)", link)):
print("- skip "+langwd+":contains brackets"),
#elif (description != "" and exists_item(db=db, language=langwd, label=link, description=description)):
# print("- skip"+langwd+":there is an item with the same label+description"),
else:
print(u"- set "+langwd+u" label"),
labels[langwd] = link
if not (changed):
log_line = "* [["+q+"]]:"
changed = True
log_line += u" "+langwd+u": "+link
summary_text += u" - set "+langwd+u" label to "+link
if(changed):
editcnt += 1
print("- edit {}...".format(editcnt))
summary_text += " (task 5)"
if(len(summary_text) > summary_max_len):
summary_text = u"set labels to links (task 5)"
try:
item.editLabels(summary=summary_text, labels=labels)
log_line += "\n"
log += log_line
except TypeError:
print(" Type Error")
editcnt -= 1
if(editcnt >= editmax):
print("maximum number of edits reached")
exit_reason = "maximum number of edits reached"
break
except pywikibot.exceptions.NoPage:
print(" item does not exist")
except pywikibot.data.api.APIError:
print(" api error. trying to continue.")
except UnicodeEncodeError:
print(" UnicodeEncodeError.why?")
except Exception as exc:
print("exception")
traceback.print_exc()
exit_reason = "exception"
if log != "":
log += "exit reason:"+exit_reason
pageobj = pywikibot.Page(site, u"User:AkkakkBot/log")
pageobj.put(log, u"log for task 5: set unset labels to links", minorEdit = False)
print("end of script")
return exit_reason
'''
+------------------+----------+
| lang_db | lang_wd |
+------------------+----------+
| alswiki | gsw | *
| bat_smgwiki | bat-smg |
| be_x_oldwiki | be-x-old |
| cbk_zamwiki | cbk-zam |
| crhwiki | crh-latn | *
| fiu_vrowiki | fiu-vro |
| map_bmswiki | map-bms |
| nds_nlwiki | nds-nl |
| nowiki | nb | *
| roa_rupwiki | roa-rup |
| roa_tarawiki | roa-tara |
| simplewiki | en |
| zh_classicalwiki | lzh | *
| zh_min_nanwiki | nan | *
| zh_yuewiki | yue | *
+------------------+----------+
'''
def langdbtowd(language_map, langdb):
    """Return the Wikidata language code for a sitelink database name.

    Args:
        language_map: mapping of database names (e.g. ``"nowiki"``) to
            Wikidata language codes (e.g. ``"nb"``).
        langdb: the database name to translate.

    Returns:
        The mapped language code, or ``""`` when ``langdb`` is on the ignore
        list or has no entry in ``language_map``.
    """
    # The ignore list is currently empty, so no site is filtered out here.
    ignored_sites = []
    if langdb in ignored_sites:
        return ""
    return language_map[langdb] if langdb in language_map else ""