User:AkkakkBot/code/08-remove-redundant-aliases
< User:AkkakkBot | code
Python code:
# Bot task 8: remove Wikidata aliases that merely repeat the label of the same
# language on the same item, then write a summary of the run to a log page.
#
# NOTE(review): this listing shows no import lines; the code below expects
# os, sys, traceback, MySQLdb and pywikibot to be imported by the script.

# Stop after this many successful edits per run.
editmax = 1
# Edit summaries longer than this are replaced by a generic one.  This must be
# an int: comparing len() against the string "100" never triggers the fallback
# on Python 2 and raises TypeError on Python 3.
summary_max_len = 100

# Creating this file in the working directory stops the bot before the next item.
STOP_FILE = "bot-08-remove-redundant-aliases.stop"
LOG_PAGE = u"User:AkkakkBot/log"

# Self-join of wb_terms: item rows whose label text equals an alias text in
# the same language.
CANDIDATE_QUERY = (
    'select l.term_entity_id '
    'from wb_terms as l join wb_terms as a '
    'on l.term_entity_id = a.term_entity_id '
    'and l.term_entity_type = "item" and a.term_entity_type = "item" '
    'and l.term_language = a.term_language '
    'and l.term_type = "label" and a.term_type = "alias" '
    'and l.term_text = a.term_text '
    'limit 10;'
)


def remove_redundant_aliases(labels, aliases):
    """Drop every alias that is identical to the label of the same language.

    ``aliases`` is updated in place: for each affected language the alias list
    is replaced by a new list without the label.  A new list is built instead
    of calling ``list.remove`` inside the loop, because removing from a list
    while iterating over it skips elements.

    Args:
        labels: mapping of language code to label text.
        aliases: mapping of language code to a list of alias texts.

    Returns:
        A list of ``(lang, label)`` tuples, one per language that was changed,
        in the iteration order of ``labels``.
    """
    removed = []
    for lang in labels:
        label = labels[lang]
        current = aliases.get(lang)
        if not current or label not in current:
            continue
        aliases[lang] = [alias for alias in current if alias != label]
        removed.append((lang, label))
    return removed


def build_summary(removed):
    """Return the edit summary for the given ``(lang, label)`` removals.

    Falls back to a generic summary when the detailed text would be longer
    than ``summary_max_len`` characters.
    """
    details = "".join(
        " - remove redundant " + lang + " alias (" + label + ")"
        for lang, label in removed
    )
    summary_text = details + " (task 8)"
    if len(summary_text) > summary_max_len:
        return "remove redundant aliases (task 8)"
    return summary_text


def unique_in_order(values):
    """Return ``values`` as a list without duplicates, keeping first-seen order."""
    seen = set()
    result = []
    for value in values:
        if value not in seen:
            seen.add(value)
            result.append(value)
    return result


def fetch_candidate_items():
    """Query the database replica and return the candidate item ids ("Q...").

    The join yields one row per matching label/alias pair, so the same entity
    can appear several times; duplicates are dropped.  The cursor and the
    connection are closed even when the query fails.
    """
    db = MySQLdb.connect(host="wikidatawiki.labsdb", db="wikidatawiki_p",
                         read_default_file="~/replica.my.cnf")
    try:
        cur = db.cursor()
        try:
            cur.execute(CANDIDATE_QUERY)
            rows = cur.fetchall()
        finally:
            cur.close()
    finally:
        db.close()
    return unique_in_order(
        "Q{}".format(cell) for row in rows for cell in row
    )


def main():
    """Check every candidate item, fix it if needed and publish the run log."""
    site = pywikibot.Site("wikidata", "wikidata")
    repo = site.data_repository()
    log_parts = []
    exit_reason = "end of data"

    # get items from the database replica
    print("get items")
    sys.stdout.flush()
    items = fetch_candidate_items()

    # iterate items
    print("iterate items")
    editcnt = 0
    try:
        for q in items:
            if os.path.isfile(STOP_FILE):
                print("stop file")
                exit_reason = "stop file"
                break
            # No newline here: the result for this item is printed on the same
            # line.  sys.stdout.write behaves the same on Python 2 and 3.
            sys.stdout.write("checking {}: ".format(q))
            sys.stdout.flush()
            item = pywikibot.ItemPage(repo, q)
            try:
                content = item.get()
                aliases = content['aliases']
                removed = remove_redundant_aliases(content['labels'], aliases)
                if not removed:
                    print("")
                    continue
                for lang, _label in removed:
                    print("-" + lang + ":conflict - remove " + lang)
                print("- edit {}...".format(editcnt + 1))
                # NOTE(review): confirm that passing an empty alias list for a
                # language really clears that language's aliases on the server.
                item.editAliases(summary=build_summary(removed),
                                 aliases=aliases)
                # Count and log only after the edit went through, so a failed
                # API call is neither reported as done nor counted.
                editcnt += 1
                for lang, label in removed:
                    log_parts.append(
                        u"[[" + q + u"]]: remove " + lang
                        + u" (" + label + u")\n"
                    )
                log_parts.append(u"\n")
                if editcnt >= editmax:
                    print("maximum number of edits reached")
                    exit_reason = "maximum number of edits reached"
                    break
            except pywikibot.exceptions.NoPage:
                print(" item does not exist")
            except pywikibot.data.api.APIError:
                print(" api error. trying to continue.")
            except UnicodeEncodeError:
                print(" UnicodeEncodeError.why?")
    except Exception:
        # Top-level boundary: report the failure but still publish the log.
        print("exception")
        traceback.print_exc()
        exit_reason = "exception"

    if log_parts:
        log = u"".join(log_parts) + u"exit reason:" + exit_reason
        pageobj = pywikibot.Page(site, LOG_PAGE)
        pageobj.put(log, u"log for task 8: remove redundant aliases",
                    minorEdit=False)
    print("end of script")


if __name__ == "__main__":
    main()