User:AkkakkBot/code/08-remove-redundant-aliases

Python code:

        # Task 8: for a batch of Wikidata items, remove every alias that is
        # identical to the item's label in the same language, then append a
        # report of the removals to the bot's log page.
        site = pywikibot.Site("wikidata", "wikidata")
        repo = site.data_repository()
        db = MySQLdb.connect(host="wikidatawiki.labsdb", db="wikidatawiki_p", read_default_file="~/replica.my.cnf")

        # the run stops after an edit once the edit counter has reached this value
        editmax = 1
        log = ""
        exit_reason = "end of data"
        # must be an int: it is compared against len(summary_text) below
        summary_max_len = 100

        # get candidate items from the database: items that have a label and an
        # alias with the same text in the same language (at most 10 rows)
        print("get items")
        sys.stdout.flush()
        items = []
        cur = db.cursor()
        try:
            cur.execute('select l.term_entity_id from wb_terms as l join wb_terms as a on l.term_entity_id = a.term_entity_id and l.term_entity_type = "item" and a.term_entity_type = "item" and l.term_language = a.term_language and l.term_type = "label" and a.term_type = "alias" and l.term_text = a.term_text limit 10;')
            for row in cur.fetchall():
                for cell in row:
                    items.append("Q{}".format(cell))
        finally:
            # the database is only needed for the candidate list
            cur.close()
            db.close()

        # iterate items
        print("iterate items")
        editcnt = 0
        try:
            for q in items:
                # an operator can abort the run by creating this file
                if os.path.isfile("bot-08-remove-redundant-aliases.stop"):
                    print("stop file")
                    exit_reason = "stop file"
                    break

                # NOTE: the trailing comma is kept from the original; under the
                # Python 2 print statement it suppresses the newline so the
                # per-item status stays on one line.
                print("checking {}:".format(q)),
                sys.stdout.flush()
                item = pywikibot.ItemPage(repo, q)
                try:
                    content = item.get()
                    labels = content['labels']
                    aliases = content['aliases']
                    changed = False
                    summary_text = ""
                    for lang in labels:
                        if lang not in aliases:
                            continue
                        label = labels[lang]
                        # Build a filtered copy instead of removing entries from
                        # the list while iterating over it, which skips elements
                        # and can leave a duplicated redundant alias behind.
                        kept = [alias for alias in aliases[lang] if alias != label]
                        if len(kept) == len(aliases[lang]):
                            continue
                        print("-"+lang+":conflict"),
                        changed = True
                        aliases[lang] = kept
                        summary_text += " - remove redundant "+lang+" alias ("+label+")"
                        log += u"[["+q+"]]: remove "+lang+" ("+label+")\n"
                        print("- remove "+lang)
                    if changed:
                        editcnt += 1
                        print("- edit {}...".format(editcnt))
                        log += "\n"
                        summary_text += " (task 8)"
                        # fall back to a generic summary when the detailed one
                        # is too long
                        if len(summary_text) > summary_max_len:
                            summary_text = "remove redundant aliases (task 8)"
                        item.editAliases(summary=summary_text, aliases=aliases)
                        if editcnt >= editmax:
                            print("maximum number of edits reached")
                            exit_reason = "maximum number of edits reached"
                            break
                    else:
                        print("")
                except pywikibot.exceptions.NoPage:
                    print("  item does not exist")
                except pywikibot.data.api.APIError:
                    print("  api error. trying to continue.")
                except UnicodeEncodeError:
                    print("  UnicodeEncodeError.why?")
        except Exception:
            # top-level boundary: report the failure, but still write the log
            # collected so far
            print("exception")
            traceback.print_exc()
            exit_reason = "exception"

        # only touch the log page when something was actually logged
        if log != "":
            log += "exit reason:"+exit_reason
            pageobj = pywikibot.Page(site, u"User:AkkakkBot/log")
            pageobj.put(log, u"log for task 8: remove redundant aliases", minorEdit=False)
        print("end of script")