Open main menu

Wikidata β


< User:TweetsFactsAndQueries

Robin van der Vliet informed me via Twitter that the Esperanto (eo) label for many humans has the family name in all caps. This dates back to a now-abandoned Esperanto Wikipedia policy.

I am fixing the simplest of these cases with pywikibot. The gist of the code is:

import pywikibot as pwb
from pywikibot import pagegenerators as pg

# Site object for Wikidata (presumably code and family are both "wikidata" --
# confirm against the pywikibot Site() documentation).
site = pwb.Site("wikidata", "wikidata")
# Data repository belonging to that site.
repo = site.data_repository()

def humans(limit=30000000):
    """Yield Wikidata items that are instances of human (Q5), in batches.

    The SPARQL endpoint is queried repeatedly with LIMIT/OFFSET paging so
    that no single query has to return every human at once.

    Args:
        limit: Upper bound on the number of items requested in total.

    Yields:
        The items produced by ``pg.WikidataSPARQLPageGenerator`` for each
        batch query, in the order the generator returns them.
    """
    batchsize = 1000
    # NOTE(review): there is no ORDER BY, so paging stability depends on the
    # endpoint returning rows in a consistent order -- confirm acceptable.
    query = """\
SELECT ?item WHERE {
  ?item wdt:P31 wd:Q5.
}
LIMIT %d
OFFSET %d"""
    for offset in range(0, limit, batchsize):
        # Never ask for more than what is left of the overall limit.
        size = min(batchsize, limit - offset)
        current_query = query % (size, offset)
        count = 0
        for item in pg.WikidataSPARQLPageGenerator(current_query, site):
            yield item
            count += 1
        if count != size:
            # A short batch means the result set is exhausted.
            break

def fix_eo_label(human, actually_edit=False):
    """Rewrite an eo label of the form "Given FAMILY" as "Given Family".

    Only the simplest case is handled: the item has an eo label, exactly one
    given-name (P735) and one family-name (P734) statement, both name items
    have an en label, and the eo label equals the given name followed by the
    upper-cased family name. Anything else is left untouched.

    Args:
        human: Item to inspect; must provide ``get()``, ``getID()`` and
            ``editLabels()``.
        actually_edit: When false (the default) the proposed change is only
            printed; when true the eo label is also edited.

    Returns:
        None.
    """
    data = human.get()
    claims = data["claims"]
    labels = data["labels"]
    if "eo" not in labels:
        return
    # Restrict to exactly one given name and one family name so the expected
    # label is unambiguous.
    if "P735" not in claims or len(claims["P735"]) != 1:
        return
    if "P734" not in claims or len(claims["P734"]) != 1:
        return

    given_item = claims["P735"][0].getTarget()
    family_item = claims["P734"][0].getTarget()
    # A statement without a concrete value has no target to read labels from.
    if given_item is None or family_item is None:
        return

    given_labels = given_item.get()["labels"]
    family_labels = family_item.get()["labels"]
    # Use en because eo labels don't always exist on name items; if the en
    # spelling differs, the comparison below simply fails, so no harm done.
    if "en" not in given_labels or "en" not in family_labels:
        return

    given = given_labels["en"]
    family = family_labels["en"]
    old_label = labels["eo"]
    if given + " " + family.upper() != old_label:
        return

    new_label = given + " " + family
    if new_label == old_label:
        # The family name has no lowercase form (e.g. "X"), so there is
        # nothing to fix; avoid submitting an edit that changes nothing.
        return

    print("{}: {} => {}".format(human.getID(), old_label, new_label))
    if actually_edit:
        human.editLabels(labels={"eo": new_label}, summary="Fix uppercase family name in eo label: see")

# Apply the fix to every item yielded by humans(). With actually_edit=True,
# matching eo labels are really edited, not just printed.
for human in humans():
    fix_eo_label(human, actually_edit=True)

I am running this code on PAWS; you can see the notebook here.