#!/usr/local/bin/python
# -*- coding: utf-8 -*-
import pywikibot
from pywikibot import pagegenerators
import pywikibot.data.wikidataquery as wdquery
from pywikibot.data import api
import codecs
import sys
import datetime
from datetime import datetime, date, time
# Wikidata property ids used by this script.
CLAIM_is_a = 'P31'
CLAIM_in_administrative_unit = 'P131'
CLAIM_official_name = 'P1448'
CLAIM_country = 'P17'

# Countdown shown on the progress line; set by the query generator and
# decremented once per processed item.
items2do = 0

# Languages for which a description is generated.
langlist = {
    'en', 'sv', 'nl', 'de', 'it',
    'pt', 'da', 'fi', 'fr', 'ro',
}

# Per-language prefix of the description; the town label is appended to it.
districtlist = {
    'en': 'district in ',
    'sv': 'district i ',
    'nl': 'district in ',
    'de': 'Distrikt in ',
    'it': 'distretto in ',
    'pt': 'distrito na ',
    'da': u'distriktet i ',
    'fi': 'piirin ',
    'fr': 'district en ',
    'ro': u'district în ',
}

# Swedish district, with a defined municipality
default_query = 'claim[31:18333556] and claim[131]'

# NOTE(review): 'se' is the ISO 639-1 code for Northern Sami; Swedish is
# 'sv' -- confirm which one is intended.
default_language = u'se'

txt2skip = u'|skip!|'
output2screen = False

# Set to True to run testrun() on a single item instead of main().
debugedo = False
def log_premature(itemno):
    """
    Append the id of the item about to be processed to the pre-log file.

    The id is written before any work is done on the item, so after a
    failure the last line of ``SE-description.prelog.csv`` names the item
    that was being handled.

    Arguments:
        * itemno - item identifier; written with ``%s`` followed by a newline.
    """
    # The ``with`` block closes the file on exit; no explicit close() needed.
    with codecs.open("SE-description.prelog.csv", "a", encoding="utf-8") as logfile:
        # One-element tuple so a tuple-valued itemno cannot be mistaken for
        # the format argument list.
        logfile.write('%s\n' % (itemno,))
def logme(verbose, formatstring, *parameters):
    """
    Append one formatted line to ``SE-description.log.csv``.

    Arguments:
        * verbose - when true, the message is also printed to stdout.
        * formatstring - ``%``-style format string for the line, without the
          trailing newline.
        * parameters - values substituted into formatstring.

    A failure to format or to write the line is reported on stdout and
    forces the message to be echoed, instead of aborting the run.
    """
    # Format as text first; the codecs stream does the UTF-8 encoding itself,
    # so the message must not be encoded by hand before writing.
    try:
        message = formatstring % parameters
    except (TypeError, ValueError) as err:
        print("1) Error writing to logfile on: [%s] [%s]" % (type(err), err))
        # Fall back to the raw pieces so the entry is not lost.
        message = u'%s %r' % (formatstring, parameters)
        verbose = True
    with codecs.open("SE-description.log.csv", "a", encoding="utf-8") as logfile:
        try:
            logfile.write(message + u'\n')
        except (IOError, UnicodeError) as err:
            print("1) Error writing to logfile on: [%s] [%s]" % (type(err), err))
            verbose = True  # now I want to see what!
    if verbose:
        print(message)
def log_unknown(verbose, formatstring, *parameters):
    """
    Append one formatted line to ``SE-description.missing.csv``.

    Arguments:
        * verbose - when true, the message is also printed to stdout.
        * formatstring - ``%``-style format string for the line, without the
          trailing newline.
        * parameters - values substituted into formatstring.

    A failure to format or to write the line is reported on stdout and
    forces the message to be echoed, instead of aborting the run.
    """
    # Format as text first; the codecs stream does the UTF-8 encoding itself,
    # so the message must not be encoded by hand before writing.
    try:
        message = formatstring % parameters
    except (TypeError, ValueError):
        print("2) Error writing to logfile on")
        # Fall back to the raw pieces so the entry is not lost.
        message = u'%s %r' % (formatstring, parameters)
        verbose = True
    with codecs.open("SE-description.missing.csv", "a", encoding="utf-8") as logfile:
        try:
            logfile.write(message + u'\n')
        except (IOError, UnicodeError):
            print("2) Error writing to logfile on")
            verbose = True  # now I want to see what!
    if verbose:
        print(message)
def get_description(language, wikidataitem):
    """
    Return the description of wikidataitem in the given language.

    Arguments:
        * language - language code used as key into the item's descriptions.
        * wikidataitem - object exposing a ``descriptions`` mapping.

    Returns the stored description, or an empty string when the item has no
    description in that language.
    """
    known = wikidataitem.descriptions
    return known[language] if language in known else ''
def get_claim_str(repo,lng,wditem,claim):
    """
    Return the text of the first value of a claim on wditem.

    Arguments:
        * repo - not used by this function.
        * lng - language code; only used to build the "missing" marker.
        * wditem - item whose ``claims`` mapping is inspected.
        * claim - property id to look up, e.g. 'P1448'.

    Returns a ``(text, missing)`` pair. On success ``missing`` is ''.
    When the claim is absent, or its first statement has no target value,
    ``text`` is '' and ``missing`` is ``'<item title>/<claim>/<lng>'``.
    """
    if claim in wditem.claims:
        # Only the first statement of the property is considered.
        target = wditem.claims.get(claim)[0].getTarget()
        # getTarget() yields None when the statement carries no value.
        if target is not None:
            return (target['text'], '')
    return ('', wditem.title() + '/' + claim + '/' + lng)
def get_claim_title(repo,lng,wditem,claim):
    """
    Return the label, in language lng, of the item a claim points to.

    Arguments:
        * repo - data repository used to load the linked item.
        * lng - language code of the wanted label.
        * wditem - item whose ``claims`` mapping is inspected.
        * claim - property id to follow, e.g. 'P131'.

    Always returns a ``(label, missing)`` pair, so callers can unpack two
    values on every path:
        * label found: ``(label, '')``
        * linked item has no label in lng: ``('', '<linked title>-><lng>')``
        * claim absent or without a target value: ``('', '<claim>/<lng>')``
    """
    if claim not in wditem.claims:
        return ('', claim + '/' + lng)
    # Only the first statement of the property is considered.
    target = wditem.claims.get(claim)[0].getTarget()
    if target is None:
        # Statement exists but carries no value to follow.
        return ('', claim + '/' + lng)
    linked_item = pywikibot.ItemPage(repo, target.title())
    linked_item.get(get_redirect=True)
    if lng in linked_item.labels:
        return (linked_item.labels[lng], '')
    return ('', linked_item.title() + '->' + lng)
def get_Swedish_town(repo,lng,wditem,claim):
    """
    Placeholder that is not implemented yet.

    Accepts a repository, a language code, an item and a property id,
    does nothing with them and returns None.
    """
    return None
def action_one_item(repo, wditem, lng):
    """
    Log the proposed description of one item for every language in langlist.

    Arguments:
        * repo - data repository, passed on to get_claim_title.
        * wditem - the item to process; its content must already be loaded.
        * lng - not used: every language in langlist is processed. Kept so
          existing callers keep working.

    Side effects:
        * decrements the global items2do counter and, unless debugedo is
          set, rewrites the progress line on stdout;
        * writes one line per language through logme;
        * reports missing labels and items of an unexpected type through
          log_unknown.

    Returns the number of items edited. The write-back step is currently
    disabled, so this is always 0.
    """
    global items2do

    items_found = 0
    items2do -= 1
    if not debugedo:
        sys.stdout.write("\r%i\t%s" % (items2do, wditem.title()))

    type_of_item = u''
    if CLAIM_is_a in wditem.claims:
        # we only use the first one, and assume the first one is most relevant
        first_type = wditem.claims.get(CLAIM_is_a)[0].getTarget()
        if first_type is not None:
            type_of_item = first_type.title()

    if type_of_item != 'Q18333556':  # Q18333556 is a Swedish district
        # this should not happen with the active filters
        log_unknown(False, '%s|%s|%s', wditem.title(), type_of_item,
                    get_description('en', wditem))
        return items_found

    # A separate loop variable keeps the lng parameter from being clobbered.
    for language in langlist:
        orig_desc = get_description(language, wditem)
        # Only the first two elements (label, missing-marker) are used.
        new_town, missingtown = get_claim_title(
            repo, language, wditem, CLAIM_in_administrative_unit)[:2]
        new_country, missingcountry = get_claim_title(
            repo, language, wditem, CLAIM_country)[:2]
        if language != 'sv':
            my_new_description = (u'' + districtlist[language] + new_town
                                  + ', ' + new_country)
        else:
            # in Swedish, just skip Sweden in the description
            my_new_description = u'' + districtlist[language] + new_town
        logme(False, '%s|%s|%s|%s', wditem.title(), language, orig_desc,
              my_new_description)
        # Report missing labels per language, tagged with the language that
        # was actually looked up.
        if missingtown != '':
            log_unknown(True, '[%s][%s][%s]', wditem.title(), language,
                        missingtown)
        if missingcountry != '':
            log_unknown(True, '[%s][%s][%s]', wditem.title(), language,
                        missingcountry)

    # The write-back step is currently disabled, so this function only logs.
    # When re-enabled it is expected to fill a missing label from the
    # '<language>wiki' sitelink and then call, per language:
    #     wditem.editEntity({'labels': {language: label},
    #                        'descriptions': {language: my_new_description}},
    #                       summary=...)
    # and count the item in items_found.
    return items_found
class WDBot():
    """
    A bot that walks Wikidata items and hands each one to action_one_item.
    """
    def __init__(self, generator):
        """
        Arguments:
            * generator     - A generator that yields itempage objects.
        """
        self.generator = generator
        self.repo = pywikibot.Site().data_repository()

    def run(self,lng):
        """
        Starts the robot.

        Arguments:
            * lng - language code, passed through to action_one_item.

        Stops early once action_one_item has reported more than three
        processed items in total.
        """
        # pywikibot.Site replaces the deprecated pywikibot.getSite alias.
        site = pywikibot.Site('sv')
        repo = site.data_repository()
        items_found = 0
        for WDIquery in self.generator:
            if items_found > 3:
                break
            if not WDIquery.exists():
                continue
            # log which item we process ... in case of an error, I know which item it is
            log_premature(WDIquery.title())
            WDIquery.get(get_redirect=True)
            items_found += action_one_item(repo, WDIquery, lng)
def WikidataQueryItemPageGenerator(query, site=None):
    """Generate pages that result from the given WikidataQuery.

    @param query: the WikidataQuery query string.
    @param site: site whose data repository is used; defaults to
        pywikibot.Site().

    Being a generator, the query is only sent once iteration starts; at
    that point the global items2do is set to the reported item count.
    """
    global items2do
    if site is None:
        site = pywikibot.Site()
    repo = site.data_repository()
    wd_queryset = wdquery.QuerySet(query)
    wd_query = wdquery.WikidataQuery(cacheMaxAge=0)
    data = wd_query.query(wd_queryset)
    items2do = data[u'status'][u'items']
    pywikibot.output(u'retrieved %d items' % items2do)
    for item in data[u'items']:
        # %-formatting instead of the Python-2-only unicode() builtin.
        yield pywikibot.ItemPage(repo, u'Q%s' % item)
def testrun():
    """
    Debug entry point: process one hard-coded item.

    Switches the global output2screen on, loads item Q20801595 and passes
    it to action_one_item with default_language.
    """
    global output2screen
    output2screen = True
    item2get = 'Q20801595'
    repo = pywikibot.Site().data_repository()
    print("start")
    single_item = pywikibot.ItemPage(repo, item2get)
    single_item.get(get_redirect=True)
    action_one_item(repo, single_item, default_language)
def main():
    """
    Run the bot over every item returned by default_query.

    Wraps the query generator in pywikibot's item and preloading
    generators, feeds the result to WDBot and runs it with
    default_language.
    """
    # Call form of print, matching the rest of the file.
    print("main")
    query = default_query  # later, I want to manage this with params
    lng = default_language
    pigenerator = pagegenerators.PreloadingItemGenerator(
        pagegenerators.WikidataItemGenerator(
            WikidataQueryItemPageGenerator(query)))
    wikidataBot = WDBot(pigenerator)
    wikidataBot.run(lng)
    print("\n")
# Script entry: run the single-item test when debugedo is set, otherwise the
# full bot. pywikibot.stopme() is called whether the run finishes or raises.
try:
    entry_point = testrun if debugedo else main
    entry_point()
finally:
    pywikibot.stopme()