# User:Edoderoobot/SE-Sveriges-district

#!/usr/local/bin/python
# -*- coding: utf-8 -*-

import pywikibot
from pywikibot import pagegenerators
import pywikibot.data.wikidataquery as wdquery
from pywikibot.data import api
import codecs
import sys
import datetime
from datetime import datetime, date, time

# Wikidata property ids used by this script.
CLAIM_is_a = 'P31'
CLAIM_in_administrative_unit = 'P131'
CLAIM_official_name = 'P1448'
CLAIM_country = 'P17'

# Countdown of items still to process: set by WikidataQueryItemPageGenerator,
# decremented once per item in action_one_item.
items2do = 0

# Languages for which a description is generated.
langlist = {'en', 'sv', 'nl', 'de', 'it', 'pt', 'da', 'fi', 'fr', 'ro'}

# Per-language prefix placed in front of the municipality label.
# The Swedish entry is spelled "distrikt" (it was misspelled "district").
districtlist = {
    'en': 'district in ',
    'sv': 'distrikt i ',
    'nl': 'district in ',
    'de': 'Distrikt in ',
    'it': 'distretto in ',
    'pt': 'distrito na ',
    'da': u'distriktet i ',
    'fi': 'piirin ',
    'fr': 'district en ',
    'ro': u'district în ',
}

default_query = 'claim[31:18333556] and claim[131]'  # Swedish district, with a defined municipality
# 'sv' is the language code for Swedish; 'se' (used before) is Northern Sami.
default_language = u'sv'
txt2skip = u'|skip!|'
output2screen = False
# Set to True to run testrun() on a single hard-coded item instead of main().
debugedo = False

def log_premature(itemno):
    """Append *itemno* as one line to ``SE-description.prelog.csv``.

    Called before an item is processed, so that after a crash the last
    line of the file names the item that was being handled.

    The ``with`` statement closes the file; no explicit close is needed.
    """
    with codecs.open("SE-description.prelog.csv", "a", encoding="utf-8") as logfile:
        logfile.write('%s\n' % (itemno))
 
 
def logme(verbose, formatstring, *parameters):
    """Append one formatted line to ``SE-description.log.csv``.

    Arguments:
        * verbose      - when true, the message is also printed.
        * formatstring - %-style format string.
        * parameters   - values substituted into formatstring.

    The message is formatted as text and encoded only by the UTF-8 file
    object. Encoding the format string to bytes first breaks on Python 3
    and on non-ASCII text.

    A failure to open or write the log file is reported on stdout and the
    message is printed instead, so logging stays best-effort. A format
    string that does not match its parameters raises as usual.
    """
    message = formatstring % parameters

    try:
        with codecs.open("SE-description.log.csv", "a", encoding="utf-8") as logfile:
            logfile.write(message + '\n')
    except (IOError, OSError, UnicodeError) as error:
        print("1) Error writing to logfile on: [%s] [%s]" % (type(error), error))
        verbose = True    # now I want to see what!
    if verbose:
        print(message)

def log_unknown(verbose, formatstring, *parameters):
    """Append one formatted line to ``SE-description.missing.csv``.

    Arguments:
        * verbose      - when true, the message is also printed.
        * formatstring - %-style format string.
        * parameters   - values substituted into formatstring.

    The message is formatted as text and encoded only by the UTF-8 file
    object. Encoding the format string to bytes first breaks on Python 3
    and on non-ASCII text.

    A failure to open or write the file is reported on stdout and the
    message is printed instead, so logging stays best-effort. A format
    string that does not match its parameters raises as usual.
    """
    message = formatstring % parameters

    try:
        with codecs.open("SE-description.missing.csv", "a", encoding="utf-8") as logfile:
            logfile.write(message + '\n')
    except (IOError, OSError, UnicodeError):
        print("2) Error writing to logfile on")
        verbose = True    # now I want to see what!
    if verbose:
        print(message)


def get_description(language, wikidataitem):
    """Return the item's description in *language*, or '' when it has none."""
    available = wikidataitem.descriptions
    if language not in available:
        return ''
    return available[language]

def get_claim_str(repo,lng,wditem,claim):
    """Return the text of the first value of *claim* on *wditem*.

    Arguments:
        * repo   - unused; kept so the signature matches get_claim_title.
        * lng    - language code, used only in the "missing" marker.
        * wditem - item whose claims are inspected.
        * claim  - property id to look up.

    Returns a ``(text, missing)`` pair. ``missing`` is '' when the claim
    exists; otherwise ``text`` is '' and ``missing`` is
    ``<item title>/<claim>/<lng>``.
    """
    if claim not in wditem.claims:
        return ('', wditem.title() + '/' + claim + '/' + lng)

    # Only the first value of the claim is used.
    # NOTE(review): the target is subscripted with 'text', so it is assumed
    # to be mapping-like -- confirm against the pywikibot version in use.
    STRclaim = wditem.claims.get(claim)[0].getTarget()
    return (STRclaim['text'], '')
   
def get_claim_title(repo,lng,wditem,claim):
    """Return the *lng* label of the item that *claim* on *wditem* points to.

    Arguments:
        * repo   - data repository used to load the target item.
        * lng    - language code of the wanted label.
        * wditem - item whose claims are inspected.
        * claim  - property id to follow.

    Returns a ``(label, missing)`` pair in every case, so callers can
    always unpack two values:
        * ``(label, '')`` when the target item has a label in lng;
        * ``('', '<target title>-><lng>')`` when the target lacks that label;
        * ``('', '<item title>/<claim>/<lng>')`` when wditem has no such
          claim, or the claim has no target.
    """
    no_claim = ('', wditem.title() + '/' + claim + '/' + lng)
    if claim not in wditem.claims:
        return no_claim

    # Only the first value of the claim is followed.
    LNKclaim = wditem.claims.get(claim)[0].getTarget()
    if LNKclaim is None:
        # NOTE(review): presumably an "unknown value"/"no value" statement;
        # there is no item to read a label from.
        return no_claim

    WDIclaim = pywikibot.ItemPage(repo, LNKclaim.title())
    WDIclaim.get(get_redirect=True)
    if lng in WDIclaim.labels:
        return (WDIclaim.labels[lng], '')
    return ('', WDIclaim.title() + '->' + lng)
 
def get_Swedish_town(repo,lng,wditem,claim):
    """Placeholder that is not implemented yet; does nothing and returns None."""
    return None
 
def action_one_item(repo, wditem, lng):
    """Build and log the proposed descriptions for one Wikidata item.

    Arguments:
        * repo   - data repository, passed on to get_claim_title.
        * wditem - the (already loaded) item to process.
        * lng    - currently unused: descriptions are generated for every
                   language in langlist. Kept for the callers.

    For an item whose first P31 value is Q18333556 (Swedish district), a
    description is composed per language from districtlist plus the labels
    of the municipality (P131) and, except for Swedish, the country (P17).
    Each proposal is appended to the log via logme. Labels that could not
    be found are reported via log_unknown, tagged with the language they
    are missing in.

    Any other item is recorded via log_unknown and otherwise skipped.

    Returns the number of items edited. Writing the descriptions back to
    Wikidata is disabled, so this is always 0.
    """
    global items2do

    items_found = 0
    type_of_item = u''

    items2do -= 1
    if not debugedo:
        sys.stdout.write("\r%i\t%s" % (items2do, wditem.title()))

    if CLAIM_is_a in wditem.claims:
        # we only use the first one, and assume the first one is most relevant
        first_type = wditem.claims.get(CLAIM_is_a)[0].getTarget()
        if first_type is not None:
            type_of_item = first_type.title()

    if type_of_item != 'Q18333556':  # Q18333556 is a Swedish district
        # this should not happen with the active filters
        log_unknown(False, '%s|%s|%s', wditem.title(), type_of_item,
                    get_description('en', wditem))
        return items_found

    # sorted(): langlist is a set, so this keeps the log order stable.
    for language in sorted(langlist):
        orig_desc = get_description(language, wditem)

        # Index instead of unpacking, so that a result with extra trailing
        # elements cannot raise here.
        town = get_claim_title(repo, language, wditem, CLAIM_in_administrative_unit)
        new_town, missingtown = town[0], town[1]

        if language != 'sv':
            country = get_claim_title(repo, language, wditem, CLAIM_country)
            new_country, missingcountry = country[0], country[1]
            my_new_description = u'' + districtlist[language] + new_town + ', ' + new_country
        else:
            # in Swedish, just skip Sweden in the description
            missingcountry = u''
            my_new_description = u'' + districtlist[language] + new_town

        logme(False, '%s|%s|%s|%s', wditem.title(), language, orig_desc, my_new_description)

        if missingtown != '':
            log_unknown(True, '[%s][%s][%s]', wditem.title(), language, missingtown)
        if missingcountry != '':
            log_unknown(True, '[%s][%s][%s]', wditem.title(), language, missingcountry)

    # NOTE: saving the new description (and a label taken from the sitelink
    # when the item has none) through wditem.editEntity is intentionally not
    # done here; the script only logs its proposals.
    return items_found
   
class WDBot():
    """
    A bot that walks a generator of Wikidata items and passes every
    existing item to action_one_item.
    """
    def __init__(self, generator):
        """
        Arguments:
            * generator    - A generator that yields itempage objects.
        """
        self.generator = generator
        self.repo = pywikibot.Site().data_repository()

    def run(self,lng):
        """
        Starts the robot.

        Arguments:
            * lng    - language code handed to action_one_item.

        Uses the repository stored by __init__ rather than building a
        second one through the deprecated pywikibot.getSite().

        Stops once action_one_item has reported more than 3 handled items.
        """
        items_found=0
        for WDIquery in self.generator:
            if items_found> 3 :
               break
            if WDIquery.exists() :
                log_premature(WDIquery.title())   #log which item we process ... in case of an error, I know which item it is
                WDIquery.get(get_redirect=True)
                items_found += action_one_item(self.repo,WDIquery,lng)

def WikidataQueryItemPageGenerator(query, site=None):
    """Generate pages that result from the given WikidataQuery.

    @param query: the WikidataQuery query string.
    @param site: site whose data repository the items belong to;
        defaults to pywikibot.Site().

    Side effect: stores the number of items reported by the query in the
    module-level items2do counter. As this is a generator, that happens
    when iteration starts, not when the function is called.

    Yields one pywikibot.ItemPage per id in the query result.
    """

    global items2do

    if site is None:
        site = pywikibot.Site()
    repo = site.data_repository()

    wd_queryset = wdquery.QuerySet(query)

    wd_query = wdquery.WikidataQuery(cacheMaxAge=0)
    data = wd_query.query(wd_queryset)

    item_count = data[u'status'][u'items']
    items2do = item_count
    pywikibot.output(u'retrieved %d items' % item_count)
    for item in data[u'items']:
        # %-formatting instead of unicode(): same text, and it does not
        # depend on the Python-2-only builtin.
        yield pywikibot.ItemPage(repo, u'Q%s' % item)

def testrun():
    """Debug run: process the single hard-coded item Q20801595.

    Turns the module-level output2screen flag on, loads the item and hands
    it to action_one_item with the default language.
    """
    global output2screen
    output2screen = True

    repo = pywikibot.Site().data_repository()

    print("start")
    sample_item = pywikibot.ItemPage(repo, 'Q20801595')
    sample_item.get(get_redirect=True)
    action_one_item(repo, sample_item, default_language)
 
def main():
    """Run the bot over every item returned by the default query.

    Builds a preloading item generator on top of
    WikidataQueryItemPageGenerator and lets WDBot process it with the
    default language.
    """
    # print() call form, as everywhere else in this file; the bare print
    # statement is a syntax error on Python 3.
    print("main")
    query = default_query #later, I want to manage this with params
    lng = default_language

    pigenerator = pagegenerators.PreloadingItemGenerator(pagegenerators.WikidataItemGenerator(WikidataQueryItemPageGenerator(query)))

    wikidataBot = WDBot(pigenerator)
    wikidataBot.run(lng)
    print("\n")
 
# Script entry: run the single-item debug routine when debugedo is set,
# otherwise the full bot; pywikibot is always shut down afterwards.
try:
    entry_point = testrun if debugedo else main
    entry_point()
finally:
    pywikibot.stopme()