User:Reza1615/BOT/merge-pages.py
To run this code, please read User:Reza1615/BOT first.
The sub-page from which the bot reads the list of items should be formatted like the example below.
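A hypothetical example of such a sub-page (the item IDs below are placeholders; main() defaults to reading User:Yamaha5/test): each line holds the bracketed links of the duplicate items. The bot extracts them with the regex \[\[.*?\]\], keeps the last item on the line, and empties the others into it.

<pre>
[[Q1234]] [[Q5678]]
[[Q1111]] [[Q2222]] [[Q3333]]
</pre>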
<source lang='python'>
#!/usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
# Reza (User:reza1615)
# Distributed under the terms of the CC-BY-SA 3.0.
import wikipedia, pagegenerators, config
import query, time, login, codecs, re
import wikidata
from pywikibot import textlib
# ---- Edit summaries used on Wikidata; they should be in English ----
creat_summary = "Bot: Import page from {0}wiki".format('en')
update_link_summary = "Bot: Update site links from {0}wiki".format('en')
update_link_labels_summary = "Bot: Update site links and labels from {0}wiki".format('en')
update_Labels_summary = u'Bot: Update of labels.'
# -------------------------------------------------------------------------------
Orgine_Site = wikipedia.getSite('en', fam='wikipedia')
repo = Orgine_Site.data_repository()
SafeWork = True  # in safe mode, items with conflicting sitelinks are skipped
mysite = wikipedia.getSite('wikidata', 'wikidata')
def login_wiki(mode):
    # Log the bot account in to Wikidata.
    dataSite = wikipedia.getSite('wikidata', 'wikidata')
    passwords = 'yourBotPass'
    usernames = 'YourBotUser'
    botlog = login.LoginManager(password=passwords, username=usernames, site=dataSite)
    botlog.login()
def save_file(case, type):
    # Append a case to the matching log file, avoiding duplicate entries.
    if type == 'merge':
        file = 'zzinterwiki_import_merged_need_deleted.txt'
    elif type == 'error':
        file = 'zzinterwiki_import_errors.txt'
    else:
        file = 'zzinterwiki_conflicts.txt'
    try:
        file_text = codecs.open(file, 'r', 'utf8')
        file_text = file_text.read().strip()
    except:
        file_text = u''
    if not case in file_text:
        with codecs.open(file, mode='a', encoding='utf8') as f:
            f.write(u'\n' + case)
def check_item(wiki, link):
    # Return True if the page has no Wikidata item yet, False if one already exists.
    site = wikipedia.getSite(wiki.replace(u'_', u'-'), fam='wikipedia')
    page = wikipedia.Page(site, link)
    data = wikipedia.DataPage(page)
    try:
        items = data.get()
    except wikipedia.NoPage:
        return True
    except:
        wikipedia.output("\03{lightred}Item has been created. Skipping...\03{default}")
        return False
    return False  # item exists, so adding this sitelink would conflict
def set_lable(data, new_langs, item):
    # Merge new sitelinks into the item and derive labels for languages that lack one.
    dic_item = data.get()
    old = dict(dic_item['links'])  # copy, so that changes can be detected below
    changes = False
    for cases in new_langs:
        if cases == 'nb':
            cases = 'no'
        dic_item['links'][cases] = new_langs[cases]
        if old != dic_item['links']:
            wikipedia.output('added ' + cases + '......................')
    for langs in dic_item['links']:
        if langs == 'nb':
            langs = 'no'
        value = dic_item['links'][langs].strip()
        lang = langs.replace('wiki', '').replace('_', '-')
        try:
            value = unicode(value, 'UTF8')
        except:
            pass
        # Strip disambiguators and list-article prefixes before using the title as a label.
        if lang != 'fa':
            value = value.split(u'(')[0].strip()
        if lang == 'es' or lang == 'pt' or lang == 'pt-br':
            value = value.replace(u"Anexo:", u"")
        if lang == 'cs':
            value = value.replace(u"Príloha:", u"")
        if lang == 'de-ch':
            value = value.replace(u"ß", u"ss")
        try:
            a = dic_item['label'][lang]
        except:
            item.labels[lang] = value
            changes = True
            wikipedia.output('\03{lightgreen}for ' + value + ' added as label of ' + lang + '\03{default}')
    if not changes:
        wikipedia.output("Doesn't need any update!")
    return item, changes
def Update_data(data_add, appenddict):
    # Add the collected sitelinks to the item, logging conflicts instead of overwriting.
    item = wikidata.api.getItemById(data_add.title())
    summary = u''
    confilict = {}
    new_langs = {}
    for lang in appenddict:
        if lang == 'nb':
            lang = 'no'
        site_lang = lang
        interlangLinks = appenddict[lang]
        status = check_item(site_lang, interlangLinks)
        if not status:
            wikipedia.output(site_lang + ' has conflict!')
            confilict[site_lang] = interlangLinks
            continue
        summary = update_link_summary
        item.sitelinks[lang + "wiki"] = interlangLinks
        new_langs[lang + "wiki"] = interlangLinks

    if confilict:
        item_confilict = u'* ' + data_add.title() + u' Conflict > '
        for i in confilict:
            item_confilict += u'[[:' + i + u':' + confilict[i] + u'|' + i + u'wiki]]-'
        save_file(item_confilict[:-1], 'conflict')
        if SafeWork:
            wikipedia.output('\03{lightred}-->' + data_add.title() + ' Passed! because of safe mode and conflict\03{default}')
            return False
    if summary:
        item, changes = set_lable(data_add, new_langs, item)
        if changes:
            summary = update_link_labels_summary
        try:
            wikidata.api.save(item, summary)
            wikipedia.output('\03{lightblue}Page ' + data_add.title() + ' : ' + summary + '\03{default}')
            return True
        except Exception, e:
            try:
                wikipedia.output('\03{lightred}Page ' + data_add.title() + ' Passed! error was : ' + str(e) + ' \03{default}')
            except:
                wikipedia.output('\03{lightred}Page ' + data_add.title() + ' Passed!\03{default}')
            return False
def find_diff(my_data, interwiki_links):
    # Collect interwiki links that the item does not have yet and push them.
    dictionary = {}
    appenddict = {}
    for lang in interwiki_links:
        if lang == 'nb':
            lang = 'no'
        L_lang = lang.replace(u'-', u'_')
        L_link = interwiki_links[lang]
        if not (L_lang in appenddict):
            if not ((L_lang + 'wiki') in dictionary):
                appenddict[L_lang] = L_link
                wikipedia.output('\03{lightblue}+ ' + L_lang + u' > ' + L_link + ' \03{default}')
    done = Update_data(my_data, appenddict)
    if done:
        return True
def main():
    site = wikipedia.getSite('en')
    Orgine_Site = wikipedia.getSite('wikidata', fam='wikidata')
    Orgine_page = wikipedia.Page(Orgine_Site, 'User:Yamaha5/test')  # 'User:Soulkeeper/dups'
    text = Orgine_page.get()
    lines = text.split(u'\n')
    for line in lines:
        our_links = {}
        Regex = re.compile(ur'\[\[.*?\]\]')
        wikipedia.output('--------------------------------------------------')
        links = Regex.findall(line)
        lent = len(links)
        desc = {}
        cla = []
        alia = {}
        if links:
            count = 0
            line_pass = True
            for link in links:
                Last_Item = False
                link = link.replace(u'[[', u'').replace(u']]', u'').strip()
                wikipedia.output('\03{lightblue}working on ' + link + u'\03{default}')
                count += 1
                repo = site.data_repository()
                data = wikipedia.DataPage(repo, link)
                try:
                    dict = data.get()
                except:
                    wikipedia.output(link + ' has error')
                    continue
                # Collect descriptions, claims and aliases from every duplicate item.
                if 'description' in dict:
                    description = dict['description']
                    for i in description:
                        desc[i] = description[i]
                if 'claims' in dict:
                    claims = dict['claims']
                    for i in claims:
                        if not i in cla:
                            cla.append(i)
                if 'aliases' in dict:
                    aliases = dict['aliases']
                    for i in aliases:
                        alia[i] = aliases[i]
                if 'links' in dict:
                    dictionary = dict['links']
                if our_links == {} and count == lent and desc == {} and alia == {}:
                    line_pass = False
                    break
                if count == lent:
                    # The last item on the line is kept; remember one of its languages.
                    for lang in dictionary:
                        break
                    lang = lang.replace(u'wiki', u'')
                    Last_Item = True
                    break
                # Empty the duplicate item so that it can be deleted later;
                # an empty string clears the sitelink and label.
                item = wikidata.api.getItemById(link)
                countB = 0
                for lang in dictionary:
                    lang = lang.replace(u'wiki', u'')
                    wikipedia.output(lang + u'>' + dictionary[lang + u"wiki"])
                    if not lang in our_links:
                        our_links[lang] = dictionary[lang + u"wiki"]
                    item.sitelinks[lang + u"wiki"] = item.labels[lang] = u''
                try:
                    wikidata.api.save(item, u'Bot: Merging Items')
                except Exception, e:
                    wikipedia.output('\03{lightred}Page Passed! error was : ' + str(e) + ' \03{default}')
                    continue
                if not Last_Item:
                    wikipedia.output('\03{lightred}Emptying ' + link + u'\03{default}')
            if desc or alia:
                # Copy the collected descriptions and aliases onto the surviving item.
                siteLang = wikipedia.getSite(lang)
                Orgine_page2 = wikipedia.Page(siteLang, dictionary[lang + u"wiki"])
                data = wikipedia.DataPage(Orgine_page2)
                if not data.exists():
                    continue
                item = wikidata.api.getItemById(data.title())
                for i in desc:
                    if isinstance(desc[i], (list, dict)):  # skip structured values
                        continue
                    item.descriptions[i] = desc[i]
                for i in alia:
                    if isinstance(alia[i], (list, dict)):  # skip structured values
                        continue
                    item.aliases[i] = alia[i]
                try:
                    wikidata.api.save(item, u'Bot: adding aliases and descriptions')
                    wikipedia.output(u'Bot: adding aliases and descriptions')
                except:
                    continue
            if line_pass and our_links != {}:
                # Fill the surviving item with the union of all collected sitelinks.
                siteLang = wikipedia.getSite(lang)
                Orgine_page2 = wikipedia.Page(siteLang, dictionary[lang + u"wiki"])
                data = wikipedia.DataPage(Orgine_page2)
                if not data.exists():
                    save_file(line, 'error')
                    continue
                done = find_diff(data, our_links)
                wikipedia.output('\03{lightgreen}Filling ' + link + ' with new Langs\03{default}')
                if done:
                    wikipedia.output(u'\03{lightgreen}Item merged and updated!\03{default} ' + line)
                else:
                    wikipedia.output(u'\03{lightred}Item not merged!\03{default}')
if __name__ == "__main__":
    login_wiki(1)
    main()
</source>
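For reference, a conflict entry as written by save_file() from Update_data() above would look roughly like the line below; the item ID, language codes, and page titles are placeholders.

<pre>
* Q1234 Conflict > [[:de:Beispiel|dewiki]]-[[:fr:Exemple|frwiki]]
</pre>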