User:Yamaha5/ConflictBot

Wrong Interwiki Namespace

edit
sql fawiki_p 'SELECT /*SLOW OK */ page_title,page_namespace ,ll_title  FROM page  JOIN langlinks ON page_id = ll_from WHERE page_namespace = 0 AND page_is_redirect = 0 AND ll_lang="en" AND ((ll_title LIKE "%Template:%") OR (ll_title LIKE "%Category:%") OR (ll_title LIKE "%Wikipedia:%") OR (ll_title LIKE "%Portal:%") OR (ll_title LIKE "%User:%") OR (ll_title LIKE "%talk:%") OR (ll_title LIKE "%Talk:%")) GROUP BY page_id;' >foo.txt

Templates Wrong interwiki

edit
sql fawiki_p 'SELECT page_title FROM page JOIN langlinks ON page_id = ll_from WHERE page_namespace = 10 AND page_is_redirect = 0 AND (page_title LIKE "%/doc%" OR page_title LIKE "%/Doc%") GROUP BY page_title ORDER BY COUNT(ll_from) DESC;' >foo2.txt

Bot code

edit

This code lists the pages that have wrong interwikis, for example:

  • fa:a > en:a > fa:b
  • fa:a > en:a > nopage

You can replace fa with any other local wiki code (de, it, ru, ...).

It will report the conflicts in the WrongInterList.txt file.

options

edit

By changing 'namespace' you can run the query for other namespaces.

code

edit
#!/usr/bin/python
# -*- coding: utf-8 -*-
# BY: رضا (User:reza1615 on fa.wikipedia)
# Distributed under the terms of the CC-BY-SA 3.0 .
import codecs,config,wikipedia
import MySQLdb as mysqldb
# Accumulators shared by the rest of the script:
#   items_a - first-wiki titles collected from the first query
#   items_b - appears unused in this script; kept for compatibility
#   items_c - maps second-wiki titles back to their first-wiki source title
items_a = []
items_b = []
items_c = {}
# Text buffers (report text and query dump) start with a blank leading line.
text_w = u'\n'
q_text = u'\n'

#--------------------------------------------------need change---------------------------
first_wiki = 'fa'
second_wiki = 'en'
# MediaWiki important namespaces:
#   0  = Main
#   10 = Template
#   14 = Category
namespace = "0"

#----------------------------------------------------------------------------------------
# For non-main namespaces, look up the localized namespace prefix on each
# wiki (first canonical alias + ':'); the main namespace has no prefix.
if int(namespace) > 0:
    first_wiki_site = wikipedia.getSite(first_wiki)
    first_wiki_pre = first_wiki_site.namespace(int(namespace), all=True)[0] + u':'
    second_wiki_site = wikipedia.getSite(second_wiki)
    second_wiki_pre = second_wiki_site.namespace(int(namespace), all=True)[0] + u':'
else:
    first_wiki_pre = u''
    second_wiki_pre = u''

# Example of hard-coding the prefixes instead of looking them up:
# first_wiki_pre, second_wiki_pre = u'رده:', u'Category:'
# namespace = "14"

wikipedia.output('--------------First Query--------------------')
site = wikipedia.getSite(first_wiki)
# For every non-redirect page in the chosen namespace on the first wiki,
# fetch its title together with the title of its langlink to the second
# wiki.  Namespace and target language are bound as query parameters
# rather than concatenated into the SQL string.
querys = ("SELECT page_title, ll_title FROM page "
          "JOIN langlinks ON page_id = ll_from "
          "WHERE page_is_redirect = 0 AND page_namespace = %s "
          "AND ll_lang = %s GROUP BY ll_from;")

conn = mysqldb.connect(first_wiki + "wiki.labsdb", db=site.dbName(),
                       user=config.db_username,
                       passwd=config.db_password)
try:
    cursor = conn.cursor()
    try:
        wikipedia.output(u'Executing query:\n%s' % querys)
        cursor.execute(querys, (namespace, second_wiki))
        results = cursor.fetchall()
    finally:
        # Always release the cursor and connection, even if the query fails.
        cursor.close()
finally:
    conn.close()
for row in results:
    q_text += unicode(str(row[0]), 'UTF-8') + u'\t' + unicode(str(row[1]), 'UTF-8') + u'\n'
# Dump the tab-separated (page_title, ll_title) pairs for the comparison
# step later in the script.  'dump_file' avoids shadowing the builtin 'file'.
with codecs.open(first_wiki + '.txt', mode='w', encoding='utf8') as dump_file:
    dump_file.write(q_text)
# Reset the buffer so the second query starts from a clean state.
q_text = u'\n'
wikipedia.output(first_wiki + '.txt is built')
wikipedia.output('----------------Second Query------------------')
site = wikipedia.getSite(second_wiki)
# Mirror of the first query: langlinks from the second wiki back to the
# first wiki.  Parameters are bound instead of concatenated into the SQL.
querys = ("SELECT page_title, ll_title FROM page "
          "JOIN langlinks ON page_id = ll_from "
          "WHERE page_is_redirect = 0 AND page_namespace = %s "
          "AND ll_lang = %s GROUP BY ll_from;")

conn = mysqldb.connect(second_wiki + "wiki.labsdb", db=site.dbName(),
                       user=config.db_username,
                       passwd=config.db_password)
try:
    cursor = conn.cursor()
    try:
        wikipedia.output(u'Executing query:\n%s' % querys)
        cursor.execute(querys, (namespace, first_wiki))
        results = cursor.fetchall()
    finally:
        # Always release the cursor and connection, even if the query fails.
        cursor.close()
finally:
    conn.close()
for row in results:
    q_text += unicode(str(row[0]), 'UTF-8') + u'\t' + unicode(str(row[1]), 'UTF-8') + u'\n'
# Dump the tab-separated (page_title, ll_title) pairs for the comparison
# step below.  'dump_file' avoids shadowing the builtin 'file'.
with codecs.open(second_wiki + '.txt', mode='w', encoding='utf8') as dump_file:
    dump_file.write(q_text)
wikipedia.output(second_wiki + '.txt is built')
wikipedia.output('----------------------------------')
# Read back the two dump files produced above; each line is
# "<page_title>\t<langlink_title>".  'with' guarantees the handles are
# closed (the original left both files open).
with codecs.open(first_wiki + '.txt', 'r', 'utf8') as dump:
    lines_first_wiki = dump.read().replace(u'\r', u'').strip().split(u'\n')
with codecs.open(second_wiki + '.txt', 'r', 'utf8') as dump:
    lines_second_wiki = dump.read().replace(u'\r', u'').strip().split(u'\n')

# items_a: every first-wiki title, underscores normalized to spaces.
# items_c: second-wiki target title (namespace prefix stripped) -> the
#          first-wiki title that links to it.
for line_F in lines_first_wiki:
    parts = line_F.split(u'\t')
    title = parts[0].replace(u'_', u' ')
    items_a.append(title)
    items_c[parts[1].replace(second_wiki_pre, u'')] = title

# Walk the second wiki's langlinks back to the first wiki.  A backlink
# whose target is present in items_a is consistent and removed; anything
# that does not round-trip is reported as a conflict
# (fa:a > en:a > fa:b, or fa:a > en:a > nopage).
for line_S in lines_second_wiki:
    parts = line_S.split(u'\t')
    case = parts[1].replace(first_wiki_pre, u'').replace(u'_', u' ')
    if case in items_a:
        items_a.remove(case)
    else:
        wikipedia.output(line_S)
        try:
            # The first wiki links this second-wiki page to item_c; if
            # that differs from where the second wiki points back, the
            # pair is a round-trip conflict.
            item_c = first_wiki_pre + items_c[parts[0].replace(u'_', u' ')]
            if item_c != parts[1]:
                text_w += (u'# [[:' + item_c + u']]  >  [[:' + second_wiki +
                           u':' + second_wiki_pre + parts[0] + u']] > [[:' +
                           parts[1] + u']]\n')
        except KeyError:
            # No first-wiki page links to this second-wiki page at all
            # (narrowed from a bare 'except:'; only the items_c lookup
            # can raise here in a way this branch meaningfully handles).
            text_w += (u'# [[:' + second_wiki + u':' + second_wiki_pre +
                       parts[0] + u']] > [[:' + parts[1] + u']]\n')
# 'report' avoids shadowing the builtin 'file'.
with codecs.open(u'WrongInterList.txt', mode='w', encoding='utf8') as report:
    report.write(text_w)