User:AkkakkBot/code/03-update-db-reports
< User:AkkakkBot | code
python code:
site = pywikibot.Site("wikidata", "wikidata")
repo = site.data_repository()
db = MySQLdb.connect(host="wikidatawiki.labsdb", db="wikidatawiki_p", read_default_file="~/replica.my.cnf")
cur = db.cursor()
sqlstatements = {
"label with brackets" : u'select * from s51880_akkakkbot.items where label like "% (%)" or label like "%, %" limit 60;',
"label with brackets/en" : u'select * from s51880_akkakkbot.items where language = "en" and (label like "% (%)" or label like "%, %") limit 40,60;',
"label with brackets/de" : u'select * from s51880_akkakkbot.items where language = "de" and (label like "% (%)" or label like "%, %") limit 10,60;',
"term with link" : u'select term_language, term_type, concat("[[Q", term_entity_id, "]]") as item, concat("<nowiki>", term_text, "</nowiki>") as text from wb_terms where term_entity_type = "item" and term_text like "%[[%]]%" limit 60;',
"long terms" : u'select term_language, term_type, char_length(term_text), concat("[[Q", term_entity_id, "]]") as item, term_text from wb_terms where term_entity_type = "item" and char_length(term_text) > 200 order by char_length(term_text) desc, term_text, term_entity_id limit 50;',
"long terms/en" : u'select term_language, term_type, char_length(term_text), concat("[[Q", term_entity_id, "]]") as item, term_text from wb_terms where term_language = "en" and term_entity_type = "item" and char_length(term_text) > 200 order by char_length(term_text) desc, term_text, term_entity_id limit 50;',
"long terms/de" : u'select term_language, term_type, char_length(term_text), concat("[[Q", term_entity_id, "]]") as item, term_text from wb_terms where term_language = "de" and term_entity_type = "item" and char_length(term_text) > 200 order by char_length(term_text) desc, term_text, term_entity_id limit 50;',
"link without label/brackets" : u'select concat("[[Q", ips_item_id, "]] ([[User:YMS/LC#Q", ips_item_id, "|LC]])") as item, ips_site_id, ips_site_page from wb_items_per_site join s51880_akkakkbot.languages on lang_db = ips_site_id and (ips_site_page like "%, %" or ips_site_page like "% (%)") and not exists (select term_language from wb_terms where ips_item_id = term_entity_id and lang_wd = term_language and term_type = "label") limit 60;',
"link without label/no-brackets" : u'select concat("[[Q", ips_item_id, "]] ([[User:YMS/LC#Q", ips_item_id, "|LC]])") as item, ips_site_id, ips_site_page from wb_items_per_site join s51880_akkakkbot.languages on lang_db = ips_site_id and not (ips_site_page like "%, %" or ips_site_page like "% (%)") and not exists (select term_language from wb_terms where ips_item_id = term_entity_id and lang_wd = term_language and term_type = "label") limit 60;',
"link without label/en" : u'select concat("[[Q", ips_item_id, "]] ([[User:YMS/LC#Q", ips_item_id, "|LC]])") as item, ips_site_id, ips_site_page from wb_items_per_site join s51880_akkakkbot.languages on lang_db = "enwiki" and lang_db = ips_site_id and (ips_site_page like "%, %" or ips_site_page like "% (%)") and not exists (select term_language from wb_terms where ips_item_id = term_entity_id and lang_wd = term_language and term_type = "label") limit 100;',
"link without label/de" : u'select concat("[[Q", ips_item_id, "]] ([[User:YMS/LC#Q", ips_item_id, "|LC]])") as item, ips_site_id, ips_site_page from wb_items_per_site join s51880_akkakkbot.languages on lang_db = "dewiki" and lang_db = ips_site_id and (ips_site_page like "%, %" or ips_site_page like "% (%)") and not exists (select term_language from wb_terms where ips_item_id = term_entity_id and lang_wd = term_language and term_type = "label") limit 100;',
"link without label/es" : u'select concat("[[Q", ips_item_id, "]] ([[User:YMS/LC#Q", ips_item_id, "|LC]])") as item, ips_site_id, ips_site_page from wb_items_per_site join s51880_akkakkbot.languages on lang_db = "eswiki" and lang_db = ips_site_id and (ips_site_page like "%, %" or ips_site_page like "% (%)") and not exists (select term_language from wb_terms where ips_item_id = term_entity_id and lang_wd = term_language and term_type = "label") limit 100;',
"link without label/fr" : u'select concat("[[Q", ips_item_id, "]] ([[User:YMS/LC#Q", ips_item_id, "|LC]])") as item, ips_site_id, ips_site_page from wb_items_per_site join s51880_akkakkbot.languages on lang_db = "frwiki" and lang_db = ips_site_id and (ips_site_page like "%, %" or ips_site_page like "% (%)") and not exists (select term_language from wb_terms where ips_item_id = term_entity_id and lang_wd = term_language and term_type = "label") limit 100;',
"link without label/stat": u'select count(*), lang_wd, lang_db from wb_items_per_site join s51880_akkakkbot.languages on lang_db = ips_site_id and ips_site_page not like "%, %" and ips_site_page not like "% (%)" and not exists (select term_language from wb_terms where ips_item_id = term_entity_id and lang_wd = term_language and term_type = "label") group by lang_wd, lang_db order by count(*) desc;',
"description ending with period" : u'select language, item, label, description from s51880_akkakkbot.items where description like "%." limit 50;',
"description ending with period/en" : u'select language, item, label, description from s51880_akkakkbot.items where language = "en" and description like "%." limit 50;',
"description ending with period/de" : u'select language, item, label, description from s51880_akkakkbot.items where language = "de" and description like "%." limit 50,50;',
"disambiguation conflicts" : u'select concat("[[Q", t1.term_entity_id, "]]") as item, t1.term_language as lang1, t1.term_text as en_label, t2.term_language as lang2, t2.term_text as de_label from wb_terms as t1 join wb_terms as t2 on t1.term_entity_id = t2.term_entity_id where t1.term_text != t2.term_text and t1.term_language = "en" and t1.term_type = "label" and t1.term_entity_type = "item" and t2.term_language != "en" and t2.term_type = "label" and t2.term_entity_type = "item" and exists (select * from wb_terms as t3 where t3.term_entity_id = t1.term_entity_id and t3.term_entity_type = "item" and t3.term_language = "en" and t3.term_type = "description" and t3.term_text = "Wikipedia disambiguation page") limit 25,50;',
# "disambiguation conflicts" : u'select l1.item, l1.language, l1.label, l2.language, l2.label from s51880_akkakkbot.items as l1 join s51880_akkakkbot.items as l2 on l1.item = l2.item where l1.language = "en" and l2.language != "en" and l1.label != l2.label and (l1.description = "Wikimedia disambiguation page" or l1.description = "Wikipedia disambiguation page") limit 60;',
"deprecated-languages/de-formal" : u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "de" and def.term_language = "de-formal" and de.term_type != "alias" and de.term_text != def.term_text limit 100;',
"deprecated-languages/als" : u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "gsw" and def.term_language = "als" and de.term_type != "alias" and de.term_text != def.term_text limit 100;',
# "deprecated-languages/crh" : u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "crh-latn" and def.term_language = "crh" and de.term_type != "alias" and de.term_text != def.term_text limit 100;',
"deprecated-languages/crh": u'select concat("[[Q", term_entity_id, "]]") as item, term_type, term_text from wb_terms where term_language = "crh" limit 100;',
"deprecated-languages/no" : u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "nb" and def.term_language = "no" and de.term_type != "alias" and de.term_text != def.term_text limit 100;',
"deprecated-languages/simple" : u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "en" and def.term_language = "simple" and de.term_type != "alias" and de.term_text != def.term_text limit 100;',
# "deprecated-languages/bat-smg" : u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "sgs" and def.term_language = "bat-smg" and de.term_type != "alias" and de.term_text != def.term_text limit 100;',
"deprecated-languages/bat-smg": u'select concat("[[Q", term_entity_id, "]]") as item, term_type, term_text from wb_terms where term_language = "bat-smg" limit 100;',
"deprecated-languages/be-x-old" : u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "be-tarask" and def.term_language = "be-x-old" and de.term_type != "alias" and de.term_text != def.term_text limit 100;',
"deprecated-languages/fiu-vro" : u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "vro" and def.term_language = "fiu-vro" and de.term_type != "alias" and de.term_text != def.term_text limit 100;',
"deprecated-languages/roa-rup" : u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "rup" and def.term_language = "roa-rup" and de.term_type != "alias" and de.term_text != def.term_text limit 100;',
"deprecated-languages/zh-classical" : u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "lzh" and def.term_language = "zh-classical" and de.term_type != "alias" and de.term_text != def.term_text order by de.term_entity_id limit 100;',
"deprecated-languages/zh-min-nan" : u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "nan" and def.term_language = "zh-min-nan" and de.term_type != "alias" and de.term_text != def.term_text order by de.term_entity_id limit 100;',
"deprecated-languages/zh-yue" : u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "yue" and def.term_language = "zh-yue" and de.term_type != "alias" and de.term_text != def.term_text order by de.term_entity_id limit 100;',
"deprecated-languages/stat" : u'select count(*), term_language, term_type from wb_terms where term_entity_type = "item" and (term_language = "als" or term_language = "de-formal" or term_language = "crh" or term_language = "no" or term_language = "simple" or term_language = "bat-smg" or term_language = "be-x-old" or term_language = "fiu-vro" or term_language = "roa-rup" or term_language = "zh-classical" or term_language = "zh-min-nan" or term_language = "zh-yue" or term_language = "nl-informal") group by term_language, term_type order by term_language, term_type;',
"deprecated-languages/nl-informal": u'select concat("[[Q", de.term_entity_id, "]]") as item, de.term_type, de.term_language, de.term_text, def.term_language, def.term_text from wb_terms as de join wb_terms as def on de.term_entity_id = def.term_entity_id and de.term_type = def.term_type where de.term_entity_type = "item" and def.term_entity_type = "item" and de.term_language = "nl" and def.term_language = "nl-informal" and de.term_type != "alias" and de.term_text != def.term_text limit 100;',
"language-mapping" : u'select * from s51880_akkakkbot.languages where lang_db != concat(lang_wd, "wiki");',
"alias-conflicts" : u'select concat("[[Q", l.term_entity_id, "]]") as item, l.term_language, l.term_text from wb_terms as l join wb_terms as a on l.term_entity_id = a.term_entity_id and l.term_entity_type = "item" and a.term_entity_type = "item" and l.term_language = a.term_language and l.term_type = "label" and a.term_type = "alias" and l.term_text = a.term_text limit 60;',
"alias without label" : u'select concat("[[Q", a.term_entity_id, "]]"), a.term_language, a.term_text from wb_terms as a where a.term_type = "alias" and a.term_entity_type = "item" and not exists(select * from wb_terms as l where a.term_entity_id = l.term_entity_id and l.term_entity_type = "item" and a.term_language = l.term_language and l.term_type = "label") limit 60;'
}
print("iterate pages")
for i, page in enumerate(sqlstatements):
sqlstatement = sqlstatements[page];
print("updating '"+page+"' ("+sqlstatement+")")
sys.stdout.flush()
cur.execute(sqlstatement)
result_text = u"<nowiki>"+sqlstatement.replace("</nowiki>", "</nowiki></nowiki><nowiki>")+u"</nowiki>\n{| class=\"wikitable\"\n|-\n"
lines = cur.fetchall()
line_count = 1
for line in lines:
result_text += u"|| {} ".format(line_count)
line_count += 1
for cell in line:
if(type(cell) == str):
try:
cell = cell.decode('utf8')
except UnicodeDecodeError:
cell = u"(encoding error)"
else:
cell = u"{}".format(cell)
result_text += u"|| "+cell+u" "
result_text += u"\n|-\n"
result_text += u"|}\n"
pageobj = pywikibot.Page(site, u"User:Akkakk/issues/"+page)
text = pageobj.get(get_redirect = True)
if(text != result_text):
print("- sending new text...")
sys.stdout.flush()
pageobj.put(result_text, u"update (task 3)", minorEdit = False)
else:
print("- text not changed")
print("end of updates")
sys.stdout.flush()