import sys
import pywikibot
from pywikibot import pagegenerators as pg
from pywikibot.data import api
def get_namespace_pages(site_obj, name_space):
    """Return a preloading generator over the non-redirect pages of a namespace.

    Args:
        site_obj: The site object to enumerate; it is passed to
            ``AllpagesPageGenerator`` and its ``preloadpages`` method is used.
        name_space: Namespace identifier forwarded unchanged to
            ``AllpagesPageGenerator``.

    Returns:
        The generator produced by ``site_obj.preloadpages`` wrapping the
        all-pages generator, requested with ``pageprops=True``.
    """
    gen = pg.AllpagesPageGenerator(start='!', namespace=name_space,
                                   includeredirects=False, site=site_obj,
                                   step=None, total=None, content=False)
    # Preload through the site that was passed in, not a module-level global,
    # so the function works for any site and does not depend on script state.
    gen = site_obj.preloadpages(gen, pageprops=True)
    return gen
def search_for_title(site, item_title, language="bn"):
    """Run a ``wbsearchentities`` item search for ``item_title``.

    Args:
        site: Site object the API request is issued against.
        item_title: Text to search for.
        language: Language code sent as the ``language`` parameter of the
            search. Defaults to ``"bn"``, the value that was previously
            hard-coded.

    Returns:
        Whatever ``api.Request.submit()`` returns for the request; the
        calling script reads the ``"search"`` key of that result.
    """
    params = {
        "action": "wbsearchentities",
        "format": "json",
        "language": language,
        "type": "item",
        "search": item_title,
    }
    request = api.Request(site=site, **params)
    return request.submit()
def check_and_set_sitelink(item, site_id, site, title):
    """Add a sitelink to ``item`` unless it already has one for ``site_id``.

    Args:
        item: Object providing ``get()`` (returning a mapping with a
            ``"sitelinks"`` entry) and ``setSitelink(page)``.
        site_id: Key looked up in the item's sitelinks, e.g. ``"bnwikisource"``.
        site: Site used to build the ``pywikibot.Page`` that gets linked.
        title: Full title of the page to link.

    Returns:
        None. Prints which of the two outcomes happened.

    Raises:
        Any exception other than ``KeyError`` raised while reading the
        sitelinks is propagated instead of being mistaken for "no sitelink".
    """
    item_dict = item.get()
    # Looking the link up first is faster than blindly trying to set it
    # (it avoids the wait-time of a write attempt).
    try:
        item_dict["sitelinks"][site_id]
    except KeyError:
        # Only a missing key means "no sitelink yet"; a bare except would
        # also swallow KeyboardInterrupt and unrelated errors and then write.
        wiki_page = pywikibot.Page(site, title)
        item.setSitelink(wiki_page)
        print("Setting sitelink")
    else:
        print("Sitelink exists")
# Namespace IDs of bn-wikisource are listed at:
# https://bn.wikisource.org/w/api.php?action=query&meta=siteinfo&siprop=namespaces
# 100 is the author namespace of bn-wikisource.
name_space = "100"
site_id = "bnwikisource"  # Change correct language code here
site = pywikibot.Site("bn", "wikisource")  # Change correct language code here
tmpl_gen = get_namespace_pages(site, name_space)
data_site = pywikibot.Site("wikidata", "wikidata")
repo = data_site.data_repository()
count = 0
edit_count = 0  # NOTE: never incremented anywhere in this script
for page in tmpl_gen:
    count += 1
    print("\n------({})------".format(count))
    title = page.title()
    # Strip only the leading namespace prefix: splitting on every colon would
    # truncate names that themselves contain ':' and would raise IndexError
    # for a title without any colon.
    page_title = title.split(":", 1)[-1]
    print(page_title)
    result_list = search_for_title(data_site, page_title)
    matches = result_list["search"]
    if len(matches) == 1:
        # Exactly one hit: treat it as the matching item and link it.
        try:
            print("=== Clear result:", matches[0]["id"])
            print(matches[0])
            item_id = matches[0]["id"]
            item = pywikibot.ItemPage(repo, item_id)
            check_and_set_sitelink(item, site_id, site, title)
        except Exception as err:
            # Best effort: report the failure and move on to the next page.
            # Catching Exception (not a bare except) keeps Ctrl-C working.
            print("!!! Skipping {}: {!r}".format(title, err))
    elif len(matches) > 1:
        # Ambiguous: only report, do not guess which item is the right one.
        print("+++ More than one result")
        print(matches)
    else:
        print("--- No result")
        print(matches)