#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
This script adds claims to Wikidata items based on categories.
------------------------------------------------------------------------------
Usage:
python claimit.py [pagegenerators] P1 Q2 P123 Q456
You can use any typical pagegenerator to provide a list of pages.
Then list the property-->target pairs to add.
------------------------------------------------------------------------------
For geographic coordinates:
python claimit.py [pagegenerators] P625 [lat-dec],[long-dec],[prec]
[lat-dec] and [long-dec] represent the latitude and longitude respectively,
and [prec] represents the precision. All values are in decimal degrees,
not DMS. If [prec] is omitted, the default precision is 0.0001 degrees.
Example:
python claimit.py [pagegenerators] P625 -23.3991,-52.0910,0.0001
------------------------------------------------------------------------------
By default, claimit.py does not add a claim if one with the same property
already exists on the page. To override this behavior, use the 'exists' option:
python claimit.py [pagegenerators] P246 "string example" -exists:p
Suppose the claim you want to add has the same property as an existing claim
and the "-exists:p" argument is used. Now, claimit.py will not add the claim
if it has the same target, sources, and/or qualifiers as the existing claim.
To override this behavior, add 't' (target), 's' (sources), or 'q' (qualifiers)
to the 'exists' argument.
For instance, to add the claim to each page even if one with the same
property, target, and qualifiers already exists:
python claimit.py [pagegenerators] P246 "string example" -exists:ptq
Note that the ordering of the letters in the 'exists' argument does not matter,
but 'p' must be included.
"""
#
# (C) Legoktm, 2013
# (C) Pywikibot team, 2013
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id: db8ff17f23c0c40c6efae64d5eb8d93c3a7c81b5 $'
#
import json
import re
import pywikibot
from pywikibot import pagegenerators
from datetime import datetime
from datetime import timedelta
class ClaimRobot:
    """
    A bot to add Wikidata claims.

    For every page yielded by the generator the bot looks up the connected
    Wikidata item (trying to create one via newItem when none exists) and
    adds each configured claim, unless a matching claim is already present
    or the property appears as removed in the item's recent revision
    comments.
    """

    # Minimum age of a page, in days, before newItem creates an item for it.
    pageAge = 21
    # Minimum number of days since the last edit before newItem creates an
    # item for the page.
    lastEdit = 7

    def __init__(self, generator, claims, exists_arg=''):
        """
        Arguments:
            * generator    - A generator that yields Page objects.
            * claims       - A list of wikidata claims
            * exists_arg   - String specifying how to handle duplicate claims

        Constructing the bot fetches the on-wiki source list (see
        cacheSources).
        """
        self.generator = generator
        self.claims = claims
        self.exists_arg = exists_arg
        self.repo = pywikibot.Site().data_repository()
        self.cacheSources()

    def getSource(self, lang):
        """
        Get the source for the specified language,
        if possible

        Returns a 'p143' Claim whose target is the cached item for lang,
        or None when no source is cached for that language.
        """
        if lang not in self.source_values:
            return None
        source = pywikibot.Claim(self.repo, 'p143')
        source.setTarget(self.source_values.get(lang))
        return source

    def cacheSources(self):
        """
        Fetches the sources from the onwiki list
        and stores it internally

        The page content is parsed as JSON; its 'wikipedia' entry is stored
        in self.source_values with every value wrapped in an ItemPage.
        """
        page = pywikibot.Page(self.repo, u'Wikidata:List of wikis/python')
        wikis = json.loads(page.get())
        self.source_values = {
            source_lang: pywikibot.ItemPage(self.repo, item_id)
            for source_lang, item_id in wikis['wikipedia'].items()
        }

    def newItem(self, page, item):
        """
        Create item where none exists (from newitem.py by Multichill)

        Nothing is created (only a message is printed) when the page is a
        redirect, was edited within the last `lastEdit` days, was created
        within the last `pageAge` days, or has language links.
        """
        # Fetch the repository time once so both cut-offs share one basis.
        now = self.repo.getcurrenttime()
        self.pageAgeBefore = now - timedelta(days=self.pageAge)
        self.lastEditBefore = now - timedelta(days=self.lastEdit)

        if page.isRedirectPage():
            pywikibot.output('%s is a redirect page. Skipping.' % page)
            return
        if page.editTime() > self.lastEditBefore:
            pywikibot.output('Last edit on %s was on %s. Too recent. Skipping.' % (page, page.editTime().isoformat()))
            return

        # With reverseOrder=True and total=1 the single entry requested is
        # used as the page's first revision, i.e. its creation.
        (revId, revTimestamp, revUser, revComment) = page.getVersionHistory(reverseOrder=True, total=1)[0]
        if revTimestamp > self.pageAgeBefore:
            # Report the creation time that was actually tested, not the
            # time of the last edit.
            pywikibot.output('Page creation of %s on %s is too recent. Skipping.' % (page, revTimestamp.isoformat()))
            return
        if page.langlinks():
            # FIXME: Implement this
            pywikibot.output('Found language links (interwiki links). Haven\'t implemented that yet so skipping.')
            return

        # FIXME: i18n
        summary = u'Bot: New item with sitelink from %s' % (page.title(asLink=True, insite=self.repo), )
        dbname = item.getdbName(page.site)
        data = {
            'sitelinks': {
                dbname: {
                    'site': dbname,
                    'title': page.title(),
                },
            },
            'labels': {
                page.site.lang: {
                    'language': page.site.lang,
                    'value': page.title(),
                },
            },
        }
        pywikibot.output(summary)
        item.editEntity(data, summary=summary)

    def _claimExists(self, claim, item):
        """
        Return True if claim must be skipped because item already has a
        claim that counts as a duplicate under the 'exists' argument.
        """
        if claim.getID() not in item.claims:
            return False
        if self.exists_arg is None or 'p' not in self.exists_arg:
            pywikibot.log('Skipping %s because claim with same property already exists' % (claim.getID(),))
            pywikibot.log('Use the -exists:p option to override this behavior')
            return True
        # If some attribute of the claim being added matches some attribute
        # in an existing claim of the same property, skip the claim, unless
        # the 'exists' argument overrides it.
        for existing in item.claims[claim.getID()]:
            if claim.getTarget() == existing.getTarget() and 't' not in self.exists_arg:
                pywikibot.log('Skipping %s because claim with same target already exists' % (claim.getID(),))
                pywikibot.log('Append \'t\' to the -exists argument to override this behavior')
                return True
            if listsEqual(claim.getSources(), existing.getSources()) and 's' not in self.exists_arg:
                pywikibot.log('Skipping %s because claim with same sources already exists' % (claim.getID(),))
                pywikibot.log('Append \'s\' to the -exists argument to override this behavior')
                return True
            if listsEqual(claim.qualifiers, existing.qualifiers) and 'q' not in self.exists_arg:
                pywikibot.log('Skipping %s because claim with same qualifiers already exists' % (claim.getID(),))
                pywikibot.log('Append \'q\' to the -exists argument to override this behavior')
                return True
        return False

    def _recentlyRemoved(self, claim, item):
        """
        Return True if the comments of the item's last 35 revisions mention
        a removal of the claim's property.
        """
        revhist = pywikibot.data.api.Request(site=self.repo, action="query", titles=item.getID(), prop="revisions", rvprop="comment", rvlimit="35")
        revisions_text = str(revhist.submit())
        # NOTE(review): the nine-character gap presumably spans the
        # auto-generated removal summary between "remove" and the property
        # link - confirm against real revision comments.
        # The trailing lookahead keeps e.g. P12 from matching Property:P123.
        pattern = r"remove.{9}Property:" + re.escape(claim.getID()) + r"(?!\d)"
        return re.search(pattern, revisions_text) is not None

    def run(self):
        """
        Starts the robot.
        """
        if self.exists_arg:
            pywikibot.output('\'exists\' argument set to \'%s\'' % self.exists_arg)
        for page in self.generator:
            pywikibot.output('Processing %s' % page)
            item = pywikibot.ItemPage.fromPage(page)
            if not item.exists():
                # create the page
                self.newItem(page, item)
                item = pywikibot.ItemPage.fromPage(page)
                if not item.exists():
                    # The item was not created
                    continue

            # NOTE(review): the same Claim objects are reused for every
            # page; confirm that addClaim/addSource leave no state behind
            # that affects later pages.
            for claim in self.claims:
                if self._claimExists(claim, item):
                    pywikibot.output('%s is already set' % (claim.getID()))
                    continue
                # check if the bot was reverted recently
                if self._recentlyRemoved(claim, item):
                    pywikibot.output('%s cannot be added as it was recently removed from the item' % (claim.getID(),))
                    continue
                pywikibot.output('Adding %s --> %s'
                                 % (claim.getID(), claim.getTarget()))
                item.addClaim(claim)
                # A generator might yield pages from multiple languages
                source = self.getSource(page.site.language())
                if source:
                    claim.addSource(source, bot=True)
                # TODO FIXME: We need to check that we aren't adding a
                # duplicate
def listsEqual(list1, list2):
    """
    Returns true if the lists are equal, ignoring order.

    Works for lists of unhashable and unorderable items (like
    dictionaries): items are compared with == only, so no hashing or
    sorting is needed. Duplicates are respected, i.e. every item must occur
    the same number of times in both lists.
    """
    if len(list1) != len(list2):
        return False
    # Cross each item of list1 off a working copy of list2. Since both have
    # the same length, the lists are equal once every item found a partner.
    unmatched = list(list2)
    for item in list1:
        try:
            unmatched.remove(item)
        except ValueError:
            return False
    return True
def main():
    """
    Parse the command line, build the claims and run the bot.

    '-exists:...' sets the duplicate-handling string, page generator
    options are handed to the GeneratorFactory, and every remaining
    argument is read as part of a property/target pair.

    Raises ValueError if the remaining arguments do not form pairs or a
    coordinate target has fewer than two values, and NotImplementedError
    for a property datatype this script cannot build a target for.
    """
    exists_arg = ''
    gen = pagegenerators.GeneratorFactory()
    commandline_claims = []
    for arg in pywikibot.handleArgs():
        # Handle args specifying how to handle duplicate claims
        if arg.startswith('-exists:'):
            exists_arg = arg.split(':')[1].strip('"')
            continue
        # Handle page generator args
        if gen.handleArg(arg):
            continue
        commandline_claims.append(arg)
    if len(commandline_claims) % 2:
        raise ValueError(
            'Claims must be given as property/target pairs, but an odd '
            'number of arguments (%d) was supplied' % len(commandline_claims))

    claims = []
    repo = pywikibot.Site().data_repository()
    pairs = zip(commandline_claims[::2], commandline_claims[1::2])
    for prop_id, target_str in pairs:
        claim = pywikibot.Claim(repo, prop_id)
        claim_type = claim.getType()
        if claim_type == 'wikibase-item':
            target = pywikibot.ItemPage(repo, target_str)
        elif claim_type == 'time':
            # The command line delivers a string; convert it to a number.
            target = pywikibot.WbTime(year=int(target_str))
        elif claim_type == 'string':
            target = target_str
        # NOTE(review): Wikibase spells this datatype 'globe-coordinate';
        # the original 'globecoordinate' is still accepted - confirm which
        # one getType() returns.
        elif claim_type in ('globecoordinate', 'globe-coordinate'):
            # A real list is needed: map() returns an iterator on Python 3,
            # which supports neither len() nor indexing.
            coord_args = [float(part) for part in target_str.split(',')]
            if len(coord_args) < 2:
                raise ValueError(
                    'Coordinates must be given as [lat-dec],[long-dec]'
                    '[,[prec]], got %r' % (target_str,))
            if len(coord_args) >= 3:
                precision = coord_args[2]
            else:
                precision = 0.0001  # Default value (~10 m at equator)
            target = pywikibot.Coordinate(coord_args[0], coord_args[1], precision=precision)
        else:
            raise NotImplementedError(
                "%s datatype is not yet supported by claimit.py"
                % claim_type)
        claim.setTarget(target)
        claims.append(claim)

    generator = gen.getCombinedGenerator()
    if not generator:
        # FIXME: Should throw some help
        return

    bot = ClaimRobot(generator, claims, exists_arg)
    bot.run()
if __name__ == "__main__":
    # Start the bot only when the file is executed as a script, so that
    # importing the module has no side effects.
    main()