http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11521
Revision: 11521
Author: russblau
Date: 2013-05-09 23:39:44 +0000 (Thu, 09 May 2013)
Log Message:
-----------
Should have committed this with 11520.
Modified Paths:
--------------
branches/rewrite/pywikibot/config2.py
Modified: branches/rewrite/pywikibot/config2.py
===================================================================
--- branches/rewrite/pywikibot/config2.py 2013-05-09 19:24:17 UTC (rev 11520)
+++ branches/rewrite/pywikibot/config2.py 2013-05-09 23:39:44 UTC (rev 11521)
@@ -372,6 +372,11 @@
# running solve_disambiguation.py with the -primary argument.
special_page_limit = 500
+# Maximum number of times to retry an API request before quitting.
+max_retries = 25
+# Minimum time to wait before resubmitting a failed API request.
+retry_wait = 5
+
############## TABLE CONVERSION BOT SETTINGS ##############
# will split long paragraphs for better reading the source.
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11517
Revision: 11517
Author: russblau
Date: 2013-05-09 17:50:39 +0000 (Thu, 09 May 2013)
Log Message:
-----------
Fix bug #3546402: Switching sites with different accounts does not re-login
Modified Paths:
--------------
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2013-05-09 17:46:39 UTC (rev 11516)
+++ branches/rewrite/pywikibot/site.py 2013-05-09 17:50:39 UTC (rev 11517)
@@ -798,8 +798,9 @@
"""Log the user in if not already logged in."""
# check whether a login cookie already exists for this user
self._loginstatus = LoginStatus.IN_PROGRESS
- if not hasattr(self, "_userinfo"):
- self.getuserinfo()
+ if hasattr(self, "_userinfo"):
+ del self._userinfo
+ self.getuserinfo()
if self.userinfo['name'] == self._username[sysop] and self.logged_in(sysop):
return
loginMan = api.LoginManager(site=self, sysop=sysop,
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11516
Revision: 11516
Author: multichill
Date: 2013-05-09 17:46:39 +0000 (Thu, 09 May 2013)
Log Message:
-----------
A new robot to harvest Wikipedia templates to Wikidata.
Added Paths:
-----------
branches/rewrite/scripts/harvest_template.py
Copied: branches/rewrite/scripts/harvest_template.py (from rev 11513, branches/rewrite/scripts/claimit.py)
===================================================================
--- branches/rewrite/scripts/harvest_template.py (rev 0)
+++ branches/rewrite/scripts/harvest_template.py 2013-05-09 17:46:39 UTC (rev 11516)
@@ -0,0 +1,149 @@
+# -*- coding: utf-8 -*-
+"""
+Copyright (C) 2013 Multichill
+Copyright (C) 2013 Pywikipediabot team
+
+Distributed under the MIT License
+
+Usage:
+
+python harvest_template.py -lang:nl -template:"Taxobox straalvinnige" orde P70 familie P71 geslacht P74
+
+This will work on all pages that transclude the template in the article namespace
+
+You can use any typical pagegenerator to provide a list of pages
+
+python harvest_template.py -lang:nl -cat:Sisoridae -template:"Taxobox straalvinnige" -namespace:0 orde P70 familie P71 geslacht P74
+
+"""
+import re
+import pywikibot
+from pywikibot import pagegenerators
+
+class HarvestRobot:
+ """
+ A bot to add Wikidata claims
+ """
+ def __init__(self, generator, templateTitle, fields):
+ """
+ Arguments:
+ * generator - A generator that yields Page objects.
+ * templateTitle - The template to work on
+ * fields - A dictionary of fields that are of use to us
+
+ """
+ self.generator = generator
+ self.templateTitle = templateTitle.replace(u'_', u' ')
+ # TODO: Make it a list which also includes the redirects to the template
+ self.fields = fields
+ self.repo = pywikibot.Site().data_repository()
+ self.source = None
+ self.setSource(pywikibot.Site().language())
+
+ def setSource(self, lang):
+ '''
+ Get the source
+ '''
+ source_values = {'en': pywikibot.ItemPage(self.repo, 'Q328'),
+ 'sv': pywikibot.ItemPage(self.repo, 'Q169514'),
+ 'de': pywikibot.ItemPage(self.repo, 'Q48183'),
+ 'it': pywikibot.ItemPage(self.repo, 'Q11920'),
+ 'no': pywikibot.ItemPage(self.repo, 'Q191769'),
+ 'ar': pywikibot.ItemPage(self.repo, 'Q199700'),
+ 'es': pywikibot.ItemPage(self.repo, 'Q8449'),
+ 'pl': pywikibot.ItemPage(self.repo, 'Q1551807'),
+ 'ca': pywikibot.ItemPage(self.repo, 'Q199693'),
+ 'fr': pywikibot.ItemPage(self.repo, 'Q8447'),
+ 'nl': pywikibot.ItemPage(self.repo, 'Q10000'),
+ 'pt': pywikibot.ItemPage(self.repo, 'Q11921'),
+ 'ru': pywikibot.ItemPage(self.repo, 'Q206855'),
+ 'vi': pywikibot.ItemPage(self.repo, 'Q200180'),
+ 'be': pywikibot.ItemPage(self.repo, 'Q877583'),
+ 'uk': pywikibot.ItemPage(self.repo, 'Q199698'),
+ 'tr': pywikibot.ItemPage(self.repo, 'Q58255'),
+ } # TODO: Should be moved to a central wikidata library
+
+ if lang in source_values:
+ self.source = pywikibot.Claim(self.repo, 'p143')
+ self.source.setTarget(source_values.get(lang))
+
+ def run(self):
+ """
+ Starts the robot.
+ """
+ for page in self.generator:
+ self.procesPage(page)
+
+ def procesPage(self, page):
+ """
+    Process a single page
+ """
+ item = pywikibot.ItemPage.fromPage(page)
+ pywikibot.output('Processing %s' % page)
+ if not item.exists():
+ pywikibot.output('%s doesn\'t have a wikidata item :(' % page)
+ #TODO FIXME: We should provide an option to create the page
+ else:
+ pagetext = page.get()
+ templates = pywikibot.extract_templates_and_params(pagetext)
+ for (template, fielddict) in templates:
+ # We found the template we were looking for
+ if template.replace(u'_', u' ')==self.templateTitle:
+ for field, value in fielddict.items():
+ # This field contains something useful for us
+ if field in self.fields:
+ # Check if the property isn't already set
+ claim = pywikibot.Claim(self.repo, self.fields[field])
+ if claim.getID() in item.get().get('claims'):
+ pywikibot.output(u'A claim for %s already exists. Skipping' % (claim.getID(),))
+ #TODO FIXME: This is a very crude way of dupe checking
+ else:
+ # Try to extract a valid page
+ match = re.search(pywikibot.link_regex, value)
+ if match:
+ try:
+ link = pywikibot.Link(match.group(1))
+ linkedPage = pywikibot.Page(link)
+ linkedItem = pywikibot.ItemPage.fromPage(linkedPage)
+ claim.setTarget(linkedItem)
+ pywikibot.output('Adding %s --> %s' % (claim.getID(), claim.getTarget().getID()))
+ item.addClaim(claim)
+ if self.source:
+ claim.addSource(self.source, bot=True)
+ except pywikibot.exceptions.NoPage:
+ pywikibot.output('[[%s]] doesn\'t exist so I can\'t link to it' % (linkedItem.title(),))
+
+
+def main():
+ gen = pagegenerators.GeneratorFactory()
+ commandline_arguments = list()
+ templateTitle = u''
+ for arg in pywikibot.handleArgs():
+ if arg.startswith('-template'):
+ if len(arg) == 9:
+ templateTitle = pywikibot.input(
+ u'Please enter the template to work on:')
+ else:
+ templateTitle = arg[10:]
+ elif gen.handleArg(arg):
+ continue
+ else:
+ commandline_arguments.append(arg)
+
+ if len(commandline_arguments) % 2 or not templateTitle:
+ raise ValueError # or something.
+ fields = dict()
+
+ for i in xrange (0, len(commandline_arguments), 2):
+ fields[commandline_arguments[i]] = commandline_arguments[i+1]
+
+ generator = gen.getCombinedGenerator()
+ if not generator:
+ # TODO: Build a transcluding generator based on templateTitle
+ return
+
+ bot = HarvestRobot(generator, templateTitle, fields)
+ bot.run()
+
+if __name__ == "__main__":
+ main()