http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11741
Revision: 11741
Author: xqt
Date: 2013-07-10 13:12:22 +0000 (Wed, 10 Jul 2013)
Log Message:
-----------
- Update documentation
- -side option is deprecated with wikidata
- new feature for -nocache: ignoring several lang codes retrieved from cache
Modified Paths:
--------------
branches/rewrite/scripts/featured.py
Modified: branches/rewrite/scripts/featured.py
===================================================================
--- branches/rewrite/scripts/featured.py 2013-07-10 10:49:12 UTC (rev 11740)
+++ branches/rewrite/scripts/featured.py 2013-07-10 13:12:22 UTC (rev 11741)
@@ -3,21 +3,44 @@
"""
This script understands various command-line arguments:
+ Task commands:
+
+-featured use this script for featured articles. Default task if no task
+ command is specified
+
+-good use this script for good articles.
+
+-lists use this script for featured lists.
+
+-former use this script for removing {{Link FA|xx}} from former
+ featured articles
+
+ NOTE: you may have all of these commands in one run
+
+ Option commands:
+
-interactive: ask before changing each page
--nocache doesn't include /cache/featured /cache/lists or /cache/good
- file to remember if the article already was verified.
+-nocache doesn't use the cache files that remember whether an article
+ was already verified.
+
+-nocache:xx,yy you may ignore language codes xx,yy,... from cache file
-fromlang:xx,yy xx,yy,zz,.. are the languages to be verified.
-fromlang:ar--fi Another possible with range the languages
+ (sorry, not implemented yet)
-fromall to verify all languages.
+-tolang:xx,yy xx,yy,zz,.. are the languages to be updated
+
-after:zzzz process pages after and including page zzzz
+ (sorry, not implemented yet)
-side use -side if you want to move all {{Link FA|lang}} next to the
corresponding interwiki links. Default is placing
{{Link FA|lang}} on top of the interwiki links.
+ (This option is deprecated with wikidata)
-count Only counts how many featured/good articles exist
on all wikis (given with the "-fromlang" argument) or
@@ -25,18 +48,10 @@
Example: featured.py -fromlang:en,he -count
counts how many featured articles exist in the en and he
wikipedias.
+ (sorry, not implemented yet)
--lists use this script for featured lists.
-
--good use this script for good articles.
-
--former use this script for removing {{Link FA|xx}} from former
- fearured articles
-
-quiet no corresponding pages are displayed.
-usage: featured.py [-interactive] [-nocache] [-top] [-after:zzzz] [-fromlang:xx,yy--zz|-fromall]
-
"""
__version__ = '$Id$'
@@ -208,7 +223,7 @@
'fromlang': None,
'good': False,
'list': False,
- 'nocache': False,
+ 'nocache': None,
'side': False, # not template_on_top
'quiet': False,
}
@@ -230,18 +245,19 @@
return True
def readcache(self, task):
- self.filename = pywikibot.config.datafilepath("cache", task)
- try:
- f = open(self.filename, "rb")
- self.cache = pickle.load(f)
- f.close()
- pywikibot.output(u'Cache file %s found with %d items.'
- % (self.filename, len(self.cache)))
- except IOError:
- pywikibot.output(u'Cache file %s not found.' % self.filename)
+ if not self.getOption('nocache') is True:
+ self.filename = pywikibot.config.datafilepath("cache", task)
+ try:
+ f = open(self.filename, "rb")
+ self.cache = pickle.load(f)
+ f.close()
+ pywikibot.output(u'Cache file %s found with %d items.'
+ % (self.filename, len(self.cache)))
+ except IOError:
+ pywikibot.output(u'Cache file %s not found.' % self.filename)
def writecache(self):
- if not self.getOption('nocache'):
+ if not self.getOption('nocache') is True:
pywikibot.output(u'Writing %d items to cache file %s.'
% (len(self.cache), self.filename))
f = open(self.filename,"wb")
@@ -282,16 +298,14 @@
else:
return ### 2DO
self.fromlang.sort()
- if not self.getOption('nocache'):
- self.readcache(task)
+ self.readcache(task)
for code in self.fromlang:
try:
self.treat(code, task)
except KeyboardInterrupt:
pywikibot.output('\nQuitting featured treat...')
break
- if not self.getOption('nocache'):
- self.writecache()
+ self.writecache()
# not implemented yet
def run_list(self):
@@ -319,16 +333,14 @@
else:
return ### 2DO
self.fromlang.sort()
- if not self.getOption('nocache'):
- self.readcache(task)
+ self.readcache(task)
for code in self.fromlang:
try:
self.treat(code, task)
except KeyboardInterrupt:
pywikibot.output('\nQuitting featured treat...')
break
- if not self.getOption('nocache'):
- self.writecache()
+ self.writecache()
def treat(self, code, process):
fromsite = pywikibot.Site(code)
@@ -554,7 +566,8 @@
if not tosite.lang in self.cache[fromsite.lang]:
self.cache[fromsite.lang][tosite.lang] = {}
cc = self.cache[fromsite.lang][tosite.lang]
- if self.getOption('nocache'):
+ if self.getOption('nocache') is True or \
+ fromsite.code in self.getOption('nocache'):
cc = {}
templatelist = self.getTemplateList(tosite.code, task)
findtemplate = '(' + '|'.join(templatelist) + ')'
@@ -666,6 +679,8 @@
part = True
elif arg.startswith('-after:'):
afterpage = arg[7:]
+ elif arg.startswith('-nocache:'):
+ options[arg[1:8]] = arg[9:].split(",")
else:
options[arg[1:].lower()] = True
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11737
Revision: 11737
Author: legoktm
Date: 2013-07-10 06:24:43 +0000 (Wed, 10 Jul 2013)
Log Message:
-----------
Implement an opt-in version of using mwparserfromhell rather than regex.
Users can set "use_mwparserfromhell" as True in their user-config.py to enable this.
Modified Paths:
--------------
branches/rewrite/pywikibot/config2.py
branches/rewrite/pywikibot/textlib.py
branches/rewrite/tests/textlib_tests.py
Modified: branches/rewrite/pywikibot/config2.py
===================================================================
--- branches/rewrite/pywikibot/config2.py 2013-07-10 05:46:56 UTC (rev 11736)
+++ branches/rewrite/pywikibot/config2.py 2013-07-10 06:24:43 UTC (rev 11737)
@@ -583,6 +583,11 @@
# LS is a shortcut alias.
line_separator = LS = u'\n'
+# Settings to enable mwparserfromhell <http://mwparserfromhell.readthedocs.org/en/latest/>
+# Currently used in textlib.extract_templates_and_params
+# This should be more accurate than our current regex, but is currently opt-in.
+use_mwparserfromhell = False
+
# End of configuration section
# ============================
Modified: branches/rewrite/pywikibot/textlib.py
===================================================================
--- branches/rewrite/pywikibot/textlib.py 2013-07-10 05:46:56 UTC (rev 11736)
+++ branches/rewrite/pywikibot/textlib.py 2013-07-10 06:24:43 UTC (rev 11737)
@@ -13,7 +13,10 @@
#
__version__ = '$Id$'
-
+try:
+ import mwparserfromhell
+except ImportError:
+ mwparserfromhell = False
import pywikibot
import re
from HTMLParser import HTMLParser
@@ -886,10 +889,36 @@
with an integer value corresponding to its position among the unnnamed
parameters, and if this results multiple parameters with the same name
only the last value provided will be returned.
+
+ This uses a third party library (mwparserfromhell) if it is installed
+ and enabled in the user-config.py. Otherwise it falls back on a
+ regex based function defined below.
+
@param text: The wikitext from which templates are extracted
@type text: unicode or string
"""
+
+ if not (config.use_mwparserfromhell and mwparserfromhell):
+ return extract_templates_and_params_regex(text)
+ code = mwparserfromhell.parse(text)
+ result = []
+ for template in code.filter_templates():
+ params = {}
+ for param in template.params:
+ params[unicode(param.name)] = unicode(param.value)
+ result.append((unicode(template.name.strip()), params))
+ return result
+
+
+def extract_templates_and_params_regex(text):
+ """
+ See the documentation for extract_templates_and_params
+ This does basically the same thing, but uses regex.
+ @param text:
+ @return:
+ """
+
# remove commented-out stuff etc.
thistxt = removeDisabledParts(text)
Modified: branches/rewrite/tests/textlib_tests.py
===================================================================
--- branches/rewrite/tests/textlib_tests.py 2013-07-10 05:46:56 UTC (rev 11736)
+++ branches/rewrite/tests/textlib_tests.py 2013-07-10 06:24:43 UTC (rev 11737)
@@ -6,6 +6,10 @@
#
__version__ = '$Id: api_tests.py 8238 2010-06-02 13:50:48Z xqt $'
+try:
+ import mwparserfromhell
+except ImportError:
+ mwparserfromhell = False
import unittest
import codecs
import os
@@ -34,11 +38,18 @@
self.assertContains("enwiki_help_editing", u"Editing")
def testExtractTemplates(self):
+ if not (pywikibot.config.use_mwparserfromhell and mwparserfromhell):
+ return # We'll test the regex function in the test below
func = textlib.extract_templates_and_params # It's really long.
self.assertEqual(func('{{a}}'), [('a', {})])
self.assertEqual(func('{{a|b=c}}'), [('a', {'b': 'c'})])
self.assertEqual(func('{{a|b|c=d}}'), [('a', {u'1': 'b', 'c': 'd'})])
+ def testExtractTemplatesRegex(self):
+ func = textlib.extract_templates_and_params_regex # It's really long.
+ self.assertEqual(func('{{a}}'), [('a', {})])
+ self.assertEqual(func('{{a|b=c}}'), [('a', {'b': 'c'})])
+ self.assertEqual(func('{{a|b|c=d}}'), [('a', {u'1': 'b', 'c': 'd'})])
@unittest.expectedFailure
def testSpacesInSection(self):
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11735
Revision: 11735
Author: amir
Date: 2013-07-10 03:43:29 +0000 (Wed, 10 Jul 2013)
Log Message:
-----------
important bug fix for checking images in commons; it was an outdated setting
Modified Paths:
--------------
trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py 2013-07-09 16:26:36 UTC (rev 11734)
+++ trunk/pywikipedia/checkimages.py 2013-07-10 03:43:29 UTC (rev 11735)
@@ -961,7 +961,7 @@
pywikibot.output(u'%s is on commons!' % self.imageName)
on_commons_text = self.image.getImagePageHtml()
- if u"<div class='sharedUploadNotice'>" in on_commons_text:
+ if re.search(ur"\<div class\=(?:'|\")sharedUploadNotice(?:'|\")\>",on_commons_text):
pywikibot.output(
u"But, the file doesn't exist on your project! Skip...")
# We have to skip the check part for that image because