Revision: 7910
Author: xqt
Date: 2010-02-05 09:03:56 +0000 (Fri, 05 Feb 2010)
Log Message:
-----------
* removeDeprecatedTemplates: restored from an older version (added here as
replaceDeprecatedTemplates); it now also replaces old templates with new ones,
keeping their parameters if necessary
* fixHtml: replace HTML header tags (<h1>-<h6>) that share their line only with spaces by the equivalent MediaWiki header syntax
* exception handling for EditConflict
* additional choice for quitting the script
Modified Paths:
--------------
trunk/pywikipedia/cosmetic_changes.py
Modified: trunk/pywikipedia/cosmetic_changes.py
===================================================================
--- trunk/pywikipedia/cosmetic_changes.py 2010-02-05 06:42:52 UTC (rev 7909)
+++ trunk/pywikipedia/cosmetic_changes.py 2010-02-05 09:03:56 UTC (rev 7910)
@@ -214,6 +214,37 @@
'zh' : ([u'documentation', u'doc'], u'/doc'),
}
+# Templates which should be replaced or removed.
+# Use a tuple with two entries. The first entry will be replaced by the second.
+# Examples:
+# For removing {{Foo}}, the tuple must be:
+# (u'Foo', None),
+#
+# The following also works:
+# (u'Foo', ''),
+#
+# For replacing {{Foo}} with {{Bar}} the tuple must be:
+# (u'Foo', u'Bar'),
+#
+# This also removes all template parameters of {{Foo}}.
+# For replacing {{Foo}} with {{Bar}} while keeping the template
+# parameters in their original order, please use:
+# (u'Foo', u'Bar\g<parameters>'),
+
+deprecatedTemplates = {
+ 'wikipedia': {
+ 'de': [
+ (u'Stub', None),
+ (u'Belege', u'Belege fehlen\g<parameters>'),
+ (u'Quelle', u'Belege fehlen\g<parameters>'),
+ (u'Quellen', u'Belege fehlen\g<parameters>'),
+ ],
+ 'pdc':[
+ (u'Schkiss', None),
+ ],
+ }
+}
+
class CosmeticChangesToolkit:
def __init__(self, site, debug=False, redirect=False, namespace=None,
pageTitle=None):
self.site = site
@@ -235,6 +266,7 @@
text = self.cleanUpSectionHeaders(text)
text = self.putSpacesInLists(text)
text = self.translateAndCapitalizeNamespaces(text)
+ text = self.replaceDeprecatedTemplates(text)
text = self.resolveHtmlEntities(text)
text = self.validXhtml(text)
text = self.removeUselessSpaces(text)
@@ -570,6 +602,21 @@
text = pywikibot.replaceExcept(text,
r'(?m)^(?P<bullet>[:;]*(\*+|#+)[:;\*#]*)(?P<char>[^\s\*#:;].+?)',
'\g<bullet> \g<char>', exceptions)
return text
+ def replaceDeprecatedTemplates(self, text):
+ exceptions = ['comment', 'math', 'nowiki',
'pre']
+ if self.site.family.name in deprecatedTemplates and self.site.lang in
deprecatedTemplates[self.site.family.name]:
+ for template in deprecatedTemplates[self.site.family.name][self.site.lang]:
+ old = template[0]
+ new = template[1]
+ if new == None:
+ new = ''
+ else:
+ new = '{{'+new+'}}'
+ if not self.site.nocapitalize:
+ old = '[' + old[0].upper() + old[0].lower() + ']' +
old[1:]
+ text = pywikibot.replaceExcept(text, r'\{\{([mM][sS][gG]:)?' +
old + '(?P<parameters>\|[^}]+|)}}', new, exceptions)
+ return text
+
#from fixes.py
def fixSyntaxSave(self, text):
exceptions = ['nowiki', 'comment', 'math', 'pre',
'source', 'startspace']
@@ -600,6 +647,12 @@
# horizontal line with attributes; can't be done with wiki syntax
# so we only make it XHTML compliant
text = pywikibot.replaceExcept(text, r'(?i)<hr ([^>/]+?)>',
r'<hr \1 />', exceptions)
+ # a header where only spaces are in the same line
+ for level in range(1, 7):
+ equals = '\\1%s \\2 %s\\3' % ("="*level,
"="*level)
+ text = pywikibot.replaceExcept(text,
+ r'(?i)([\r\n]) *<h%d> *([^<]+?)
*</h%d> *([\r\n])'%(level, level),
+ r'%s'%equals, exceptions)
#remove empty <ref/>-tag
text = pywikibot.replaceExcept(text, r'(?i)<ref\s*/>', r'',
exceptions)
# TODO: maybe we can make the bot replace <p> tags with \r\n's.
@@ -631,6 +684,7 @@
self.generator = generator
self.acceptall = acceptall
self.comment = comment
+ self.done = False
def treat(self, page):
try:
@@ -641,9 +695,13 @@
changedText = ccToolkit.change(page.get())
if changedText.strip() != page.get().strip():
if not self.acceptall:
- choice = pywikibot.inputChoice(u'Do you want to accept these
changes?', ['Yes', 'No', 'All'], ['y', 'N',
'a'], 'N')
+ choice = pywikibot.inputChoice(u'Do you want to accept these
changes?',
+ ['Yes', 'No',
'All', 'Quit'], ['y', 'N', 'a', 'q'],
'N')
if choice == 'a':
self.acceptall = True
+ elif choice == 'q':
+ self.done = True
+ return
if self.acceptall or choice == 'y':
page.put(changedText, comment=self.comment)
else:
@@ -654,10 +712,13 @@
pywikibot.output("Page %s is a redirect; skipping." %
page.aslink())
except pywikibot.LockedPage:
pywikibot.output("Page %s is locked?!" % page.aslink())
+ except pywikibot.EditConflict:
+ pywikibot.output("An edit conflict has occured at %s." %
page.aslink())
def run(self):
try:
for page in self.generator:
+ if self.done: break
self.treat(page)
except KeyboardInterrupt:
pywikibot.output('\nQuitting program...')