http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11161
Revision: 11161
Author: xqt
Date: 2013-03-02 14:46:09 +0000 (Sat, 02 Mar 2013)
Log Message:
-----------
some PEP8 changes
Modified Paths:
--------------
trunk/pywikipedia/interwiki.py
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2013-03-02 13:59:34 UTC (rev 11160)
+++ trunk/pywikipedia/interwiki.py 2013-03-02 14:46:09 UTC (rev 11161)
@@ -343,7 +343,10 @@
__version__ = '$Id$'
#
-import sys, copy, re, os
+import sys
+import copy
+import re
+import os
import time
import codecs
import socket
@@ -360,102 +363,96 @@
'&pagegenerators_help;': pagegenerators.parameterHelp
}
+
class SaveError(pywikibot.Error):
"""
An attempt to save a page with changed interwiki has failed.
"""
+
class LinkMustBeRemoved(SaveError):
"""
An interwiki link has to be removed, but this can't be done because of user
preferences or because the user chose not to change the page.
"""
+
class GiveUpOnPage(pywikibot.Error):
"""
The user chose not to work on this page and its linked pages any more.
"""
+
# Subpage templates. Must be in lower case,
# whereas subpage itself must be case sensitive
moved_links = {
- 'ar' : ([u'documentation',
- u'template documentation',
- u'شرح',
- u'توثيق'], u'/doc'),
- 'bn' : (u'documentation', u'/doc'),
- 'ca' : (u'ús de la plantilla', u'/ús'),
- 'cs' : (u'dokumentace', u'/doc'),
- 'da' : (u'dokumentation', u'/doc'),
- 'de' : (u'dokumentation', u'/Meta'),
+ 'ar': ([u'documentation', u'template documentation', u'شرح', u'توثيق'],
+ u'/doc'),
+ 'bn': (u'documentation', u'/doc'),
+ 'ca': (u'ús de la plantilla', u'/ús'),
+ 'cs': (u'dokumentace', u'/doc'),
+ 'da': (u'dokumentation', u'/doc'),
+ 'de': (u'dokumentation', u'/Meta'),
'dsb': ([u'dokumentacija', u'doc'], u'/Dokumentacija'),
- 'en' : ([u'documentation',
- u'template documentation',
- u'template doc',
- u'doc',
- u'documentation, template'], u'/doc'),
- 'es' : ([u'documentación', u'documentación de plantilla'], u'/doc'),
- 'eu' : (u'txantiloi dokumentazioa', u'/dok'),
- 'fa' : ([u'documentation',
- u'template documentation',
- u'template doc',
- u'doc',
- u'توضیحات',
- u'زیرصفحه توضیحات'], u'/doc'),
+ 'en': ([u'documentation', u'template documentation', u'template doc',
+ u'doc', u'documentation, template'], u'/doc'),
+ 'es': ([u'documentación', u'documentación de plantilla'], u'/doc'),
+ 'eu': (u'txantiloi dokumentazioa', u'/dok'),
+ 'fa': ([u'documentation', u'template documentation', u'template doc',
+ u'doc', u'توضیحات', u'زیرصفحه توضیحات'], u'/doc'),
# fi: no idea how to handle this type of subpage at :Metasivu:
- 'fi' : (u'mallineohje', None),
- 'fr' : ([u'/documentation', u'documentation', u'doc_modèle',
- u'documentation modèle', u'documentation modèle compliqué',
- u'documentation modèle en sous-page',
- u'documentation modèle compliqué en sous-page',
- u'documentation modèle utilisant les parserfunctions en sous-page',
+ 'fi': (u'mallineohje', None),
+ 'fr': ([u'/documentation', u'documentation', u'doc_modèle',
+ u'documentation modèle', u'documentation modèle compliqué',
+ u'documentation modèle en sous-page',
+ u'documentation modèle compliqué en sous-page',
+ u'documentation modèle utilisant les parserfunctions en sous-page',
],
- u'/Documentation'),
+ u'/Documentation'),
'hsb': ([u'dokumentacija', u'doc'], u'/Dokumentacija'),
- 'hu' : (u'sablondokumentáció', u'/doc'),
- 'id' : (u'template doc', u'/doc'),
+ 'hu': (u'sablondokumentáció', u'/doc'),
+ 'id': (u'template doc', u'/doc'),
'ilo': (u'documentation', u'/doc'),
- 'ja' : (u'documentation', u'/doc'),
- 'ka' : (u'თარგის ინფო', u'/ინფო'),
- 'ko' : (u'documentation', u'/설명문서'),
- 'ms' : (u'documentation', u'/doc'),
- 'no' : (u'dokumentasjon', u'/dok'),
- 'nn' : (u'dokumentasjon', u'/dok'),
- 'pl' : (u'dokumentacja', u'/opis'),
- 'pt' : ([u'documentação', u'/doc'], u'/doc'),
- 'ro' : (u'documentaţie', u'/doc'),
- 'ru' : (u'doc', u'/doc'),
+ 'ja': (u'documentation', u'/doc'),
+ 'ka': (u'თარგის ინფო', u'/ინფო'),
+ 'ko': (u'documentation', u'/설명문서'),
+ 'ms': (u'documentation', u'/doc'),
+ 'no': (u'dokumentasjon', u'/dok'),
+ 'nn': (u'dokumentasjon', u'/dok'),
+ 'pl': (u'dokumentacja', u'/opis'),
+ 'pt': ([u'documentação', u'/doc'], u'/doc'),
+ 'ro': (u'documentaţie', u'/doc'),
+ 'ru': (u'doc', u'/doc'),
'simple': ([u'documentation',
u'template documentation',
u'template doc',
u'doc',
u'documentation, template'], u'/doc'),
- 'sk' : (u'dokumentácia', u'/Dokumentácia'),
- 'sv' : (u'dokumentation', u'/dok'),
- 'uk' : ([u'документація',
- u'doc',
- u'documentation'], u'/Документація'),
- 'vi' : (u'documentation', u'/doc'),
- 'zh' : ([u'documentation', u'doc'], u'/doc'),
+ 'sk': (u'dokumentácia', u'/Dokumentácia'),
+ 'sv': (u'dokumentation', u'/dok'),
+ 'uk': ([u'документація', u'doc', u'documentation'], u'/Документація'),
+ 'vi': (u'documentation', u'/doc'),
+ 'zh': ([u'documentation', u'doc'], u'/doc'),
}
# A list of template names in different languages.
# Pages which contain these shouldn't be changed.
ignoreTemplates = {
'_default': [u'delete'],
- 'ar' : [u'قيد الاستخدام'],
- 'cs' : [u'Pracuje_se'],
- 'de' : [u'inuse', 'in use', u'in bearbeitung', u'inbearbeitung',
- u'löschen', u'sla',
- u'löschantrag', u'löschantragstext',
- u'falschschreibung',
- u'obsolete schreibung', 'veraltete schreibweise'],
- 'en' : [u'inuse', u'softredirect'],
- 'fa' : [u'در دست ویرایش ۲', u'حذف سریع'],
+ 'ar': [u'قيد الاستخدام'],
+ 'cs': [u'Pracuje_se'],
+ 'de': [u'inuse', 'in use', u'in bearbeitung', u'inbearbeitung',
+ u'löschen', u'sla',
+ u'löschantrag', u'löschantragstext',
+ u'falschschreibung',
+ u'obsolete schreibung', 'veraltete schreibweise'],
+ 'en': [u'inuse', u'softredirect'],
+ 'fa': [u'در دست ویرایش ۲', u'حذف سریع'],
'pdc': [u'lösche'],
- 'zh' : [u'inuse'],
+ 'zh': [u'inuse'],
}
+
class Global(object):
"""
Container class for global settings.
@@ -497,9 +494,9 @@
contentsondisk = config.interwiki_contents_on_disk
lacklanguage = None
minlinks = 0
- quiet = False
+ quiet = False
restoreAll = False
- async = False
+ async = False
summary = u''
repository = False
@@ -512,9 +509,12 @@
elif arg.startswith('-hintfile'):
hintfilename = arg[10:]
if (hintfilename is None) or (hintfilename == ''):
- hintfilename = pywikibot.input(u'Please enter the hint filename:')
+ hintfilename = pywikibot.input(
+ u'Please enter the hint filename:')
f = codecs.open(hintfilename, 'r', config.textfile_encoding)
- R = re.compile(ur'\[\[(.+?)(?:\]\]|\|)') # hint or title ends either before | or before ]]
+
+ # hint or title ends either before | or before ]]
+ R = re.compile(ur'\[\[(.+?)(?:\]\]|\|)')
for pageTitle in R.findall(f.read()):
self.hints.append(pageTitle)
f.close()
@@ -574,7 +574,7 @@
elif arg.startswith('-neverlink:'):
self.neverlink += arg[11:].split(",")
elif arg.startswith('-ignore:'):
- self.ignore += [pywikibot.Page(None,p) for p in arg[8:].split(",")]
+ self.ignore += [pywikibot.Page(None, p) for p in arg[8:].split(",")]
elif arg.startswith('-ignorefile:'):
ignorefile = arg[12:]
ignorePageGen = pagegenerators.TextfilePageGenerator(ignorefile)
@@ -604,7 +604,8 @@
self.async = True
elif arg.startswith('-summary'):
if len(arg) == 8:
- self.summary = pywikibot.input(u'What summary do you want to use?')
+ self.summary = pywikibot.input(
+ u'What summary do you want to use?')
else:
self.summary = arg[9:]
elif arg.startswith('-lack:'):
@@ -618,6 +619,7 @@
return False
return True
+
class StoredPage(pywikibot.Page):
"""
Store the Page contents on disk to avoid sucking too much
@@ -634,20 +636,20 @@
SPstore = None
# attributes created by pywikibot.Page.__init__
- SPcopy = [ '_editrestriction',
- '_site',
- '_namespace',
- '_section',
- '_title',
- 'editRestriction',
- 'moveRestriction',
- '_permalink',
- '_userName',
- '_ipedit',
- '_editTime',
- '_startTime',
- '_revisionId',
- '_deletedRevs' ]
+ SPcopy = ['_editrestriction',
+ '_site',
+ '_namespace',
+ '_section',
+ '_title',
+ 'editRestriction',
+ 'moveRestriction',
+ '_permalink',
+ '_userName',
+ '_ipedit',
+ '_editTime',
+ '_startTime',
+ '_revisionId',
+ '_deletedRevs']
def SPdeleteStore():
if StoredPage.SPpath:
@@ -685,6 +687,7 @@
_contents = property(SPgetContents, SPsetContents, SPdelContents)
+
class PageTree(object):
"""
Structure to manipulate a set of pages.
@@ -763,6 +766,7 @@
for page in plist:
yield page
+
class Subject(object):
"""
Class to follow the progress of a single 'subject' (i.e. a page with
@@ -813,9 +817,8 @@
done <- NL(pending) U done
return done
-
- Subject objects only operate on pages that should have been preloaded before.
- In fact, at any time:
+ Subject objects only operate on pages that should have been preloaded
+ before. In fact, at any time:
* todo contains new Pages that have not been loaded yet
* done contains Pages that have been loaded, and that have been treated.
* If batch preloadings are successful, Page._get() is never called from
@@ -848,7 +851,7 @@
# As we haven't yet found a page that links to the origin page, we
# start with an empty list for it.
if originPage:
- self.foundIn = {self.originPage:[]}
+ self.foundIn = {self.originPage: []}
else:
self.foundIn = {}
# This is a list of all pages that are currently scheduled for
@@ -876,7 +879,6 @@
for page in tree.filter(site):
if page.exists() and page.isDisambig():
return page
- return None
def getFoundNonDisambig(self, site):
"""
@@ -887,10 +889,9 @@
"""
for tree in [self.done, self.pending]:
for page in tree.filter(site):
- if page.exists() and not page.isDisambig() \
- and not page.isRedirectPage() and not page.isCategoryRedirect():
+ if page.exists() and not page.isDisambig() and \
+ not page.isRedirectPage() and not page.isCategoryRedirect():
return page
- return None
def getFoundInCorrectNamespace(self, site):
"""
@@ -901,25 +902,36 @@
"""
for tree in [self.done, self.pending, self.todo]:
for page in tree.filter(site):
- # -hintsonly: before we have an origin page, any namespace will do.
- if self.originPage and page.namespace() == self.originPage.namespace():
- if page.exists() and not page.isRedirectPage() and not page.isCategoryRedirect():
+ # -hintsonly: before we have an origin page, any namespace will
+ # do.
+ if self.originPage and \
+ page.namespace() == self.originPage.namespace():
+ if page.exists() and not \
+ page.isRedirectPage() and not page.isCategoryRedirect():
return page
- return None
- def translate(self, hints = None, keephintedsites = False):
+ def translate(self, hints=None, keephintedsites=False):
"""Add the given translation hints to the todo list"""
if globalvar.same and self.originPage:
if hints:
- pages = titletranslate.translate(self.originPage, hints = hints + ['all:'],
- auto = globalvar.auto, removebrackets = globalvar.hintnobracket)
+ pages = titletranslate.translate(
+ self.originPage,
+ hints=hints + ['all:'],
+ auto=globalvar.auto,
+ removebrackets=globalvar.hintnobracket)
else:
- pages = titletranslate.translate(self.originPage, hints = ['all:'],
- auto = globalvar.auto, removebrackets = globalvar.hintnobracket)
+ pages = titletranslate.translate(
+ self.originPage,
+ hints=['all:'],
+ auto=globalvar.auto,
+ removebrackets=globalvar.hintnobracket)
else:
- pages = titletranslate.translate(self.originPage, hints=hints,
- auto=globalvar.auto, removebrackets=globalvar.hintnobracket,
- site=pywikibot.getSite())
+ pages = titletranslate.translate(
+ self.originPage,
+ hints=hints,
+ auto=globalvar.auto,
+ removebrackets=globalvar.hintnobracket,
+ site=pywikibot.getSite())
for page in pages:
if globalvar.contentsondisk:
page = StoredPage(page)
@@ -946,7 +958,8 @@
# Bug-check: Isn't there any work still in progress? We can't work on
# different sites at a time!
if len(self.pending) > 0:
- raise 'BUG: Can\'t start to work on %s; still working on %s' % (site, self.pending)
+ raise "BUG: Can't start to work on %s; still working on %s" \
+ % (site, self.pending)
# Prepare a list of suitable pages
result = []
for page in self.todo.filter(site):
@@ -958,7 +971,7 @@
# If there are any, return them. Otherwise, nothing is in progress.
return result
- def makeForcedStop(self,counter):
+ def makeForcedStop(self, counter):
"""
Ends work on the page before the normal end.
"""
@@ -1018,26 +1031,33 @@
if linkedPage in self.foundIn:
# We have seen this page before, don't ask again.
return False
- elif self.originPage and self.originPage.namespace() != linkedPage.namespace():
+ elif self.originPage and \
+ self.originPage.namespace() != linkedPage.namespace():
# Allow for a mapping between different namespaces
- crossFrom = self.originPage.site.family.crossnamespace.get(self.originPage.namespace(), {})
- crossTo = crossFrom.get(self.originPage.site.language(), crossFrom.get('_default', {}))
- nsmatch = crossTo.get(linkedPage.site.language(), crossTo.get('_default', []))
+ crossFrom = self.originPage.site.family.crossnamespace.get(
+ self.originPage.namespace(), {})
+ crossTo = crossFrom.get(self.originPage.site.language(),
+ crossFrom.get('_default', {}))
+ nsmatch = crossTo.get(linkedPage.site.language(),
+ crossTo.get('_default', []))
if linkedPage.namespace() in nsmatch:
return False
if globalvar.autonomous:
- pywikibot.output(u"NOTE: Ignoring link from page %s in namespace %i to page %s in namespace %i."
- % (linkingPage, linkingPage.namespace(),
- linkedPage, linkedPage.namespace()))
+ pywikibot.output(
+u"NOTE: Ignoring link from page %s in namespace %i to page %s in namespace %i."
+ % (linkingPage, linkingPage.namespace(), linkedPage,
+ linkedPage.namespace()))
# Fill up foundIn, so that we will not write this notice
self.foundIn[linkedPage] = [linkingPage]
return True
else:
preferredPage = self.getFoundInCorrectNamespace(linkedPage.site)
if preferredPage:
- pywikibot.output(u"NOTE: Ignoring link from page %s in namespace %i to page %s in namespace %i because page %s in the correct namespace has already been found."
- % (linkingPage, linkingPage.namespace(), linkedPage,
- linkedPage.namespace(), preferredPage))
+ pywikibot.output(
+u"NOTE: Ignoring link from page %s in namespace %i to page %s in namespace %i "
+u"because page %s in the correct namespace has already been found."
+ % (linkingPage, linkingPage.namespace(), linkedPage,
+ linkedPage.namespace(), preferredPage))
return True
else:
choice = pywikibot.inputChoice(
@@ -1052,13 +1072,17 @@
if choice == 'g':
self.makeForcedStop(counter)
elif choice == 'a':
- newHint = pywikibot.input(u'Give the alternative for language %s, not using a language code:'
- % linkedPage.site.language())
+ newHint = pywikibot.input(
+ u'Give the alternative for language %s, not '
+ u'using a language code:'
+ % linkedPage.site.language())
if newHint:
- alternativePage = pywikibot.Page(linkedPage.site, newHint)
+ alternativePage = pywikibot.Page(
+ linkedPage.site, newHint)
if alternativePage:
# add the page that was entered by the user
- self.addIfNew(alternativePage, counter, None)
+ self.addIfNew(alternativePage, counter,
+ None)
else:
pywikibot.output(
u"NOTE: ignoring %s and its interwiki links"
@@ -1070,14 +1094,18 @@
return False
def wiktionaryMismatch(self, page):
- if self.originPage and globalvar.same=='wiktionary':
+ if self.originPage and globalvar.same == 'wiktionary':
if page.title().lower() != self.originPage.title().lower():
- pywikibot.output(u"NOTE: Ignoring %s for %s in wiktionary mode" % (page, self.originPage))
- return True
- elif page.title() != self.originPage.title() and self.originPage.site.nocapitalize and page.site.nocapitalize:
- pywikibot.output(u"NOTE: Ignoring %s for %s in wiktionary mode because both languages are uncapitalized."
+ pywikibot.output(u"NOTE: Ignoring %s for %s in wiktionary mode"
% (page, self.originPage))
return True
+ elif page.title() != self.originPage.title() and \
+ self.originPage.site.nocapitalize and page.site.nocapitalize:
+ pywikibot.output(
+ u"NOTE: Ignoring %s for %s in wiktionary mode because both "
+ u"languages are uncapitalized."
+ % (page, self.originPage))
+ return True
return False
def disambigMismatch(self, page, counter):
@@ -1095,15 +1123,17 @@
chosen to use instead of the given page.
"""
if not self.originPage:
- return (False, None) # any page matches until we have an origin page
+ return (False, None) # any page matches til we have an origin page
if globalvar.autonomous:
if self.originPage.isDisambig() and not page.isDisambig():
- pywikibot.output(u"NOTE: Ignoring link from disambiguation page %s to non-disambiguation %s"
- % (self.originPage, page))
+ pywikibot.output(
+ u"NOTE: Ignoring link from disambiguation page %s to "
+ u"non-disambiguation %s" % (self.originPage, page))
return (True, None)
elif not self.originPage.isDisambig() and page.isDisambig():
- pywikibot.output(u"NOTE: Ignoring link from non-disambiguation page %s to disambiguation %s"
- % (self.originPage, page))
+ pywikibot.output(
+ u"NOTE: Ignoring link from non-disambiguation page %s to "
+ u"disambiguation %s" % (self.originPage, page))
return (True, None)
else:
choice = 'y'
@@ -1111,32 +1141,39 @@
disambig = self.getFoundDisambig(page.site)
if disambig:
pywikibot.output(
- u"NOTE: Ignoring non-disambiguation page %s for %s because disambiguation page %s has already been found."
+ u"NOTE: Ignoring non-disambiguation page %s for %s "
+ u"because disambiguation page %s has already been "
+ u"found."
% (page, self.originPage, disambig))
return (True, None)
else:
choice = pywikibot.inputChoice(
- u'WARNING: %s is a disambiguation page, but %s doesn\'t seem to be one. Follow it anyway?'
+ u"WARNING: %s is a disambiguation page, but %s doesn't "
+ u"seem to be one. Follow it anyway?"
% (self.originPage, page),
['Yes', 'No', 'Add an alternative', 'Give up'],
['y', 'n', 'a', 'g'])
elif not self.originPage.isDisambig() and page.isDisambig():
nondisambig = self.getFoundNonDisambig(page.site)
if nondisambig:
- pywikibot.output(u"NOTE: Ignoring disambiguation page %s for %s because non-disambiguation page %s has already been found."
- % (page, self.originPage, nondisambig))
+ pywikibot.output(
+ u"NOTE: Ignoring disambiguation page %s for %s because "
+ u"non-disambiguation page %s has already been found."
+ % (page, self.originPage, nondisambig))
return (True, None)
else:
choice = pywikibot.inputChoice(
- u'WARNING: %s doesn\'t seem to be a disambiguation page, but %s is one. Follow it anyway?'
+ u'WARNING: %s doesn\'t seem to be a disambiguation '
+ u'page, but %s is one. Follow it anyway?'
% (self.originPage, page),
['Yes', 'No', 'Add an alternative', 'Give up'],
['y', 'n', 'a', 'g'])
if choice == 'n':
return (True, None)
elif choice == 'a':
- newHint = pywikibot.input(u'Give the alternative for language %s, not using a language code:'
- % page.site.language())
+ newHint = pywikibot.input(
+ u'Give the alternative for language %s, not using a '
+ u'language code:' % page.site.language())
alternativePage = pywikibot.Page(page.site, newHint)
return (True, alternativePage)
elif choice == 'g':
@@ -1171,7 +1208,8 @@
return
if (self.untranslated or globalvar.askhints) and not self.hintsAsked \
and self.originPage and self.originPage.exists() \
- and not self.originPage.isRedirectPage() and not self.originPage.isCategoryRedirect():
+ and not self.originPage.isRedirectPage() and \
+ not self.originPage.isCategoryRedirect():
# Only once!
self.hintsAsked = True
if globalvar.untranslated:
@@ -1181,17 +1219,24 @@
pywikibot.output(self.originPage.get()[:t])
# loop
while True:
- newhint = pywikibot.input(u'Give a hint (? to see pagetext):')
+ newhint = pywikibot.input(
+ u'Give a hint (? to see pagetext):')
if newhint == '?':
t += globalvar.showtextlinkadd
pywikibot.output(self.originPage.get()[:t])
elif newhint and not ':' in newhint:
- pywikibot.output(u'Please enter a hint in the format language:pagename or type nothing if you do not have a hint.')
+ pywikibot.output(
+ u'Please enter a hint in the format '
+ u'language:pagename or type nothing if you do not '
+ u'have a hint.')
elif not newhint:
break
else:
- pages = titletranslate.translate(self.originPage, hints=[newhint],
- auto = globalvar.auto, removebrackets=globalvar.hintnobracket)
+ pages = titletranslate.translate(
+ self.originPage,
+ hints=[newhint],
+ auto=globalvar.auto,
+ removebrackets=globalvar.hintnobracket)
for page in pages:
self.addIfNew(page, counter, None)
if globalvar.hintsareright:
@@ -1204,14 +1249,14 @@
In other words, all the pages in self.pending have already
been preloaded.
- The only argument is an instance
- of a counter class, that has methods minus() and plus() to keep
- counts of the total work todo.
+ The only argument is an instance of a counter class, that has methods
+ minus() and plus() to keep counts of the total work todo.
+
"""
# Loop over all the pages that should have been taken care of
for page in self.pending:
- if page.title == None: ### seems a DataPage
- page.get() ### get it's title (and content)
+ if page.title == None: ### seems a DataPage
+ page.get() ### get it's title (and content)
# Mark the page as done
self.done.add(page)
@@ -1220,9 +1265,11 @@
dictName, year = page.autoFormat()
if dictName is not None:
if self.originPage:
- pywikibot.output(u'WARNING: %s:%s relates to %s:%s, which is an auto entry %s(%s)'
- % (self.originPage.site.language(), self.originPage,
- page.site.language(), page, dictName, year))
+ pywikibot.output(
+ u'WARNING: %s:%s relates to %s:%s, which is an '
+ u'auto entry %s(%s)'
+ % (self.originPage.site.language(), self.originPage,
+ page.site.language(), page, dictName, year))
# Abort processing if the bot is running in autonomous mode.
if globalvar.autonomous:
@@ -1240,12 +1287,13 @@
pywikibot.output(u"NOTE: %s does not exist. Skipping."
% page)
if page == self.originPage:
- # The page we are working on is the page that does not exist.
- # No use in doing any work on it in that case.
+ # The page we are working on is the page that does not
+ # exist. No use in doing any work on it in that case.
for site, count in self.todo.siteCounts():
counter.minus(site, count)
self.todo = PageTree()
- # In some rare cases it might be we already did check some 'automatic' links
+ # In some rare cases it might be we already did check some
+ # 'automatic' links
self.done = PageTree()
continue
@@ -1270,7 +1318,8 @@
pywikibot.output(u"NOTE: %s is %sredirect to %s"
% (page, redir, redirectTargetPage))
if self.originPage is None or page == self.originPage:
- # the 1st existig page becomes the origin page, if none was supplied
+ # the 1st existig page becomes the origin page, if none was
+ # supplied
if globalvar.initialredirect:
if globalvar.contentsondisk:
redirectTargetPage = StoredPage(redirectTargetPage)
@@ -1281,8 +1330,8 @@
self.todo.add(redirectTargetPage)
counter.plus(redirectTargetPage.site)
else:
- # This is a redirect page to the origin. We don't need to
- # follow the redirection.
+ # This is a redirect page to the origin. We don't need
+ # to follow the redirection.
# In this case we can also stop all hints!
for site, count in self.todo.siteCounts():
counter.minus(site, count)
@@ -1326,7 +1375,8 @@
# Page exists, isnt a redirect, and is a plain link (no section)
if self.originPage is None:
- # the 1st existig page becomes the origin page, if none was supplied
+ # the 1st existig page becomes the origin page, if none was
+ # supplied
self.originPage = page
try:
iw = page.interwiki()
@@ -1348,7 +1398,8 @@
duplicate = None
for p in self.done.filter(page.site):
- if p != page and p.exists() and not p.isRedirectPage() and not p.isCategoryRedirect():
+ if p != page and p.exists() and \
+ not p.isRedirectPage() and not p.isCategoryRedirect():
duplicate = p
break
@@ -1358,7 +1409,8 @@
# Ignore the interwiki links.
iw = ()
if globalvar.lacklanguage:
- if globalvar.lacklanguage in [link.site.language() for link in iw]:
+ if globalvar.lacklanguage in [link.site.language()
+ for link in iw]:
iw = ()
self.workonme = False
if len(iw) < globalvar.minlinks:
@@ -1366,8 +1418,9 @@
self.workonme = False
elif globalvar.autonomous and duplicate and not skip:
- pywikibot.output(u"Stopping work on %s because duplicate pages"\
- " %s and %s are found" % (self.originPage, duplicate, page))
+ pywikibot.output(u"Stopping work on %s because duplicate pages"
+ " %s and %s are found"
+ % (self.originPage, duplicate, page))
self.makeForcedStop(counter)
try:
f = codecs.open(
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11159
Revision: 11159
Author: xqt
Date: 2013-03-02 13:57:27 +0000 (Sat, 02 Mar 2013)
Log Message:
-----------
some docs from rewrite
Modified Paths:
--------------
trunk/pywikipedia/userinterfaces/terminal_interface_base.py
Modified: trunk/pywikipedia/userinterfaces/terminal_interface_base.py
===================================================================
--- trunk/pywikipedia/userinterfaces/terminal_interface_base.py 2013-03-02 13:56:43 UTC (rev 11158)
+++ trunk/pywikipedia/userinterfaces/terminal_interface_base.py 2013-03-02 13:57:27 UTC (rev 11159)
@@ -121,11 +121,13 @@
def input(self, question, password = False):
"""
+ Ask the user a question and return the answer.
+
Works like raw_input(), but returns a unicode string instead of ASCII.
Unlike raw_input, this function automatically adds a space after the
question.
-
+
"""
# sound the terminal bell to notify the user
@@ -142,6 +144,9 @@
return text
def inputChoice(self, question, options, hotkeys, default=None):
+ """
+ Ask the user a question with a predefined list of acceptable answers.
+ """
options = options[:] # we don't want to edit the passed parameter
for i in range(len(options)):
option = options[i]
@@ -164,18 +169,19 @@
answer = self.input(prompt)
if answer.lower() in hotkeys or answer.upper() in hotkeys:
return answer
- elif default and answer=='': # empty string entered
+ elif default and answer=='': # empty string entered
return default
def editText(self, text, jumpIndex=None, highlight=None):
- """
+ """Return the text as edited by the user.
+
Uses a Tkinter edit box because we don't have a console editor
Parameters:
* text - a Unicode string
* jumpIndex - an integer: position at which to put the caret
* highlight - a substring; each occurence will be highlighted
-
+
"""
try:
import gui
@@ -186,6 +192,7 @@
return editor.edit(text, jumpIndex=jumpIndex, highlight=highlight)
def askForCaptcha(self, url):
+ """Show the user a CAPTCHA image and return the answer."""
try:
import webbrowser
wikipedia.output(u'Opening CAPTCHA in your web browser...')
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11157
Revision: 11157
Author: xqt
Date: 2013-03-02 13:46:19 +0000 (Sat, 02 Mar 2013)
Log Message:
-----------
some PEP8 changes
Modified Paths:
--------------
branches/rewrite/scripts/basic.py
Modified: branches/rewrite/scripts/basic.py
===================================================================
--- branches/rewrite/scripts/basic.py 2013-03-02 12:57:54 UTC (rev 11156)
+++ branches/rewrite/scripts/basic.py 2013-03-02 13:46:19 UTC (rev 11157)
@@ -33,6 +33,7 @@
'&params;': pagegenerators.parameterHelp
}
+
class BasicBot:
# Edit summary message that should be used is placed on /i18n subdirectory.
# The file containing these messages should have the same name as the caller
@@ -103,7 +104,7 @@
% page.title())
# show what was changed
pywikibot.showDiff(page.get(), text)
- pywikibot.output(u'Comment: %s' %comment)
+ pywikibot.output(u'Comment: %s' % comment)
if not self.dry:
choice = pywikibot.inputChoice(
u'Do you want to accept these changes?',
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11156
Revision: 11156
Author: xqt
Date: 2013-03-02 12:57:54 +0000 (Sat, 02 Mar 2013)
Log Message:
-----------
some PEP8 changes
Modified Paths:
--------------
trunk/pywikipedia/BeautifulSoup.py
Modified: trunk/pywikipedia/BeautifulSoup.py
===================================================================
--- trunk/pywikipedia/BeautifulSoup.py 2013-03-02 10:39:02 UTC (rev 11155)
+++ trunk/pywikipedia/BeautifulSoup.py 2013-03-02 12:57:54 UTC (rev 11156)
@@ -89,9 +89,9 @@
import re
import sgmllib
try:
- from htmlentitydefs import name2codepoint
+ from htmlentitydefs import name2codepoint
except ImportError:
- name2codepoint = {}
+ name2codepoint = {}
try:
set
except NameError:
@@ -103,12 +103,13 @@
DEFAULT_OUTPUT_ENCODING = "utf-8"
+
def _match_css_class(str):
"""Build a RE to match the given CSS class."""
return re.compile(r"(^|.*\s)%s($|\s)" % str)
+
# First, the classes that represent markup elements.
-
class PageElement(object):
"""Contains the navigational information for some part of the page
(either a tag or a piece of text)"""
@@ -128,8 +129,8 @@
def replaceWith(self, replaceWith):
oldParent = self.parent
myIndex = self.parent.index(self)
- if hasattr(replaceWith, "parent")\
- and replaceWith.parent is self.parent:
+ if hasattr(replaceWith, "parent") and \
+ replaceWith.parent is self.parent:
# We're replacing this element with one of its siblings.
index = replaceWith.parent.index(replaceWith)
if index and index < myIndex:
@@ -186,11 +187,11 @@
return lastChild
def insert(self, position, newChild):
- if isinstance(newChild, basestring) \
- and not isinstance(newChild, NavigableString):
+ if isinstance(newChild, basestring) and not \
+ isinstance(newChild, NavigableString):
newChild = NavigableString(newChild)
- position = min(position, len(self.contents))
+ position = min(position, len(self.contents))
if hasattr(newChild, 'parent') and newChild.parent is not None:
# We're 'inserting' an element that's already one
# of this object's children.
@@ -227,7 +228,7 @@
while not parentsNextSibling:
parentsNextSibling = parent.nextSibling
parent = parent.parent
- if not parent: # This is the last element in the document.
+ if not parent: # This is the last element in the document.
break
if parentsNextSibling:
newChildsLastElement.next = parentsNextSibling
@@ -272,8 +273,9 @@
criteria and appear after this Tag in the document."""
return self._findAll(name, attrs, text, limit,
self.nextSiblingGenerator, **kwargs)
- fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x
+ fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x
+
def findPrevious(self, name=None, attrs={}, text=None, **kwargs):
"""Returns the first item that matches the given criteria and
appears before this Tag in the document."""
@@ -284,8 +286,8 @@
"""Returns all items that match the given criteria and appear
before this Tag in the document."""
return self._findAll(name, attrs, text, limit, self.previousGenerator,
- **kwargs)
- fetchPrevious = findAllPrevious # Compatibility with pre-3.x
+ **kwargs)
+ fetchPrevious = findAllPrevious # Compatibility with pre-3.x
def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs):
"""Returns the closest sibling to this Tag that matches the
@@ -299,7 +301,7 @@
criteria and appear before this Tag in the document."""
return self._findAll(name, attrs, text, limit,
self.previousSiblingGenerator, **kwargs)
- fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x
+ fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x
def findParent(self, name=None, attrs={}, **kwargs):
"""Returns the closest parent of this Tag that matches the given
@@ -318,8 +320,9 @@
return self._findAll(name, attrs, None, limit, self.parentGenerator,
**kwargs)
- fetchParents = findParents # Compatibility with pre-3.x
+ fetchParents = findParents # Compatibility with pre-3.x
+
#These methods do the real heavy lifting.
def _findOne(self, method, name, attrs, text, **kwargs):
@@ -415,11 +418,12 @@
s = unicode(s)
else:
if encoding:
- s = self.toEncoding(str(s), encoding)
+ s = self.toEncoding(str(s), encoding)
else:
s = unicode(s)
return s
+
class NavigableString(unicode, PageElement):
def __new__(cls, value):
@@ -444,7 +448,8 @@
if attr == 'string':
return self
else:
- raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)
+ raise AttributeError("'%s' object has no attribute '%s'"
+ % (self.__class__.__name__, attr))
def __unicode__(self):
return str(self).decode(DEFAULT_OUTPUT_ENCODING)
@@ -455,11 +460,13 @@
else:
return self
+
class CData(NavigableString):
def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
return "<![CDATA[%s]]>" % NavigableString.__str__(self, encoding)
+
class ProcessingInstruction(NavigableString):
def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
output = self
@@ -467,14 +474,17 @@
output = self.substituteEncoding(output, encoding)
return "<?%s?>" % self.toEncoding(output, encoding)
+
class Comment(NavigableString):
def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
return "<!--%s-->" % NavigableString.__str__(self, encoding)
+
class Declaration(NavigableString):
def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
return "<!%s>" % NavigableString.__str__(self, encoding)
+
class Tag(PageElement):
"""Represents a found HTML tag with its attributes and contents."""
@@ -482,15 +492,15 @@
def _invert(h):
"Cheap function to invert a hash."
i = {}
- for k,v in h.items():
+ for k, v in h.items():
i[v] = k
return i
- XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'",
- "quot" : '"',
- "amp" : "&",
- "lt" : "<",
- "gt" : ">" }
+ XML_ENTITIES_TO_SPECIAL_CHARS = {"apos": "'",
+ "quot": '"',
+ "amp": "&",
+ "lt": "<",
+ "gt": ">"}
XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS)
@@ -549,8 +559,8 @@
self.attrs = map(convert, self.attrs)
def getString(self):
- if (len(self.contents) == 1
- and isinstance(self.contents[0], NavigableString)):
+ if (len(self.contents) == 1 and isinstance(self.contents[0],
+ NavigableString)):
return self.contents[0]
def setString(self, string):
@@ -592,7 +602,7 @@
raise ValueError("Tag.index: element not in tag")
def has_key(self, key):
- return self._getAttrMap().has_key(key)
+ return key in self._getAttrMap()
def __getitem__(self, key):
"""tag[key] returns the value of the 'key' attribute for the tag,
@@ -636,7 +646,7 @@
#We don't break because bad HTML can define the same
#attribute multiple times.
self._getAttrMap()
- if self.attrMap.has_key(key):
+ if key in self.attrMap:
del self.attrMap[key]
def __call__(self, *args, **kwargs):
@@ -651,7 +661,8 @@
return self.find(tag[:-3])
elif tag.find('__') != 0:
return self.find(tag)
- raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag)
+ raise AttributeError("'%s' object has no attribute '%s'"
+ % (self.__class__, tag))
def __eq__(self, other):
"""Returns true iff this tag has the same name, the same attributes,
@@ -661,7 +672,9 @@
same attributes in a different order. Should this be fixed?"""
if other is self:
return True
- if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other):
+ if not hasattr(other, 'name') or not hasattr(other, 'attrs') or \
+ not hasattr(other, 'contents') or self.name != other.name or \
+ self.attrs != other.attrs or len(self) != len(other):
return False
for i in range(0, len(self.contents)):
if self.contents[i] != other.contents[i]:
@@ -734,7 +747,8 @@
# value might also contain angle brackets, or
# ampersands that aren't part of entities. We need
# to escape those to XML entities too.
- val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val)
+ val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity,
+ val)
attrs.append(fmt % (self.toEncoding(key, encoding),
self.toEncoding(val, encoding)))
@@ -798,7 +812,7 @@
prettyPrint=False, indentLevel=0):
"""Renders the contents of this tag as a string in the given
encoding. If encoding is None, returns a Unicode string.."""
- s=[]
+ s = []
for c in self:
text = None
if isinstance(c, NavigableString):
@@ -912,13 +926,13 @@
if isinstance(markupName, Tag):
markup = markupName
markupAttrs = markup
- callFunctionWithTagData = callable(self.name) \
- and not isinstance(markupName, Tag)
+ callFunctionWithTagData = callable(self.name) and \
+ not isinstance(markupName, Tag)
if (not self.name) \
- or callFunctionWithTagData \
- or (markup and self._matches(markup, self.name)) \
- or (not markup and self._matches(markupName, self.name)):
+ or callFunctionWithTagData \
+ or (markup and self._matches(markup, self.name)) \
+ or (not markup and self._matches(markupName, self.name)):
if callFunctionWithTagData:
match = self.name(markupName, markupAttrs)
else:
@@ -926,11 +940,11 @@
markupAttrMap = None
for attr, matchAgainst in self.attrs.items():
if not markupAttrMap:
- if hasattr(markupAttrs, 'get'):
+ if hasattr(markupAttrs, 'get'):
markupAttrMap = markupAttrs
- else:
+ else:
markupAttrMap = {}
- for k,v in markupAttrs:
+ for k, v in markupAttrs:
markupAttrMap[k] = v
attrValue = markupAttrMap.get(attr)
if not self._matches(attrValue, matchAgainst):
@@ -948,11 +962,10 @@
found = None
# If given a list of items, scan it for a text element that
# matches.
- if hasattr(markup, "__iter__") \
- and not isinstance(markup, Tag):
+ if hasattr(markup, "__iter__") and not isinstance(markup, Tag):
for element in markup:
- if isinstance(element, NavigableString) \
- and self.search(element):
+ if isinstance(element, NavigableString) and \
+ self.search(element):
found = element
break
# If it's a Tag, make sure its name or attributes match.
@@ -961,13 +974,13 @@
if not self.text:
found = self.searchTag(markup)
# If it's text, make sure the text matches.
- elif isinstance(markup, NavigableString) or \
- isinstance(markup, basestring):
+ elif isinstance(markup, NavigableString) or isinstance(markup,
+ basestring):
if self._matches(markup, self.text):
found = markup
else:
- raise Exception, "I don't know how to match against a %s" \
- % markup.__class__
+ raise Exception("I don't know how to match against a %s"
+ % markup.__class__)
return found
def _matches(self, markup, matchAgainst):
@@ -988,10 +1001,10 @@
if hasattr(matchAgainst, 'match'):
# It's a regexp object.
result = markup and matchAgainst.search(markup)
- elif hasattr(matchAgainst, '__iter__'): # list-like
+ elif hasattr(matchAgainst, '__iter__'): # list-like
result = markup in matchAgainst
elif hasattr(matchAgainst, 'items'):
- result = markup.has_key(matchAgainst)
+ result = matchAgainst in markup
elif matchAgainst and isinstance(markup, basestring):
if isinstance(markup, unicode):
matchAgainst = unicode(matchAgainst)
@@ -1002,6 +1015,7 @@
result = matchAgainst == markup
return result
+
class ResultSet(list):
"""A ResultSet is just a list that keeps track of the SoupStrainer
that created it."""
@@ -1009,6 +1023,7 @@
list.__init__([])
self.source = source
+
# Now, some helper functions.
def buildTagMap(default, *args):
@@ -1019,9 +1034,9 @@
for portion in args:
if hasattr(portion, 'items'):
#It's a map. Merge it.
- for k,v in portion.items():
+ for k, v in portion.items():
built[k] = v
- elif hasattr(portion, '__iter__'): # is a list
+ elif hasattr(portion, '__iter__'): # is a list
#It's a list. Map each item to the default.
for k in portion:
built[k] = default
@@ -1030,6 +1045,7 @@
built[portion] = default
return built
+
# Now, the parser classes.
class BeautifulStoneSoup(Tag, SGMLParser):
@@ -1074,7 +1090,7 @@
# can be replaced with a single space. A text node that contains
# fancy Unicode spaces (usually non-breaking) should be left
# alone.
- STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, }
+ STRIP_ASCII_SPACES = {9: None, 10: None, 12: None, 13: None, 32: None, }
def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None,
markupMassage=True, smartQuotesTo=XML_ENTITIES,
@@ -1151,7 +1167,7 @@
n = int(name)
except ValueError:
return
- if not 0 <= n <= 127 : # ASCII ends at 127, not 255
+ if not 0 <= n <= 127: # ASCII ends at 127, not 255
return
return self.convert_codepoint(n)
@@ -1162,9 +1178,10 @@
if not hasattr(self, 'originalEncoding'):
self.originalEncoding = None
else:
- dammit = UnicodeDammit\
- (markup, [self.fromEncoding, inDocumentEncoding],
- smartQuotesTo=self.smartQuotesTo, isHTML=isHTML)
+ dammit = UnicodeDammit(markup,
+ [self.fromEncoding, inDocumentEncoding],
+ smartQuotesTo=self.smartQuotesTo,
+ isHTML=isHTML)
markup = dammit.unicode
self.originalEncoding = dammit.originalEncoding
self.declaredHTMLEncoding = dammit.declaredHTMLEncoding
@@ -1194,7 +1211,7 @@
#print "__getattr__ called on %s.%s" % (self.__class__, methodName)
if methodName.startswith('start_') or methodName.startswith('end_') \
- or methodName.startswith('do_'):
+ or methodName.startswith('do_'):
return SGMLParser.__getattr__(self, methodName)
elif not methodName.startswith('__'):
return Tag.__getattr__(self, methodName)
@@ -1204,8 +1221,8 @@
def isSelfClosingTag(self, name):
"""Returns true iff the given string is the name of a
self-closing tag according to this parser."""
- return self.SELF_CLOSING_TAGS.has_key(name) \
- or self.instanceSelfClosingTags.has_key(name)
+ return name in self.SELF_CLOSING_TAGS or \
+ name in self.instanceSelfClosingTags
def reset(self):
Tag.__init__(self, self, self.ROOT_TAG_NAME)
@@ -1244,8 +1261,8 @@
currentData = ' '
self.currentData = []
if self.parseOnlyThese and len(self.tagStack) <= 1 and \
- (not self.parseOnlyThese.text or \
- not self.parseOnlyThese.search(currentData)):
+ (not self.parseOnlyThese.text or not
+ self.parseOnlyThese.search(currentData)):
return
o = containerClass(currentData)
o.setup(self.currentTag, self.previous)
@@ -1254,7 +1271,6 @@
self.previous = o
self.currentTag.contents.append(o)
-
def _popToTag(self, name, inclusivePop=True):
"""Pops the tag stack up to and including the most recent
instance of the given tag. If inclusivePop is false, pops the tag
@@ -1296,8 +1312,8 @@
"""
nestingResetTriggers = self.NESTABLE_TAGS.get(name)
- isNestable = nestingResetTriggers != None
- isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
+ isNestable = nestingResetTriggers is not None
+ isResetNesting = name in self.RESET_NESTING_TAGS
popTo = None
inclusive = True
for i in range(len(self.tagStack)-1, 0, -1):
@@ -1310,7 +1326,7 @@
if (nestingResetTriggers is not None
and p.name in nestingResetTriggers) \
or (nestingResetTriggers is None and isResetNesting
- and self.RESET_NESTING_TAGS.has_key(p.name)):
+ and p.name in self.RESET_NESTING_TAGS):
#If we encounter one of the nesting reset triggers
#peculiar to this tag, or we encounter another tag
@@ -1337,7 +1353,8 @@
self._smartPop(name)
if self.parseOnlyThese and len(self.tagStack) <= 1 \
- and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)):
+ and (self.parseOnlyThese.text or
+ not self.parseOnlyThese.searchTag(name, attrs)):
return
tag = Tag(self, name, attrs, self.currentTag, self.previous)
@@ -1411,7 +1428,7 @@
data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref)
if not data and self.convertHTMLEntities and \
- not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref):
+ not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref):
# TODO: We've got a problem here. We're told this is
# an entity reference, but it's not an XML entity
# reference or an HTML entity reference. Nonetheless,
@@ -1448,12 +1465,12 @@
declaration as a CData object."""
j = None
if self.rawdata[i:i+9] == '<![CDATA[':
- k = self.rawdata.find(']]>', i)
- if k == -1:
- k = len(self.rawdata)
- data = self.rawdata[i+9:k]
- j = k+3
- self._toStringSubclass(data, CData)
+ k = self.rawdata.find(']]>', i)
+ if k == -1:
+ k = len(self.rawdata)
+ data = self.rawdata[i+9:k]
+ j = k + 3
+ self._toStringSubclass(data, CData)
else:
try:
j = SGMLParser.parse_declaration(self, i)
@@ -1463,6 +1480,7 @@
j = i + len(toHandle)
return j
+
class BeautifulSoup(BeautifulStoneSoup):
"""This parser knows the following facts about HTML:
@@ -1512,18 +1530,18 @@
BeautifulStoneSoup before writing your own subclass."""
def __init__(self, *args, **kwargs):
- if not kwargs.has_key('smartQuotesTo'):
+ if not 'smartQuotesTo' in kwargs:
kwargs['smartQuotesTo'] = self.HTML_ENTITIES
kwargs['isHTML'] = True
BeautifulStoneSoup.__init__(self, *args, **kwargs)
SELF_CLOSING_TAGS = buildTagMap(None,
- ('br' , 'hr', 'input', 'img', 'meta',
+ ('br', 'hr', 'input', 'img', 'meta',
'spacer', 'link', 'frame', 'base', 'col'))
PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea'])
- QUOTE_TAGS = {'script' : None, 'textarea' : None}
+ QUOTE_TAGS = {'script': None, 'textarea': None}
#According to the HTML standard, each of these inline tags can
#contain another tag of the same type. Furthermore, it's common
@@ -1537,21 +1555,21 @@
NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del')
#Lists can contain other lists, but there are restrictions.
- NESTABLE_LIST_TAGS = { 'ol' : [],
- 'ul' : [],
- 'li' : ['ul', 'ol'],
- 'dl' : [],
- 'dd' : ['dl'],
- 'dt' : ['dl'] }
+ NESTABLE_LIST_TAGS = {'ol': [],
+ 'ul': [],
+ 'li': ['ul', 'ol'],
+ 'dl': [],
+ 'dd': ['dl'],
+ 'dt': ['dl']}
#Tables can contain other tables, but there are restrictions.
- NESTABLE_TABLE_TAGS = {'table' : [],
- 'tr' : ['table', 'tbody', 'tfoot', 'thead'],
- 'td' : ['tr'],
- 'th' : ['tr'],
- 'thead' : ['table'],
- 'tbody' : ['table'],
- 'tfoot' : ['table'],
+ NESTABLE_TABLE_TAGS = {'table': [],
+ 'tr': ['table', 'tbody', 'tfoot', 'thead'],
+ 'td': ['tr'],
+ 'th': ['tr'],
+ 'thead': ['table'],
+ 'tbody': ['table'],
+ 'tfoot': ['table'],
}
NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre')
@@ -1587,11 +1605,11 @@
contentType = value
contentTypeIndex = i
- if httpEquiv and contentType: # It's an interesting meta tag.
+ if httpEquiv and contentType: # It's an interesting meta tag.
match = self.CHARSET_RE.search(contentType)
if match:
if (self.declaredHTMLEncoding is not None or
- self.originalEncoding == self.fromEncoding):
+ self.originalEncoding == self.fromEncoding):
# An HTML encoding was sniffed while converting
# the document to Unicode, or an HTML encoding was
# sniffed during a previous pass through the
@@ -1616,9 +1634,11 @@
if tag and tagNeedsEncodingSubstitution:
tag.containsSubstitutions = True
+
class StopParsing(Exception):
pass
+
class ICantBelieveItsBeautifulSoup(BeautifulSoup):
"""The BeautifulSoup class is oriented towards skipping over
@@ -1644,10 +1664,10 @@
it's valid HTML and BeautifulSoup screwed up by assuming it
wouldn't be."""
- I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \
- ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong',
- 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b',
- 'big')
+ I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = (
+ 'em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', 'cite',
+ 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', 'big'
+ )
I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript',)
@@ -1655,6 +1675,7 @@
I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS,
I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS)
+
class MinimalSoup(BeautifulSoup):
"""The MinimalSoup class is for parsing HTML that contains
pathologically bad markup. It makes no assumptions about tag
@@ -1668,6 +1689,7 @@
RESET_NESTING_TAGS = buildTagMap('noscript')
NESTABLE_TAGS = {}
+
class BeautifulSOAP(BeautifulStoneSoup):
"""This class will push a tag with only a single string child into
the tag's parent as an attribute. The attribute's name is the tag
@@ -1695,10 +1717,11 @@
parent._getAttrMap()
if (isinstance(tag, Tag) and len(tag.contents) == 1 and
isinstance(tag.contents[0], NavigableString) and
- not parent.attrMap.has_key(tag.name)):
+ not tag.name in parent.attrMap):
parent[tag.name] = tag.contents[0]
BeautifulStoneSoup.popTag(self)
+
#Enterprise class names! It has come to our attention that some people
#think the names of the Beautiful Soup parser classes are too silly
#and "unprofessional" for use in enterprise screen-scraping. We feel
@@ -1749,6 +1772,7 @@
except ImportError:
pass
+
class UnicodeDammit:
"""A class for detecting the encoding of a *ML document and
converting it to a Unicode string. If the source encoding is
@@ -1759,14 +1783,14 @@
# meta tags to the corresponding Python codec names. It only covers
# values that aren't in Python's aliases and can't be determined
# by the heuristics in find_codec.
- CHARSET_ALIASES = { "macintosh" : "mac-roman",
- "x-sjis" : "shift-jis" }
+ CHARSET_ALIASES = {"macintosh": "mac-roman",
+ "x-sjis": "shift-jis"}
def __init__(self, markup, overrideEncodings=[],
smartQuotesTo='xml', isHTML=False):
self.declaredHTMLEncoding = None
self.markup, documentEncoding, sniffedEncoding = \
- self._detectEncoding(markup, isHTML)
+ self._detectEncoding(markup, isHTML)
self.smartQuotesTo = smartQuotesTo
self.triedEncodings = []
if markup == '' or isinstance(markup, unicode):
@@ -1819,9 +1843,8 @@
if self.smartQuotesTo and proposed.lower() in("windows-1252",
"iso-8859-1",
"iso-8859-2"):
- markup = re.compile("([\x80-\x9f])").sub \
- (lambda(x): self._subMSChar(x.group(1)),
- markup)
+ markup = re.compile("([\x80-\x9f])").sub(
+ lambda(x): self._subMSChar(x.group(1)), markup)
try:
# print "Trying to convert document to %s" % proposed
@@ -1841,11 +1864,11 @@
# strip Byte Order Mark (if present)
if (len(data) >= 4) and (data[:2] == '\xfe\xff') \
- and (data[2:4] != '\x00\x00'):
+ and (data[2:4] != '\x00\x00'):
encoding = 'utf-16be'
data = data[2:]
- elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \
- and (data[2:4] != '\x00\x00'):
+ elif (len(data) >= 4) and \
+ (data[:2] == '\xff\xfe') and (data[2:4] != '\x00\x00'):
encoding = 'utf-16le'
data = data[2:]
elif data[:3] == '\xef\xbb\xbf':
@@ -1871,8 +1894,8 @@
# UTF-16BE
sniffed_xml_encoding = 'utf-16be'
xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
- elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \
- and (xml_data[2:4] != '\x00\x00'):
+ elif (len(xml_data) >= 4) and \
+ (xml_data[:2] == '\xfe\xff') and (xml_data[2:4] != '\x00\x00'):
# UTF-16BE with BOM
sniffed_xml_encoding = 'utf-16be'
xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
@@ -1881,7 +1904,7 @@
sniffed_xml_encoding = 'utf-16le'
xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \
- (xml_data[2:4] != '\x00\x00'):
+ (xml_data[2:4] != '\x00\x00'):
# UTF-16LE with BOM
sniffed_xml_encoding = 'utf-16le'
xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
@@ -1927,7 +1950,6 @@
xml_encoding = sniffed_xml_encoding
return xml_data, xml_encoding, sniffed_xml_encoding
-
def find_codec(self, charset):
return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \
or (charset and self._codec(charset.replace("-", ""))) \
@@ -1945,63 +1967,70 @@
return codec
EBCDIC_TO_ASCII_MAP = None
+
def _ebcdic_to_ascii(self, s):
c = self.__class__
if not c.EBCDIC_TO_ASCII_MAP:
- emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15,
- 16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31,
- 128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7,
- 144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26,
- 32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33,
- 38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94,
- 45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63,
- 186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34,
- 195,97,98,99,100,101,102,103,104,105,196,197,198,199,200,
- 201,202,106,107,108,109,110,111,112,113,114,203,204,205,
- 206,207,208,209,126,115,116,117,118,119,120,121,122,210,
- 211,212,213,214,215,216,217,218,219,220,221,222,223,224,
- 225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72,
- 73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81,
- 82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89,
- 90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57,
- 250,251,252,253,254,255)
+ emap = (0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28,
+ 29, 30, 31, 128, 129, 130, 131, 132, 10, 23, 27, 136, 137,
+ 138, 139, 140, 5, 6, 7, 144, 145, 22, 147, 148, 149, 150, 4,
+ 152, 153, 154, 155, 20, 21, 158, 26, 32, 160, 161, 162, 163,
+ 164, 165, 166, 167, 168, 91, 46, 60, 40, 43, 33, 38, 169,
+ 170, 171, 172, 173, 174, 175, 176, 177, 93, 36, 42, 41, 59,
+ 94, 45, 47, 178, 179, 180, 181, 182, 183, 184, 185, 124, 44,
+ 37, 95, 62, 63, 186, 187, 188, 189, 190, 191, 192, 193, 194,
+ 96, 58, 35, 64, 39, 61, 34, 195, 97, 98, 99, 100, 101, 102,
+ 103, 104, 105, 196, 197, 198, 199, 200, 201, 202, 106, 107,
+ 108, 109, 110, 111, 112, 113, 114, 203, 204, 205, 206, 207,
+ 208, 209, 126, 115, 116, 117, 118, 119, 120, 121, 122, 210,
+ 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222,
+ 223, 224, 225, 226, 227, 228, 229, 230, 231, 123, 65, 66,
+ 67, 68, 69, 70, 71, 72, 73, 232, 233, 234, 235, 236, 237,
+ 125, 74, 75, 76, 77, 78, 79, 80, 81, 82, 238, 239, 240, 241,
+ 242, 243, 92, 159, 83, 84, 85, 86, 87, 88, 89, 90, 244, 245,
+ 246, 247, 248, 249, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57,
+ 250, 251, 252, 253, 254, 255)
import string
- c.EBCDIC_TO_ASCII_MAP = string.maketrans( \
- ''.join(map(chr, range(256))), ''.join(map(chr, emap)))
+ c.EBCDIC_TO_ASCII_MAP = string.maketrans(''.join(map(chr,
+ range(256))),
+ ''.join(map(chr, emap)))
return s.translate(c.EBCDIC_TO_ASCII_MAP)
- MS_CHARS = { '\x80' : ('euro', '20AC'),
- '\x81' : ' ',
- '\x82' : ('sbquo', '201A'),
- '\x83' : ('fnof', '192'),
- '\x84' : ('bdquo', '201E'),
- '\x85' : ('hellip', '2026'),
- '\x86' : ('dagger', '2020'),
- '\x87' : ('Dagger', '2021'),
- '\x88' : ('circ', '2C6'),
- '\x89' : ('permil', '2030'),
- '\x8A' : ('Scaron', '160'),
- '\x8B' : ('lsaquo', '2039'),
- '\x8C' : ('OElig', '152'),
- '\x8D' : '?',
- '\x8E' : ('#x17D', '17D'),
- '\x8F' : '?',
- '\x90' : '?',
- '\x91' : ('lsquo', '2018'),
- '\x92' : ('rsquo', '2019'),
- '\x93' : ('ldquo', '201C'),
- '\x94' : ('rdquo', '201D'),
- '\x95' : ('bull', '2022'),
- '\x96' : ('ndash', '2013'),
- '\x97' : ('mdash', '2014'),
- '\x98' : ('tilde', '2DC'),
- '\x99' : ('trade', '2122'),
- '\x9a' : ('scaron', '161'),
- '\x9b' : ('rsaquo', '203A'),
- '\x9c' : ('oelig', '153'),
- '\x9d' : '?',
- '\x9e' : ('#x17E', '17E'),
- '\x9f' : ('Yuml', ''),}
+ MS_CHARS = {
+ '\x80': ('euro', '20AC'),
+ '\x81': ' ',
+ '\x82': ('sbquo', '201A'),
+ '\x83': ('fnof', '192'),
+ '\x84': ('bdquo', '201E'),
+ '\x85': ('hellip', '2026'),
+ '\x86': ('dagger', '2020'),
+ '\x87': ('Dagger', '2021'),
+ '\x88': ('circ', '2C6'),
+ '\x89': ('permil', '2030'),
+ '\x8A': ('Scaron', '160'),
+ '\x8B': ('lsaquo', '2039'),
+ '\x8C': ('OElig', '152'),
+ '\x8D': '?',
+ '\x8E': ('#x17D', '17D'),
+ '\x8F': '?',
+ '\x90': '?',
+ '\x91': ('lsquo', '2018'),
+ '\x92': ('rsquo', '2019'),
+ '\x93': ('ldquo', '201C'),
+ '\x94': ('rdquo', '201D'),
+ '\x95': ('bull', '2022'),
+ '\x96': ('ndash', '2013'),
+ '\x97': ('mdash', '2014'),
+ '\x98': ('tilde', '2DC'),
+ '\x99': ('trade', '2122'),
+ '\x9a': ('scaron', '161'),
+ '\x9b': ('rsaquo', '203A'),
+ '\x9c': ('oelig', '153'),
+ '\x9d': '?',
+ '\x9e': ('#x17E', '17E'),
+ '\x9f': ('Yuml', ''),
+ }
#######################################################################
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11155
Revision: 11155
Author: xqt
Date: 2013-03-02 10:39:02 +0000 (Sat, 02 Mar 2013)
Log Message:
-----------
some PEP8 changes
Modified Paths:
--------------
trunk/pywikipedia/basic.py
Modified: trunk/pywikipedia/basic.py
===================================================================
--- trunk/pywikipedia/basic.py 2013-03-02 10:37:00 UTC (rev 11154)
+++ trunk/pywikipedia/basic.py 2013-03-02 10:39:02 UTC (rev 11155)
@@ -36,6 +36,7 @@
'&params;': pagegenerators.parameterHelp
}
+
class BasicBot:
# Edit summary message that should be used is placed on /i18n subdirectory.
# The file containing these messages should have the same name as the caller
@@ -108,7 +109,7 @@
% page.title())
# show what was changed
pywikibot.showDiff(page.get(), text)
- pywikibot.output(u'Comment: %s' %comment)
+ pywikibot.output(u'Comment: %s' % comment)
choice = pywikibot.inputChoice(
u'Do you want to accept these changes?',
['Yes', 'No'], ['y', 'N'], 'N')
@@ -131,6 +132,7 @@
return True
return False
+
class AutoBasicBot(BasicBot):
# Intended for usage e.g. as cronjob without prompting the user.
@@ -142,23 +144,31 @@
## @since 10326
# @remarks needed by various bots
def save(self, page, text, comment=None, **kwargs):
- pywikibot.output(u'\03{lightblue}Writing to wiki on %s...\03{default}' % page.title(asLink=True))
+ pywikibot.output(u'\03{lightblue}Writing to wiki on %s...\03{default}'
+ % page.title(asLink=True))
comment_output = comment or pywikibot.action
- pywikibot.output(u'\03{lightblue}Comment: %s\03{default}' % comment_output)
+ pywikibot.output(u'\03{lightblue}Comment: %s\03{default}'
+ % comment_output)
#pywikibot.showDiff(page.get(), text)
- for i in range(3): # try max. 3 times
+ for i in range(3):
try:
# Save the page
page.put(text, comment=comment, **kwargs)
except pywikibot.LockedPage:
- pywikibot.output(u"\03{lightblue}Page %s is locked; skipping.\03{default}" % page.title(asLink=True))
+ pywikibot.output(
+ u"\03{lightblue}Page %s is locked; skipping.\03{default}"
+ % page.title(asLink=True))
except pywikibot.EditConflict:
- pywikibot.output(u'\03{lightblue}Skipping %s because of edit conflict\03{default}' % (page.title()))
+ pywikibot.output(
+ u'\03{lightblue}Skipping %s because of edit '
+ u'conflict\03{default}' % (page.title()))
except pywikibot.SpamfilterError, error:
- pywikibot.output(u'\03{lightblue}Cannot change %s because of spam blacklist entry %s\03{default}' % (page.title(), error.url))
+ pywikibot.output(
+ u'\03{lightblue}Cannot change %s because of spam blacklist '
+ u'entry %s\03{default}' % (page.title(), error.url))
else:
return True
return False
@@ -167,18 +177,22 @@
# @remarks needed by various bots
def append(self, page, text, comment=None, section=None, **kwargs):
if section:
- pywikibot.output(u'\03{lightblue}Appending to wiki on %s in section %s...\03{default}' % (page.title(asLink=True), section))
-
- for i in range(3): # try max. 3 times
+ pywikibot.output(
+ u'\03{lightblue}Appending to wiki on %s in section '
+ u'%s...\03{default}' % (page.title(asLink=True), section))
+ for i in range(3):
try:
# Append to page section
- page.append(text, comment=comment, section=section, **kwargs)
+ page.append(text, comment=comment, section=section,
+ **kwargs)
except pywikibot.PageNotSaved, error:
- pywikibot.output(u'\03{lightblue}Cannot change %s because of %s\03{default}' % (page.title(), error))
+ pywikibot.output(
+ u'\03{lightblue}Cannot change %s because of '
+ u'%s\03{default}' % (page.title(), error))
else:
return True
else:
- content = self.load( page ) # 'None' if not existing page
+ content = self.load(page) # 'None' if not existing page
if not content: # (create new page)
content = u''
@@ -197,7 +211,7 @@
Returns a list of dict with the templates parameters found.
"""
- self._content = self.load(page) # 'None' if not existing page
+ self._content = self.load(page) # 'None' if not existing page
templates = []
if not self._content:
@@ -208,7 +222,7 @@
param_default = {}
param_default.update(default)
param_default.update(tmpl[1])
- templates.append( param_default )
+ templates.append(param_default)
return templates
## @since 10326
@@ -226,8 +240,10 @@
Returns a list of jobs. This list may be empty.
"""
- try: actual = page.getVersionHistory(revCount=1)[0]
- except: pass
+ try:
+ actual = page.getVersionHistory(revCount=1)[0]
+ except:
+ pass
secure = False
for item in queue_security[0]:
@@ -235,20 +251,21 @@
secure = secure and (actual[3] == queue_security[1])
- if not secure: return []
+ if not secure:
+ return []
data = self._REGEX_eol.split(page.get())
if reset:
pywikibot.output(u'\03{lightblue}Job queue reset...\03{default}')
-
pywikibot.setAction(u'reset job queue')
- page.put(u'', minorEdit = True)
+ page.put(u'', minorEdit=True)
queue = []
for line in data:
- queue.append( line[1:].strip() )
+ queue.append(line[1:].strip())
return queue
+
def main():
# This factory is responsible for processing command line arguments
# that are also used by other scripts and that determine on which pages
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11154
Revision: 11154
Author: xqt
Date: 2013-03-02 10:37:00 +0000 (Sat, 02 Mar 2013)
Log Message:
-----------
Import order
Modified Paths:
--------------
trunk/pywikipedia/add_text.py
Modified: trunk/pywikipedia/add_text.py
===================================================================
--- trunk/pywikipedia/add_text.py 2013-03-02 10:27:00 UTC (rev 11153)
+++ trunk/pywikipedia/add_text.py 2013-03-02 10:37:00 UTC (rev 11154)
@@ -73,11 +73,13 @@
__version__ = '$Id$'
#
-import re, pagegenerators, urllib2, urllib
+import re, urllib2, urllib
+import webbrowser
+import codecs
import wikipedia as pywikibot
from pywikibot import i18n
-import codecs, config
-import webbrowser
+import pagegenerators
+import config
# This is required for the text that is shown when you run this script
# with the parameter -help.
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11153
Revision: 11153
Author: xqt
Date: 2013-03-02 10:27:00 +0000 (Sat, 02 Mar 2013)
Log Message:
-----------
some PEP8 changes
Modified Paths:
--------------
trunk/pywikipedia/add_text.py
Modified: trunk/pywikipedia/add_text.py
===================================================================
--- trunk/pywikipedia/add_text.py 2013-03-02 09:14:47 UTC (rev 11152)
+++ trunk/pywikipedia/add_text.py 2013-03-02 10:27:00 UTC (rev 11153)
@@ -66,7 +66,7 @@
#
# (C) Filnik, 2007-2010
-# (C) Pywikipedia bot team, 2007-2010
+# (C) Pywikipedia bot team, 2007-2013
#
# Distributed under the terms of the MIT license.
#
@@ -82,17 +82,23 @@
# This is required for the text that is shown when you run this script
# with the parameter -help.
docuReplacements = {
- '&params;': pagegenerators.parameterHelp,
+ '&params;': pagegenerators.parameterHelp,
}
nn_iw_msg = u'<!--interwiki (no, sv, da first; then other languages alphabetically by name)-->'
+
class NoEnoughData(pywikibot.Error):
""" Error class for when the user doesn't specified all the data needed """
+
class NothingFound(pywikibot.Error):
- """ An exception indicating that a regex has return [] instead of results."""
+ """
+ An exception indicating that a regex has return [] instead of results.
+ """
+
+
# Useful for the untagged function
def pageText(url):
""" Function to load HTML text of a URL """
@@ -104,36 +110,43 @@
response.close()
# When you load to many users, urllib2 can give this error.
except urllib2.HTTPError:
- pywikibot.output(u"Server error. Pausing for 10 seconds... " + time.strftime("%d %b %Y %H:%M:%S (UTC)", time.gmtime()) )
+ pywikibot.output(u"Server error. Pausing for 10 seconds... " +
+ time.strftime("%d %b %Y %H:%M:%S (UTC)",
+ time.gmtime()))
response.close()
time.sleep(10)
return pageText(url)
return text
-def untaggedGenerator(untaggedProject, limit = 500):
+def untaggedGenerator(untaggedProject, limit=500):
""" Function to get the pages returned by this tool:
- http://toolserver.org/~daniel/WikiSense/UntaggedImages.php """
+ http://toolserver.org/~daniel/WikiSense/UntaggedImages.php
+
+ """
lang = untaggedProject.split('.', 1)[0]
project = '.' + untaggedProject.split('.', 1)[1]
+ URL = 'http://toolserver.org/~daniel/WikiSense/UntaggedImages.php?'
if lang == 'commons':
- link = 'http://toolserver.org/~daniel/WikiSense/UntaggedImages.php?wikifam=commons.…'
+ link = '%swikifam=commons.wikimedia.org&since=-100d&until=&img_user_text=&order=img_timestamp&max=%d&order=img_timestamp&format=html' \
+ % (URL, limit)
else:
- link = 'http://toolserver.org/~daniel/WikiSense/UntaggedImages.php?wikilang=' + lang + '&wikifam=' + project + '&order=img_timestamp&max=' + str(limit) + '&ofs=0&max=' + str(limit)
+ link = '%swikilang=%s&wikifam=%s&order=img_timestamp&max=%d&ofs=0&max=%d' \
+ % (URL, lang, project, limit, limit)
text = pageText(link)
- #print text
- regexp = r"""<td valign='top' title='Name'><a href='http://.*?\.org/w/index\.php\?title=(.*?)'>.*?</a></td>"""
+ regexp = r"<td valign='top' title='Name'><a href='http://.*?\.org/w/index\.php\?title=(.*?)'>.*?</a></td>"
results = re.findall(regexp, text)
if results == []:
print link
raise NothingFound(
-'Nothing found! Try to use the tool by yourself to be sure that it works!')
+ 'Nothing found! Try to use the tool by yourself to be sure that it '
+ 'works!')
else:
for result in results:
yield pywikibot.Page(pywikibot.getSite(), result)
-def add_text(page = None, addText = None, summary = None, regexSkip = None,
- regexSkipUrl = None, always = False, up = False, putText = True,
- oldTextGiven = None, reorderEnabled = True, create=False):
+def add_text(page=None, addText=None, summary=None, regexSkip=None,
+ regexSkipUrl=None, always=False, up=False, putText=True,
+ oldTextGiven=None, reorderEnabled=True, create=False):
if not addText:
raise NoEnoughData('You have to specify what text you want to add!')
if not summary:
@@ -174,12 +187,11 @@
errorCount = 0
site = pywikibot.getSite()
- # /wiki/ is not always the right path in non-wiki projects
pathWiki = site.family.nicepath(site.lang)
if putText:
pywikibot.output(u'Loading %s...' % page.title())
- if oldTextGiven == None:
+ if oldTextGiven is None:
try:
text = page.get()
except pywikibot.NoPage:
@@ -189,29 +201,29 @@
text = u''
else:
pywikibot.output(u"%s doesn't exist, skip!" % page.title())
- return (False, False, always) # continue
+ return (False, False, always)
except pywikibot.IsRedirectPage:
pywikibot.output(u"%s is a redirect, skip!" % page.title())
- return (False, False, always) # continue
+ return (False, False, always)
else:
text = oldTextGiven
# Understand if the bot has to skip the page or not
# In this way you can use both -except and -excepturl
- if regexSkipUrl != None:
+ if regexSkipUrl is not None:
url = '%s%s' % (pathWiki, page.urlname())
result = re.findall(regexSkipUrl, site.getUrl(url))
if result != []:
pywikibot.output(
u'''Exception! regex (or word) used with -exceptUrl is in the page. Skip!
Match was: %s''' % result)
- return (False, False, always) # continue
- if regexSkip != None:
+ return (False, False, always)
+ if regexSkip is not None:
result = re.findall(regexSkip, text)
if result != []:
pywikibot.output(
u'''Exception! regex (or word) used with -except is in the page. Skip!
Match was: %s''' % result)
- return (False, False, always) # continue
+ return (False, False, always)
# If not up, text put below
if not up:
newtext = text
@@ -229,8 +241,9 @@
# nn got a message between the categories and the iw's
# and they want to keep it there, first remove it
hasCommentLine = False
- if (site.language()==u'nn'):
- regex = re.compile('(<!-- ?interwiki \(no(?:/nb)?, ?sv, ?da first; then other languages alphabetically by name\) ?-->)')
+ if (site.language() == u'nn'):
+ regex = re.compile(
+ '(<!-- ?interwiki \(no(?:/nb)?, ?sv, ?da first; then other languages alphabetically by name\) ?-->)')
found = regex.findall(newtext)
if found:
hasCommentLine = True
@@ -240,31 +253,31 @@
newtext += u"\n%s" % addText
# Reputting the categories
newtext = pywikibot.replaceCategoryLinks(newtext,
- categoriesInside, site, True)
+ categoriesInside, site,
+ True)
#Put the nn iw message back
- if site.language()==u'nn' and (interwikiInside or hasCommentLine):
+ if site.language() == u'nn' and (interwikiInside or hasCommentLine):
newtext = newtext + u'\r\n\r\n' + nn_iw_msg
# Dealing the stars' issue
allstars = []
starstext = pywikibot.removeDisabledParts(text)
for star in starsList:
- regex = re.compile('(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star,
- re.I)
+ regex = re.compile('(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
+ % star, re.I)
found = regex.findall(starstext)
if found != []:
newtext = regex.sub('', newtext)
allstars += found
if allstars != []:
- newtext = newtext.strip()+'\r\n\r\n'
+ newtext = newtext.strip() + '\r\n\r\n'
allstars.sort()
for element in allstars:
newtext += '%s\r\n' % element.strip()
# Adding the interwiki
- newtext = pywikibot.replaceLanguageLinks(newtext, interwikiInside, site)
+ newtext = pywikibot.replaceLanguageLinks(newtext, interwikiInside,
+ site)
else:
- # Adding the text
newtext += u"\n%s" % addText
- # If instead the text must be added above...
else:
newtext = addText + '\n' + text
if putText and text != newtext:
@@ -279,7 +292,8 @@
if not always:
choice = pywikibot.inputChoice(
u'Do you want to accept these changes?',
- ['Yes', 'No', 'All', 'open in Browser'], ['y', 'N', 'a', 'b'], 'N')
+ ['Yes', 'No', 'All', 'open in Browser'],
+ ['y', 'n', 'a', 'b'], 'n')
if choice == 'a':
always = True
elif choice == 'n':
@@ -326,14 +340,19 @@
else:
return (text, newtext, always)
+
def main():
# If none, the var is setted only for check purpose.
- summary = None; addText = None; regexSkip = None; regexSkipUrl = None;
- generator = None; always = False
- textfile=None
- talkPage=False
+ summary = None
+ addText = None
+ regexSkip = None
+ regexSkipUrl = None
+ generator = None
+ always = False
+ textfile = None
+ talkPage = False
reorderEnabled = True
- namespaces=[]
+ namespaces = []
# Load a lot of default generators
genFactory = pagegenerators.GeneratorFactory()
# Put the text above or below the text?
@@ -360,8 +379,7 @@
if len(arg) == 5:
generator = [pywikibot.Page(
pywikibot.getSite(),
- pywikibot.input(u'What page do you want to use?')
- )]
+ pywikibot.input(u'What page do you want to use?'))]
else:
generator = [pywikibot.Page(pywikibot.getSite(), arg[6:])]
elif arg.startswith('-excepturl'):
@@ -397,7 +415,6 @@
f.close()
if not generator:
generator = genFactory.getCombinedGenerator()
- # Check if there are the minimal settings
if not generator:
raise NoEnoughData(
'You have to specify the generator you want to use for the script!')
@@ -406,14 +423,14 @@
site = pywikibot.getSite()
for namespace in site.namespaces():
index = site.getNamespaceIndex(namespace)
- if index%2==1 and index>0:
+ if index % 2 == 1 and index > 0:
namespaces += [index]
generator = pagegenerators.NamespaceFilterPageGenerator(
generator, namespaces)
- # Main Loop
for page in generator:
(text, newtext, always) = add_text(page, addText, summary, regexSkip,
- regexSkipUrl, always, up, True, reorderEnabled=reorderEnabled,
+ regexSkipUrl, always, up, True,
+ reorderEnabled=reorderEnabled,
create=talkPage)
if __name__ == "__main__":