Revision: 8707
Author: xqt
Date: 2010-11-07 16:45:42 +0000 (Sun, 07 Nov 2010)
Log Message:
-----------
MaxTriesExceededError() for postData
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2010-11-07 16:11:18 UTC (rev 8706)
+++ trunk/pywikipedia/wikipedia.py 2010-11-07 16:45:42 UTC (rev 8707)
@@ -5164,7 +5164,7 @@
retry_attempt += 1
if retry_attempt > config.maxretries:
raise MaxTriesExceededError()
- output(u"""WARNING: Could not open '%s'.\nMaybe the server is down. Retrying in %i minutes..."""
+ output(u"WARNING: Could not open '%s'.\nMaybe the server is down. Retrying in %i minutes..."
% (url, retry_idle_time))
time.sleep(retry_idle_time * 60)
# Next time wait longer, but not longer than half an hour
@@ -5179,7 +5179,10 @@
except Exception, e:
output(u'%s' %e)
if config.retry_on_fail:
- output(u"""WARNING: Could not open '%s'. Maybe the server or\n your connection is down. Retrying in %i minutes..."""
+ retry_attempt += 1
+ if retry_attempt > config.maxretries:
+ raise MaxTriesExceededError()
+ output(u"WARNING: Could not open '%s'. Maybe the server or\n your connection is down. Retrying in %i minutes..."
% (url, retry_idle_time))
time.sleep(retry_idle_time * 60)
retry_idle_time *= 2
@@ -5287,6 +5290,7 @@
# case the server is down or overloaded).
# Wait for retry_idle_time minutes (growing!) between retries.
retry_idle_time = 1
+ retry_attempt = 0
while True:
try:
request = urllib2.Request(url, data, headers)
@@ -5310,8 +5314,11 @@
elif e.code == 504:
output(u'HTTPError: %s %s' % (e.code, e.msg))
if retry:
+ retry_attempt += 1
+ if retry_attempt > config.maxretries:
+ raise MaxTriesExceededError()
output(
-u"""WARNING: Could not open '%s'.Maybe the server or\n your connection is down. Retrying in %i minutes..."""
+u"WARNING: Could not open '%s'.Maybe the server or\n your connection is down. Retrying in %i minutes..."
% (url, retry_idle_time))
time.sleep(retry_idle_time * 60)
# Next time wait longer,
@@ -5327,8 +5334,11 @@
except Exception, e:
output(u'%s' %e)
if retry:
+ retry_attempt += 1
+ if retry_attempt > config.maxretries:
+ raise MaxTriesExceededError()
output(
-u"""WARNING: Could not open '%s'. Maybe the server or\n your connection is down. Retrying in %i minutes..."""
+u"WARNING: Could not open '%s'. Maybe the server or\n your connection is down. Retrying in %i minutes..."
% (url, retry_idle_time))
time.sleep(retry_idle_time * 60)
retry_idle_time *= 2
Revision: 8706
Author: xqt
Date: 2010-11-07 16:11:18 +0000 (Sun, 07 Nov 2010)
Log Message:
-----------
Fix for Page.get(expandtemplates=True)
patch bug #3096196 submitted by lankier
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2010-11-07 14:55:50 UTC (rev 8705)
+++ trunk/pywikipedia/wikipedia.py 2010-11-07 16:11:18 UTC (rev 8706)
@@ -592,20 +592,21 @@
retrieved yet, or if force is True. This can raise the following
exceptions that should be caught by the calling code:
- NoPage: The page does not exist
- IsRedirectPage: The page is a redirect. The argument of the
+ - NoPage: The page does not exist
+ - IsRedirectPage: The page is a redirect. The argument of the
exception is the title of the page it redirects to.
- SectionError: The subject does not exist on a page with a # link
+ - SectionError: The section does not exist on a page with a # link
- If get_redirect is True, return the redirect text and save the
- target of the redirect, do not raise an exception.
- If force is True, reload all page attributes, including
- errors.
- If change_edit_time is False, do not check this version for changes
- before saving. This should be used only if the page has been loaded
- previously.
- If expandtemplates is True, all templates in the page content are
- fully resolved too (if API is used).
+ @param force: reload all page attributes, including errors.
+ @param get_redirect: return the redirect text, do not follow the
+ redirect, do not raise an exception.
+ @param sysop: if the user has a sysop account, use it to retrieve
+ this page
+ @param change_edit_time: if False, do not check this version for
+ changes before saving. This should be used only if the page has
+ been loaded previously.
+ @param expandtemplates: all templates in the page content are fully
+ resolved too (if API is used).
"""
# NOTE: The following few NoPage exceptions could already be thrown at
@@ -624,12 +625,14 @@
if self.site().isInterwikiLink(self.title()):
raise NoPage('%s is not a local page on %s!'
% (self.aslink(), self.site()))
- if force or expandtemplates:
- # When forcing, we retry the page no matter what. Old exceptions
- # and contents do not apply any more.
- for attr in ['_redirarg', '_getexception', '_contents']:
+ if force:
+ # When forcing, we retry the page no matter what:
+ # * Old exceptions and contents do not apply any more
+ # * Deleting _contents and _expandcontents to force reload
+ for attr in ['_redirarg', '_getexception',
+ '_contents', '_expandcontents']:
if hasattr(self, attr):
- delattr(self,attr)
+ delattr(self, attr)
else:
# Make sure we re-raise an exception we got on an earlier attempt
if hasattr(self, '_redirarg') and not get_redirect:
@@ -640,10 +643,18 @@
else:
raise self._getexception
# Make sure we did try to get the contents once
- if not hasattr(self, '_contents'):
+ if expandtemplates:
+ attr = '_expandcontents'
+ else:
+ attr = '_contents'
+ if not hasattr(self, attr):
try:
- self._contents = self._getEditPage(get_redirect=get_redirect, throttle=throttle, sysop=sysop,
- expandtemplates = expandtemplates)
+ contents = self._getEditPage(get_redirect=get_redirect, throttle=throttle, sysop=sysop,
+ expandtemplates = expandtemplates)
+ if expandtemplates:
+ self._expandcontents = contents
+ else:
+ self._contents = contents
hn = self.section()
if hn:
m = re.search("=+ *%s *=+" % hn, self._contents)
@@ -669,6 +680,8 @@
output("The IP address is blocked, retry by login.")
self.site().forceLogin(sysop=sysop)
return self.get(force, get_redirect, throttle, sysop, change_edit_time)
+ if expandtemplates:
+ return self._expandcontents
return self._contents
def _getEditPage(self, get_redirect=False, throttle=True, sysop=False,
@@ -920,7 +933,11 @@
def getOldVersion(self, oldid, force=False, get_redirect=False,
throttle=True, sysop=False, change_edit_time=True):
- """Return text of an old revision of this page; same options as get()."""
+ """Return text of an old revision of this page; same options as get().
+
+ @param oldid: The revid of the revision desired.
+
+ """
# TODO: should probably check for bad pagename, NoPage, and other
# exceptions that would prevent retrieving text, as get() does
Revision: 8705
Author: xqt
Date: 2010-11-07 14:55:50 +0000 (Sun, 07 Nov 2010)
Log Message:
-----------
ru-translation for imagetransfer.py - patch bug #3096561 submitted by rubin16
Modified Paths:
--------------
trunk/pywikipedia/imagetransfer.py
Modified: trunk/pywikipedia/imagetransfer.py
===================================================================
--- trunk/pywikipedia/imagetransfer.py 2010-11-07 12:27:43 UTC (rev 8704)
+++ trunk/pywikipedia/imagetransfer.py 2010-11-07 14:55:50 UTC (rev 8705)
@@ -48,6 +48,7 @@
'nl':u"Afbeelding gekopieerd vanaf %s. De beschrijving daar was:\r\n\r\n%s",
'pl':u"Ten obraz został skopiowany z %s. Oryginalny opis to:\r\n\r\n%s",
'pt':u"Esta imagem foi copiada de %s. A descrição original foi:\r\n\r\n%s",
+ 'ru':u"Изображение было скопировано с %s. Оригинальное описание содержало:\r\n\r\n%s"
'sr':u"Ова слика је копирана са %s. Оригинални опис је:\r\n\r\n%s",
'zh':u"本圖像從 %s 複製,原始說明資料:\r\n\r\n%s",
}
Revision: 8704
Author: xqt
Date: 2010-11-07 12:27:43 +0000 (Sun, 07 Nov 2010)
Log Message:
-----------
minor changes (bug #3104529)
Modified Paths:
--------------
trunk/pywikipedia/replace.py
Modified: trunk/pywikipedia/replace.py
===================================================================
--- trunk/pywikipedia/replace.py 2010-11-06 19:13:28 UTC (rev 8703)
+++ trunk/pywikipedia/replace.py 2010-11-07 12:27:43 UTC (rev 8704)
@@ -132,7 +132,7 @@
python replace.py referer referrer -file:typos.txt -excepttext:HTTP
-Please type "replace.py | more" if you can't read the top of the help.
+Please type "replace.py -help | more" if you can't read the top of the help.
"""
from __future__ import generators
#
Revision: 8703
Author: xqt
Date: 2010-11-06 19:13:28 +0000 (Sat, 06 Nov 2010)
Log Message:
-----------
global test for unicode bug #3081100
enable removing hi-interwikis if code is not affected (follow up for r8607, r8634, r8687)
Modified Paths:
--------------
trunk/pywikipedia/interwiki.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2010-11-06 13:20:55 UTC (rev 8702)
+++ trunk/pywikipedia/interwiki.py 2010-11-06 19:13:28 UTC (rev 8703)
@@ -1867,7 +1867,7 @@
if not globalvar.cleanup or \
rmPage.aslink(forceInterwiki=True) not in globalvar.remove or \
rmPage.site().sitename() == 'wikipedia:hi' and \
- page.site().sitename() != 'wikipedia:de': #work-arround for bug #3081100 (do not remove hi-pages)
+ pywikibot.unicode_error: #work-arround for bug #3081100 (do not remove hi-pages)
new[rmsite] = rmPage
pywikibot.output(
u"WARNING: %s is either deleted or has a mismatching disambiguation state."
@@ -2566,17 +2566,6 @@
globalvar=Global()
if __name__ == "__main__":
- # TEST for bug #3081100
- if not __import__('unicodedata').normalize('NFC', u'\u092e\u093e\u0930\u094d\u0915 \u091c\u093c\u0941\u0915\u0947\u0930\u092c\u0930\u094d\u0917') == u'\u092e\u093e\u0930\u094d\u0915 \u091c\u093c\u0941\u0915\u0947\u0930\u092c\u0930\u094d\u0917':
- pywikibot.output("""
-
-================================================================================
-\03{lightyellow}WARNING:\03{lightred} your python version might trigger issue #3081100\03{default}
-See https://sourceforge.net/tracker/index.php?func=detail&aid=3081100&group_id=… for more information.
-\03{lightyellow}Use an older python version (<2.6.5) if you are running on wikimedia sites!\03{default}
-================================================================================
-
-""")
try:
main()
finally:
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2010-11-06 13:20:55 UTC (rev 8702)
+++ trunk/pywikipedia/wikipedia.py 2010-11-06 19:13:28 UTC (rev 8703)
@@ -7462,6 +7462,18 @@
# the argument is not global. Let the specific bot script care
# about it.
nonGlobalArgs.append(arg)
+
+ # TEST for bug #3081100
+ if unicode_error and (default_code == 'hi' or moduleName=='interwiki'):
+ output("""
+
+================================================================================
+\03{lightyellow}WARNING:\03{lightred} your python version might trigger issue #3081100\03{default}
+See https://sourceforge.net/tracker/index.php?func=detail&aid=3081100&group_id=… for more information.
+\03{lightyellow}Use an older python version (<2.6.5) if you are running on wikimedia sites!\03{default}
+================================================================================
+
+""")
if verbose:
output(u'Pywikipediabot %s' % (version.getversion()))
output(u'Python %s' % (sys.version))
@@ -7478,6 +7490,14 @@
verbose = 0
debug = False
+# TEST for bug #3081100
+unicode_error = __import__('unicodedata').normalize(
+ 'NFC',
+ u'\u092e\u093e\u0930\u094d\u0915 \u091c\u093c\u0941\u0915\u0947\u0930\u092c\u0930\u094d\u0917'
+ ) != u'\u092e\u093e\u0930\u094d\u0915 \u091c\u093c\u0941\u0915\u0947\u0930\u092c\u0930\u094d\u0917'
+if unicode_error:
+ print u'unicode test: triggers problem #3081100'
+
default_family = config.family
default_code = config.mylang
logfile = None
Revision: 8702
Author: filnik
Date: 2010-11-06 13:20:55 +0000 (Sat, 06 Nov 2010)
Log Message:
-----------
Bugfix for move-block
Modified Paths:
--------------
trunk/pywikipedia/blockpageschecker.py
Modified: trunk/pywikipedia/blockpageschecker.py
===================================================================
--- trunk/pywikipedia/blockpageschecker.py 2010-11-06 03:10:04 UTC (rev 8701)
+++ trunk/pywikipedia/blockpageschecker.py 2010-11-06 13:20:55 UTC (rev 8702)
@@ -254,7 +254,7 @@
category = pywikibot.translate(site, categoryToCheck)
commentUsed = pywikibot.translate(site, comment)
if not generator:
- gen = genFactory.getCombinedGenerator()
+ generator = genFactory.getCombinedGenerator()
if not generator:
generator = list()
pywikibot.output(u'Loading categories...')
@@ -319,7 +319,10 @@
text, changes = re.subn('<noinclude>(%s)</noinclude>' % replaceToPerform, '', text)
if changes == 0:
text, changes = re.subn('(%s)' % replaceToPerform, '', text)
- pywikibot.output(u'The page is editable for all, deleting the template...')
+ msg = u'The page is editable for all'
+ if not moveBlockCheck:
+ msg += ", deleting the template.."
+ pywikibot.output(msg)
elif editRestr[0] == 'sysop':
# total edit protection
@@ -372,23 +375,27 @@
# move-total-protection
if (TemplateInThePage[0] == 'sysop-move' and TTMP != None) or (TemplateInThePage[0] == 'unique' and TU != None):
pywikibot.output(u'The page is protected from moving to the sysop, skipping...')
+ if TU != None:
+ text = oldtext # no changes needed, better to revert the old text.
else:
pywikibot.output(u'The page is protected from moving to the sysop, but the template seems not correct. Fixing...')
- if TU != None:
- text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
- else:
- text, changes = re.subn(TemplateInThePage[1], TNR[3], text)
+ if TU != None:
+ text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
+ else:
+ text, changes = re.subn(TemplateInThePage[1], TNR[3], text)
elif TSMP != None or TU != None:
# implicitely moveRestr[0] = 'autoconfirmed', move-semi-protection
if TemplateInThePage[0] == 'autoconfirmed-move' or TemplateInThePage[0] == 'unique':
pywikibot.output(u'The page is movable only for the autoconfirmed users, skipping...')
+ if TU != None:
+ text = oldtext # no changes needed, better to revert the old text.
else:
pywikibot.output(u'The page is movable only for the autoconfirmed users, but the template seems not correct. Fixing...')
- if TU != None:
- text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
- else:
- text, changes = re.subn(TemplateInThePage[1], TNR[2], text)
+ if TU != None:
+ text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
+ else:
+ text, changes = re.subn(TemplateInThePage[1], TNR[2], text)
if changes == 0:
# We tried to fix move-protection templates, but it did not work.
Revision: 8700
Author: xqt
Date: 2010-11-06 02:18:01 +0000 (Sat, 06 Nov 2010)
Log Message:
-----------
saves the titles to disk (patch bug #3093682 submitted by Bináris) Thanks!
-- Introduced two new command line arguments, -save and -savenew, and an updated main() to process them
-- Updated ReplaceRobot class with two new functions and edit counter, and a new parameter
-- Extended help (description of new feature and one more example)
-- Some code wrappings to fit in 80 characters I found because I know xqt likes them :)
Modified Paths:
--------------
trunk/pywikipedia/replace.py
Modified: trunk/pywikipedia/replace.py
===================================================================
--- trunk/pywikipedia/replace.py 2010-11-06 00:48:58 UTC (rev 8699)
+++ trunk/pywikipedia/replace.py 2010-11-06 02:18:01 UTC (rev 8700)
@@ -32,6 +32,17 @@
before the one specified (may also be given as
-xmlstart:Article).
+-save Saves the titles of the articles to a file instead of
+ modifying the articles. This way you may collect titles to
+ work on in automatic mode, and process them later with
+ -file. Opens the file for append, if exists.
+ If you insert the contents of the file into a wikipage, it
+ will appear as a numbered list, and may be used with -links.
+ Argument may also be given as "-save:filename".
+
+-savenew Just like -save, except that overwrites the existing file.
+ Argument may also be given as "-savenew:filename".
+
-addcat:cat_name Adds "cat_name" category to every altered page.
-excepttitle:XYZ Skip pages with titles that contain XYZ. If the -regex
@@ -109,21 +120,30 @@
python replace.py -page:John_Doe -fix:isbn
+Let's suppose, you want to change "color" to "colour" manually, but gathering
+the articles is too slow, so you want to save the list while you are sleeping.
+You have Windows, so "python" is not necessary. Use this:
+
+ replace.py -xml -save:color.txt color colour -always
+You may use color.txt later with -file or -links, if you upload it to the wiki.
+
This command will change 'referer' to 'referrer', but not in pages which
talk about HTTP, where the typo has become part of the standard:
python replace.py referer referrer -file:typos.txt -excepttext:HTTP
+
+Please type "replace.py | more" if you can't read the top of the help.
"""
from __future__ import generators
#
-# (C) Daniel Herding & the Pywikipedia team, 2004-2009
+# (C) Daniel Herding & the Pywikipedia team, 2004-2010
#
__version__='$Id$'
#
# Distributed under the terms of the MIT license.
#
-import sys, re, time
+import sys, re, time, codecs
import wikipedia as pywikibot
import pagegenerators
import editarticle
@@ -263,7 +283,7 @@
"""
def __init__(self, generator, replacements, exceptions={},
acceptall=False, allowoverlap=False, recursive=False,
- addedCat=None, sleep=None, editSummary=''):
+ addedCat=None, sleep=None, editSummary='', articles=None):
"""
Arguments:
* generator - A generator that yields Page objects.
@@ -278,6 +298,8 @@
replaced.
* addedCat - If set to a value, add this category to every page
touched.
+ * articles - An open file to save the page titles. If None,
+ we work on our wikisite immediately (default).
Structure of the exceptions dictionary:
This dictionary can have these keys:
@@ -310,6 +332,11 @@
self.sleep = sleep
# Some function to set default editSummary should probably be added
self.editSummary = editSummary
+ self.articles = articles
+
+ #An edit counter to split the file by 100 titles if -save or -savenew
+ #is on, and to display the number of edited articles otherwise.
+ self.editcounter = 0
def isTitleExcepted(self, title):
"""
@@ -354,6 +381,31 @@
allowoverlap=self.allowoverlap)
return new_text
+ def writeEditCounter(self):
+ """ At the end of our work this writes the counter. """
+ if self.articles:
+ pywikibot.output(u'%d title%s saved.'
+ % (self.editcounter,
+ (lambda x: bool(x-1) and 's were' or ' was')
+ (self.editcounter)))
+ else:
+ pywikibot.output(u'%d page%s changed.'
+ % (self.editcounter,
+ (lambda x: bool(x-1) and 's were' or ' was')
+ (self.editcounter)))
+
+ def splitLine(self):
+ """Returns a splitline after every 100th title. Splitline is in HTML
+ comment format in case we want to insert the list into a wikipage.
+ We use it to make the file more readable.
+
+ """
+ if self.editcounter % 100:
+ return ''
+ else:
+ return (u'<!-- ***** %dth title is above this line. ***** -->\n' %
+ self.editcounter)
+
def run(self):
"""
Starts the robot.
@@ -408,7 +460,8 @@
break
choice = pywikibot.inputChoice(
u'Do you want to accept these changes?',
- ['Yes', 'No', 'Edit', 'open in Browser', 'All', "Quit"],
+ ['Yes', 'No', 'Edit', 'open in Browser', 'All',
+ 'Quit'],
['y', 'N', 'e', 'b', 'a', 'q'], 'N')
if choice == 'e':
editor = editarticle.TextEditor()
@@ -427,30 +480,57 @@
new_text = original_text
continue
if choice == 'q':
+ self.writeEditCounter()
return
if choice == 'a':
self.acceptall = True
if choice == 'y':
- page.put_async(new_text, self.editSummary)
+ if not self.articles:
+ #Primary behaviour: working on wiki
+ page.put_async(new_text, self.editSummary)
+ self.editcounter += 1
+ #Bug: this increments even if put_async fails
+ #This is separately in two clauses of if for
+ #future purposes to get feedback form put_async
+ else:
+ #Save the title for later processing instead of editing
+ self.editcounter += 1
+ self.articles.write(u'#%s\n%s'
+ % (page.title(asLink=True),
+ self.splitLine()))
+ self.articles.flush() # For the peace of our soul :-)
# choice must be 'N'
break
if self.acceptall and new_text != original_text:
- try:
- page.put(new_text, self.editSummary)
- except pywikibot.EditConflict:
- pywikibot.output(u'Skipping %s because of edit conflict'
- % (page.title(),))
- except pywikibot.SpamfilterError, e:
- pywikibot.output(
- u'Cannot change %s because of blacklist entry %s'
- % (page.title(), e.url))
- except pywikibot.PageNotSaved, error:
- pywikibot.output(u'Error putting page: %s'
- % (error.args,))
- except pywikibot.LockedPage:
- pywikibot.output(u'Skipping %s (locked page)'
- % (page.title(),))
+ if not self.articles:
+ #Primary behaviour: working on wiki
+ try:
+ page.put(new_text, self.editSummary)
+ self.editcounter += 1 #increment only on success
+ except pywikibot.EditConflict:
+ pywikibot.output(u'Skipping %s because of edit conflict'
+ % (page.title(),))
+ except pywikibot.SpamfilterError, e:
+ pywikibot.output(
+ u'Cannot change %s because of blacklist entry %s'
+ % (page.title(), e.url))
+ except pywikibot.PageNotSaved, error:
+ pywikibot.output(u'Error putting page: %s'
+ % (error.args,))
+ except pywikibot.LockedPage:
+ pywikibot.output(u'Skipping %s (locked page)'
+ % (page.title(),))
+ else:
+ #Save the title for later processing instead of editing
+ self.editcounter += 1
+ self.articles.write(u'#%s\n%s'
+ % (page.title(asLink=True),
+ self.splitLine()))
+ self.articles.flush()
+ #Finally:
+ self.writeEditCounter()
+
def prepareRegexForMySQL(pattern):
pattern = pattern.replace('\s', '[:space:]')
pattern = pattern.replace('\d', '[:digit:]')
@@ -517,6 +597,11 @@
# Between a regex and another (using -fix) sleep some time (not to waste
# too much CPU
sleep = None
+ # Do not save the page titles, rather work on wiki
+ titlefile = None
+ filename = None
+ # If we save, primary behaviour is append rather then new file
+ append = True
# Read commandline parameters.
for arg in pywikibot.handleArgs(*args):
@@ -539,9 +624,22 @@
elif arg.startswith('-page'):
if len(arg) == 5:
PageTitles.append(pywikibot.input(
- u'Which page do you want to change?'))
+ u'Which page do you want to change?'))
else:
PageTitles.append(arg[6:])
+ elif arg.startswith('-savenew'):
+ if len(arg) == 8:
+ filename = pywikibot.input(
+u'Please enter the filename to save the titles \n(will be deleted if exists):')
+ else:
+ filename = arg[9:]
+ elif arg.startswith('-save'):
+ append = False
+ if len(arg) == 5:
+ filename = pywikibot.input(
+ u'Please enter the filename to save the titles:')
+ else:
+ filename = arg[6:]
elif arg.startswith('-excepttitle:'):
exceptions['title'].append(arg[13:])
elif arg.startswith('-requiretitle:'):
@@ -585,8 +683,9 @@
replacements.append((commandline_replacements[0],
commandline_replacements[1]))
if not summary_commandline:
- editSummary = pywikibot.translate(pywikibot.getSite(), msg ) % (' (-' + commandline_replacements[0] + ' +'
- + commandline_replacements[1] + ')')
+ editSummary = pywikibot.translate(pywikibot.getSite(), msg ) % (
+ ' (-%s +%s)' % (commandline_replacements[0],
+ commandline_replacements[1]))
elif (len(commandline_replacements) > 1):
if (fix is None):
for i in xrange (0, len(commandline_replacements), 2):
@@ -598,7 +697,8 @@
for i in range(0, len(commandline_replacements), 2)]
replacementsDescription = '(%s)' % ', '.join(
[('-' + pair[0] + ' +' + pair[1]) for pair in pairs])
- editSummary = pywikibot.translate(pywikibot.getSite(), msg ) % replacementsDescription
+ editSummary = pywikibot.translate(pywikibot.getSite(), msg ) \
+ % replacementsDescription
else:
raise pywikibot.Error(
'Specifying -fix with replacements is undefined')
@@ -710,10 +810,30 @@
preloadingGen = pagegenerators.PreloadingGenerator(gen,
pageNumber=20, lookahead=100)
else:
- preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=maxquerysize)
+ preloadingGen = pagegenerators.PreloadingGenerator(gen,
+ pageNumber=maxquerysize)
+
+ #Finally we open the file for page titles or set article to None
+ if filename:
+ try:
+ #This opens in strict error mode, that means bot will stop
+ #on encoding errors with ValueError.
+ #See http://docs.python.org/library/codecs.html#codecs.open
+ titlefile = codecs.open(filename, encoding='utf-8',
+ mode=(lambda x: x and 'a' or 'w')(append))
+ except IOError:
+ pywikibot.output("%s cannot be opened for writing." %
+ filename)
+ return
bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall,
- allowoverlap, recursive, add_cat, sleep, editSummary)
- bot.run()
+ allowoverlap, recursive, add_cat, sleep, editSummary,
+ titlefile)
+ try:
+ bot.run()
+ finally:
+ if titlefile:
+ #Just for the spirit of programming (it was flushed)
+ titlefile.close()
if __name__ == "__main__":
Revision: 8699
Author: xqt
Date: 2010-11-06 00:48:58 +0000 (Sat, 06 Nov 2010)
Log Message:
-----------
Sometimes useful
Added Paths:
-----------
trunk/pywikipedia/touch.py
Removed Paths:
-------------
trunk/pywikipedia/archive/touch.py
Deleted: trunk/pywikipedia/archive/touch.py
===================================================================
--- trunk/pywikipedia/archive/touch.py 2010-11-05 15:30:44 UTC (rev 8698)
+++ trunk/pywikipedia/archive/touch.py 2010-11-06 00:48:58 UTC (rev 8699)
@@ -1,95 +0,0 @@
-#!/usr/bin/python
-# -*- coding: utf-8 -*-
-
-"""
-This bot goes over multiple pages of a wiki, and edits them without
-changing. This is for example used to get category links in templates
-working.
-
-This script understands various command-line arguments:
-
-&params;
-
--redir specifies that the robot should touch redirect pages;
- otherwise, they will be skipped.
-
-All other parameters will be regarded as a page title; in this case, the bot
-will only touch a single page.
-"""
-
-#
-# (C) Pywikipedia team
-#
-__version__='$Id: touch.py 6549 2009-03-24 02:58:56Z nicdumz $'
-#
-# Distributed under the terms of the MIT license.
-#
-
-import wikipedia as pywikibot
-import pagegenerators, config
-import sys
-
-docuReplacements = {'&params;': pagegenerators.parameterHelp}
-
-
-class TouchBot:
- def __init__(self, generator, touch_redirects):
- self.generator = generator
- self.touch_redirects = touch_redirects
-
- def run(self):
- for page in self.generator:
- try:
- # get the page, and save it using the unmodified text.
- # whether or not getting a redirect throws an exception
- # depends on the variable self.touch_redirects.
- text = page.get(get_redirect = self.touch_redirects)
- page.put(text)
- except pywikibot.NoPage:
- pywikibot.output(u"Page %s does not exist?!" % page.aslink())
- except pywikibot.IsRedirectPage:
- pywikibot.output(u"Page %s is a redirect; skipping." % page.aslink())
- except pywikibot.LockedPage:
- pywikibot.output(u"Page %s is locked?!" % page.aslink())
- except pywikibot.PageNotSaved:
- pywikibot.output(u"Page %s not saved" % page.aslink())
-
-def main():
- # Disable cosmetic changes because we don't want to modify any page
- # content, so that we don't flood the histories with minor changes.
- config.cosmetic_changes = False
- #page generator
- gen = None
- genFactory = pagegenerators.GeneratorFactory()
- redirs = False
- # If the user chooses to work on a single page, this temporary array is
- # used to read the words from the page title. The words will later be
- # joined with spaces to retrieve the full title.
- pageTitle = []
- for arg in pywikibot.handleArgs():
- if genFactory.handleArg(arg):
- continue
- if arg == '-redir':
- redirs = True
- else:
- pageTitle.append(arg)
-
- gen = genFactory.getCombinedGenerator()
- if not gen:
- if pageTitle:
- # work on a single page
- page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
- gen = iter([page])
- else:
- pywikibot.showHelp('touch')
- return
- preloadingGen = pagegenerators.PreloadingGenerator(gen)
- bot = TouchBot(preloadingGen, redirs)
- bot.run()
-
-
-if __name__ == "__main__":
- try:
- main()
- finally:
- pywikibot.stopme()
Copied: trunk/pywikipedia/touch.py (from rev 8695, trunk/pywikipedia/archive/touch.py)
===================================================================
--- trunk/pywikipedia/touch.py (rev 0)
+++ trunk/pywikipedia/touch.py 2010-11-06 00:48:58 UTC (rev 8699)
@@ -0,0 +1,95 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+"""
+This bot goes over multiple pages of a wiki, and edits them without
+changing. This is for example used to get category links in templates
+working.
+
+This script understands various command-line arguments:
+
+&params;
+
+-redir specifies that the robot should touch redirect pages;
+ otherwise, they will be skipped.
+
+All other parameters will be regarded as a page title; in this case, the bot
+will only touch a single page.
+"""
+
+#
+# (C) Pywikipedia team
+#
+__version__='$Id: touch.py 6549 2009-03-24 02:58:56Z nicdumz $'
+#
+# Distributed under the terms of the MIT license.
+#
+
+import wikipedia as pywikibot
+import pagegenerators, config
+import sys
+
+docuReplacements = {'&params;': pagegenerators.parameterHelp}
+
+
+class TouchBot:
+ def __init__(self, generator, touch_redirects):
+ self.generator = generator
+ self.touch_redirects = touch_redirects
+
+ def run(self):
+ for page in self.generator:
+ try:
+ # get the page, and save it using the unmodified text.
+ # whether or not getting a redirect throws an exception
+ # depends on the variable self.touch_redirects.
+ text = page.get(get_redirect = self.touch_redirects)
+ page.put(text)
+ except pywikibot.NoPage:
+ pywikibot.output(u"Page %s does not exist?!" % page.aslink())
+ except pywikibot.IsRedirectPage:
+ pywikibot.output(u"Page %s is a redirect; skipping." % page.aslink())
+ except pywikibot.LockedPage:
+ pywikibot.output(u"Page %s is locked?!" % page.aslink())
+ except pywikibot.PageNotSaved:
+ pywikibot.output(u"Page %s not saved" % page.aslink())
+
+def main():
+ # Disable cosmetic changes because we don't want to modify any page
+ # content, so that we don't flood the histories with minor changes.
+ config.cosmetic_changes = False
+ #page generator
+ gen = None
+ genFactory = pagegenerators.GeneratorFactory()
+ redirs = False
+ # If the user chooses to work on a single page, this temporary array is
+ # used to read the words from the page title. The words will later be
+ # joined with spaces to retrieve the full title.
+ pageTitle = []
+ for arg in pywikibot.handleArgs():
+ if genFactory.handleArg(arg):
+ continue
+ if arg == '-redir':
+ redirs = True
+ else:
+ pageTitle.append(arg)
+
+ gen = genFactory.getCombinedGenerator()
+ if not gen:
+ if pageTitle:
+ # work on a single page
+ page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
+ gen = iter([page])
+ else:
+ pywikibot.showHelp('touch')
+ return
+ preloadingGen = pagegenerators.PreloadingGenerator(gen)
+ bot = TouchBot(preloadingGen, redirs)
+ bot.run()
+
+
+if __name__ == "__main__":
+ try:
+ main()
+ finally:
+ pywikibot.stopme()
Revision: 8698
Author: xqt
Date: 2010-11-05 15:30:44 +0000 (Fri, 05 Nov 2010)
Log Message:
-----------
UserEditFilterGenerator() and options -user -skipuser
yield a page if a given user is or is not author of that page. Yet it looks at the last 100 edits only.
Modified Paths:
--------------
trunk/pywikipedia/template.py
Modified: trunk/pywikipedia/template.py
===================================================================
--- trunk/pywikipedia/template.py 2010-11-05 06:52:20 UTC (rev 8697)
+++ trunk/pywikipedia/template.py 2010-11-05 15:30:44 UTC (rev 8698)
@@ -24,6 +24,16 @@
-namespace: Only process templates in the given namespace number (may be used
multiple times).
+-user: Only process pages edited by a given user
+
+-skipuser: Only process pages not edited by a given user
+
+-timestamp: (With -user or -skipuser). Only check for a user where his edit is
+ not older than the given timestamp. Timestamp must be writen in
+ MediaWiki timestamp format which is "%Y%m%d%H%M%S"
+ If this parameter is missed, all edits are checked but this is
+ restricted to the last 100 edits.
+
-summary: Lets you pick a custom edit summary. Use quotes if edit summary contains
spaces.
@@ -94,6 +104,35 @@
import replace, pagegenerators
import re, sys, string, catlib
+def UserEditFilterGenerator(generator, username, timestamp=None, skip=False):
+ """
+ Generator which will yield Pages depending of user:username is an Author of
+ that page (only looks at the last 100 editors).
+ If timestamp is set in MediaWiki format JJJJMMDDhhmmss, older edits are
+ ignored
+ If skip is set, pages edited by the given user are ignored otherwise only
+ pages edited by this user are given back
+
+ """
+ if timestamp:
+ ts = pywikibot.Timestamp.fromtimestampformat(timestamp)
+ for page in generator:
+ editors = page.getLatestEditors(limit=100)
+ found = False
+ for ed in editors:
+ uts = pywikibot.Timestamp.fromISOformat(ed['timestamp'])
+ if not timestamp or uts>=ts:
+ if username == ed['user']:
+ found = True
+ break
+ else:
+ break
+ if found and not skip or not found and skip:
+ yield page
+ else:
+ pywikibot.output(u'Skipping %s' % page.title(asLink=True))
+
+
class XmlDumpTemplatePageGenerator:
"""
Generator which will yield Pages to pages that might contain the chosen
@@ -381,12 +420,17 @@
genFactory = pagegenerators.GeneratorFactory()
# If xmlfilename is None, references will be loaded from the live wiki.
xmlfilename = None
+ user = None
+ skip = False
+ timestamp = None
# read command line parameters
for arg in pywikibot.handleArgs():
if arg == '-remove':
remove = True
elif arg == '-subst':
subst = True
+ elif arg == ('-always'):
+ acceptAll = True
elif arg.startswith('-xml'):
if len(arg) == 4:
xmlfilename = pywikibot.input(u'Please enter the XML dump\'s filename: ')
@@ -401,8 +445,13 @@
addedCat = arg[len('-category:'):]
elif arg.startswith('-summary:'):
editSummary = arg[len('-summary:'):]
- elif arg.startswith('-always'):
- acceptAll = True
+ elif arg.startswith('-user:'):
+ user = arg[len('-user:'):]
+ elif arg.startswith('-skipuser:'):
+ user = arg[len('-skipuser:'):]
+ skip = True
+ elif arg.startswith('-timestamp:'):
+ timestamp = arg[len('-timestamp:'):]
else:
if not genFactory.handleArg(arg):
templateNames.append(pywikibot.Page(pywikibot.getSite(), arg, defaultNamespace=10).titleWithoutNamespace())
@@ -436,7 +485,8 @@
if namespaces:
gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
-
+ if user:
+ gen = UserEditFilterGenerator(gen, user, timestamp, skip)
preloadingGen = pagegenerators.PreloadingGenerator(gen)
bot = TemplateRobot(preloadingGen, templates, subst, remove, editSummary, acceptAll, addedCat)