http://www.mediawiki.org/wiki/Special:Code/pywikipedia/8946
Revision: 8946
Author: xqt
Date: 2011-02-09 22:13:09 +0000 (Wed, 09 Feb 2011)
Log Message:
-----------
enable clean_sandbox for user testpages. Patch submitted by DrTrigon. Thanks. (bug 3108403)
Modified Paths:
--------------
trunk/pywikipedia/clean_sandbox.py
Modified: trunk/pywikipedia/clean_sandbox.py
===================================================================
--- trunk/pywikipedia/clean_sandbox.py 2011-02-09 21:07:53 UTC (rev 8945)
+++ trunk/pywikipedia/clean_sandbox.py 2011-02-09 22:13:09 UTC (rev 8946)
@@ -14,6 +14,17 @@
hours and limits it between 5 and 15 minutes.
The minimum delay time is 5 minutes.
+ -userlist Use this parameter to run the script in the user
+ namespace.
+ > ATTENTION: on most wikis THIS IS FORBIDDEN FOR BOTS! <
+ > (please talk with your admin first) <
+ Since it is considered bad style to edit user pages without
+ permission, you have to pass a page containing a list of
+ users to process, e.g.
+ "-userlist:Benutzer:DrTrigonBot/Diene_Mir\!".
+ Please also be aware that the rules for when to clean the
+ user sandbox differ from those for the project sandbox.
+
"""
#
# (C) Leonardo Gregianin, 2006
@@ -21,7 +32,12 @@
# (C) Andre Engels, 2007
# (C) Siebrand Mazeland, 2007
# (C) xqt, 2009
+# (C) Dr. Trigon, 2011
#
+# DrTrigonBot: http://de.wikipedia.org/wiki/Benutzer:DrTrigonBot
+# Clean User Sandbox Robot (clean_user_sandbox.py)
+# https://fisheye.toolserver.org/browse/drtrigon/pywikipedia/clean_user_sandb…
+#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id$'
@@ -120,13 +136,18 @@
}
class SandboxBot:
- def __init__(self, hours, no_repeat, delay):
+ def __init__(self, hours, no_repeat, delay, userlist):
self.hours = hours
self.no_repeat = no_repeat
if delay == None:
self.delay = min(15, max(5, int(self.hours *60)))
else:
self.delay = max(5, delay)
+ self.site = pywikibot.getSite()
+ if userlist == None:
+ self.userlist = None
+ else:
+ self.userlist = [page.title().split(u'/')[0] for page in pywikibot.Page(self.site, userlist).linkedPages()]
def run(self):
@@ -143,29 +164,43 @@
int(time2[10:12])
return abs(t2-t1)
- mySite = pywikibot.getSite()
+ mySite = self.site
while True:
wait = False
now = time.strftime("%d %b %Y %H:%M:%S (UTC)", time.gmtime())
localSandboxTitle = pywikibot.translate(mySite, sandboxTitle)
+ IsUserSandbox = (self.userlist is not None) # DrTrigonBot (Clean User Sandbox Robot)
+ if IsUserSandbox:
+ localSandboxTitle = u'%s/' + localSandboxTitle.split(u':')[-1]
+ localSandboxTitle = [localSandboxTitle % user for user in self.userlist]
if type(localSandboxTitle) is list:
titles = localSandboxTitle
else:
titles = [localSandboxTitle,]
for title in titles:
sandboxPage = pywikibot.Page(mySite, title)
+ pywikibot.output(u'Preparing to process sandbox page %s' % sandboxPage.title(asLink=True))
try:
text = sandboxPage.get()
translatedContent = pywikibot.translate(mySite, content)
translatedMsg = pywikibot.translate(mySite, msg)
subst = 'subst:' in translatedContent
+ pos = text.find(translatedContent.strip())
if text.strip() == translatedContent.strip():
pywikibot.output(u'The sandbox is still clean, no change necessary.')
elif subst and sandboxPage.userName() == mySite.loggedInAs():
pywikibot.output(u'The sandbox might be clean, no change necessary.')
- elif text.find(translatedContent.strip()) <> 0 and not subst:
- sandboxPage.put(translatedContent, translatedMsg)
- pywikibot.output(u'Standard content was changed, sandbox cleaned.')
+ elif pos <> 0 and not subst:
+ if IsUserSandbox:
+ endpos = pos + len(translatedContent.strip())
+ if (pos < 0) or (endpos == len(text)):
+ pywikibot.output(u'The user sandbox is still clean or not set up, no change necessary.')
+ else:
+ sandboxPage.put(text[:endpos], translatedMsg)
+ pywikibot.output(u'Standard content was changed, user sandbox cleaned.')
+ else:
+ sandboxPage.put(translatedContent, translatedMsg)
+ pywikibot.output(u'Standard content was changed, sandbox cleaned.')
else:
diff = minutesDiff(sandboxPage.editTime(), time.strftime("%Y%m%d%H%M%S", time.gmtime()))
if pywikibot.verbose:
@@ -179,6 +214,9 @@
wait = True
except pywikibot.EditConflict:
pywikibot.output(u'*** Loading again because of edit conflict.\n')
+ except pywikibot.NoPage:
+ pywikibot.output(u'*** The sandbox does not exist, skipping.')
+ continue
if self.no_repeat:
pywikibot.output(u'\nDone.')
return
@@ -192,6 +230,7 @@
def main():
hours = 1
delay = None
+ userlist = None
no_repeat = True
for arg in pywikibot.handleArgs():
if arg.startswith('-hours:'):
@@ -199,11 +238,13 @@
no_repeat = False
elif arg.startswith('-delay:'):
delay = int(arg[7:])
+ elif arg.startswith('-userlist:'):
+ userlist = arg[10:]
else:
pywikibot.showHelp('clean_sandbox')
return
- bot = SandboxBot(hours, no_repeat, delay)
+ bot = SandboxBot(hours, no_repeat, delay, userlist)
try:
bot.run()
except KeyboardInterrupt:
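
The new user-sandbox branch is easy to misread in diff form, so here is a
minimal, self-contained sketch of the same decision logic. The function name
and return labels are hypothetical and not part of the patch; pywikibot calls
are replaced by plain returns, and the 'subst:' branch is omitted for brevity.

    # Stand-alone sketch of the patched decision logic in SandboxBot.run().
    # 'text' is the current page text, 'template' the translated standard
    # sandbox content. Returns a label naming what the bot would do.
    def sandbox_action(text, template, is_user_sandbox):
        template = template.strip()
        pos = text.find(template)
        if text.strip() == template:
            return 'already clean'                # nothing to do
        if pos != 0:
            if not is_user_sandbox:
                return 'rewrite with template'    # project sandbox: full reset
            endpos = pos + len(template)
            if pos < 0 or endpos == len(text):
                # template missing entirely, or nothing follows it
                return 'skip: not set up or still clean'
            # keep the user's header and the template, drop the scribbles
            return 'truncate to text[:%d]' % endpos
        return 'compare edit time, maybe wait'    # template at position 0

    if __name__ == '__main__':
        tpl = '{{Sandbox}}'
        print(sandbox_action('{{Sandbox}}', tpl, True))
        print(sandbox_action('intro\n{{Sandbox}}\nscribble', tpl, True))
        print(sandbox_action('scribble only', tpl, False))

Invoked per the new help text, e.g.
python clean_sandbox.py -userlist:Benutzer:DrTrigonBot/Diene_Mir\!
(the page name is the example given in the patch).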
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/8940
Revision: 8940
Author: russblau
Date: 2011-02-08 16:25:26 +0000 (Tue, 08 Feb 2011)
Log Message:
-----------
remove checks for non-existent InvalidTitle exception
Modified Paths:
--------------
branches/rewrite/scripts/cosmetic_changes.py
branches/rewrite/scripts/interwiki.py
branches/rewrite/scripts/redirect.py
Modified: branches/rewrite/scripts/cosmetic_changes.py
===================================================================
--- branches/rewrite/scripts/cosmetic_changes.py 2011-02-08 15:51:41 UTC (rev 8939)
+++ branches/rewrite/scripts/cosmetic_changes.py 2011-02-08 16:25:26 UTC (rev 8940)
@@ -217,10 +217,7 @@
# [[page_title|link_text]]trailing_chars
# We only work on namespace 0 because pipes and linktrails work
# differently for images and categories.
- try:
- page = pywikibot.Page(pywikibot.Link(titleWithSection, self.site))
- except pywikibot.InvalidTitle:
- return match.group()
+ page = pywikibot.Page(pywikibot.Link(titleWithSection, self.site))
if page.namespace() == 0:
# Replace underlines by spaces, also multiple underlines
titleWithSection = re.sub('_+', ' ', titleWithSection)
@@ -532,7 +529,7 @@
text,
r"([\r\n])\=\= *(Licensing|License information|{{int:license-header}}) *\=\=",
r"\1== {{int:license}} ==", exceptions, True)
-
+
# frequent field values to {{int:}} versions
text = pywikibot.replaceExcept(
text,
@@ -542,10 +539,10 @@
text,
r'(\| *Permission *\=) *(?:[Ss]ee below|[Ss]iehe unten) *([\r\n])',
r'\1\2', exceptions, True)
-
+
# added to transwikied pages
text = pywikibot.replaceExcept(text, r'__NOTOC__', '', exceptions, True)
-
+
# tracker element for js upload form
text = pywikibot.replaceExcept(
text,
@@ -553,7 +550,7 @@
'', exceptions[1:], True)
text = pywikibot.replaceExcept(text, r'{{ImageUpload\|(?:basic|full)}}',
'', exceptions, True)
-
+
# duplicated section headers
text = pywikibot.replaceExcept(
text,
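
Context for why these handlers could simply be deleted rather than replaced:
per the log message, InvalidTitle does not exist in the rewrite branch, and
Python only evaluates an except clause's expression once an exception is
actually in flight, so the old clause would have raised AttributeError instead
of catching anything. A minimal sketch of that failure mode (the stand-in
module below is hypothetical):

    # 'except module.MissingException' is dead code: any real error becomes
    # an AttributeError the moment the except expression is evaluated.
    class _FakePywikibot(object):
        pass  # stand-in for rewrite pywikibot, which has no InvalidTitle

    pywikibot = _FakePywikibot()

    try:
        try:
            raise ValueError('underlying page error')
        except pywikibot.InvalidTitle:  # evaluated only now -> AttributeError
            print('never reached')
    except AttributeError as err:
        print('handler itself failed: %s' % err)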
Modified: branches/rewrite/scripts/interwiki.py
===================================================================
--- branches/rewrite/scripts/interwiki.py 2011-02-08 15:51:41 UTC (rev 8939)
+++ branches/rewrite/scripts/interwiki.py 2011-02-08 16:25:26 UTC (rev 8940)
@@ -71,7 +71,7 @@
-number: used as -number:#, specifies that the robot should process
that amount of pages and then stop. This is only useful in
combination with -start. The default is not to stop.
-
+
-until: used as -until:title, specifies that the robot should
process pages in wiki default sort order up to, and
including, "title" and then stop. This is only useful in
@@ -227,13 +227,13 @@
only when you are sure you have first gotten the interwiki
links on the starting page exactly right).
(note: without ending colon)
-
+
-hintsareright do not follow interwiki links to sites for which hints
on existing pages are given. Note that, hints given
interactively, via the -askhint command line option,
are only effective once they have been entered, thus
interwiki links on the starting page are followed
- regardess of hints given when prompted.
+ regardless of hints given when prompted.
(Warning! Should be used with caution!)
(note: without ending colon)
@@ -598,7 +598,7 @@
memory when a big number of Page objects will be loaded
at the same time.
"""
-
+
# Please prefix the class members names by SP
# to avoid possible name clashes with pywikibot.Page
@@ -608,8 +608,8 @@
SPstore = None
# attributes created by pywikibot.Page.__init__
- SPcopy = [ '_editrestriction',
- '_site',
+ SPcopy = [ '_editrestriction',
+ '_site',
'_namespace',
'_section',
'_title',
@@ -622,7 +622,7 @@
'_startTime',
'_revisionId',
'_deletedRevs' ]
-
+
def SPdeleteStore():
if StoredPage.SPpath:
del StoredPage.SPstore
@@ -637,7 +637,7 @@
import shelve
index = 1
while True:
- path = config.datafilepath('cache', 'pagestore' + str(index))
+ path = config.datafilepath('cache', 'pagestore' + str(index))
if not os.path.exists(path): break
index += 1
StoredPage.SPpath = path
@@ -666,7 +666,7 @@
"""
def __init__(self):
# self.tree :
- # Dictionary:
+ # Dictionary:
# keys: Site
# values: list of pages
# All pages found within Site are kept in
@@ -676,7 +676,7 @@
# the remove() operation,
# keeping list values is important, because
# the order in which the pages were found matters:
- # the earlier a page is found, the closer it is to the
+ # the earlier a page is found, the closer it is to the
# Subject.originPage. Chances are that pages found within
# 2 interwiki distance from the originPage are more related
# to the original topic than pages found later on, after
@@ -691,7 +691,7 @@
def filter(self, site):
"""
Iterates over pages that are in Site site
- """
+ """
try:
for page in self.tree[site]:
yield page
@@ -731,7 +731,7 @@
"""
for site, d in self.tree.iteritems():
yield site, len(d)
-
+
def __iter__(self):
for site, plist in self.tree.iteritems():
for page in plist:
@@ -753,7 +753,7 @@
pseudocode:
todo <- [originPage]
done <- []
- while todo != []:
+ while todo != []:
pending <- todo
todo <-NL(pending) / done
done <- NL(pending) U done
@@ -761,23 +761,23 @@
There is, however, one limitation that is induced by implementation:
- to compute efficiently NL(P), one has to load the page contents of
- pages in P.
+ to compute efficiently NL(P), one has to load the page contents of
+ pages in P.
(Not only the langlinks have to be parsed from each Page, but we also want
to know if the Page is a redirect, a disambiguation, etc...)
- Because of this, the pages in pending have to be preloaded.
+ Because of this, the pages in pending have to be preloaded.
However, because the pages in pending are likely to be in several sites
we cannot "just" preload them as a batch.
- Instead of doing "pending <- todo" at each iteration, we have to elect a
- Site, and we put in pending all the pages from todo that belong to that
+ Instead of doing "pending <- todo" at each iteration, we have to elect a
+ Site, and we put in pending all the pages from todo that belong to that
Site:
Code becomes:
todo <- {originPage.site:[originPage]}
done <- []
- while todo != {}:
+ while todo != {}:
site <- electSite()
pending <- todo[site]
@@ -789,10 +789,10 @@
Subject objects only operate on pages that should have been preloaded before.
- In fact, at any time:
+ In fact, at any time:
* todo contains new Pages that have not been loaded yet
* done contains Pages that have been loaded, and that have been treated.
- * If batch preloadings are successful, Page._get() is never called from
+ * If batch preloadings are successful, Page._get() is never called from
this Object.
"""
@@ -904,7 +904,7 @@
def whatsNextPageBatch(self, site):
"""
By calling this method, you 'promise' this instance that you will
- preload all the 'site' Pages that are in the todo list.
+ preload all the 'site' Pages that are in the todo list.
This routine will return a list of pages that can be treated.
"""
@@ -1149,7 +1149,7 @@
def batchLoaded(self, counter):
"""
This is called by a worker to tell us that the promised batch of
- pages was loaded.
+ pages was loaded.
In other words, all the pages in self.pending have already
been preloaded.
@@ -1190,8 +1190,8 @@
for site, count in self.todo.siteCounts():
counter.minus(site, count)
self.todo = PageTree()
- # In some rare cases it might be we already did check some 'automatic' links
- self.done = PageTree()
+ # In some rare cases it might be that we have already checked some 'automatic' links
+ self.done = PageTree()
continue
elif page.isRedirectPage() or page.isCategoryRedirect():
@@ -1204,13 +1204,6 @@
redirectTargetPage = page.getRedirectTarget()
else:
redirectTargetPage = page.getCategoryRedirectTarget()
- except pywikibot.InvalidTitle:
- # MW considers #redirect [[en:#foo]] as a redirect page,
- # but we can't do anything useful with such pages
- if not globalvar.quiet:
- pywikibot.output(u"NOTE: %s redirects to an invalid title"
- % page)
- continue
if not globalvar.quiet:
pywikibot.output(u"NOTE: %s is %sredirect to %s"
% (page, redir, redirectTargetPage))
@@ -1252,7 +1245,7 @@
for site, count in self.todo.siteCounts():
counter.minus(site, count)
self.todo = PageTree()
- self.done = PageTree()
+ self.done = PageTree()
continue
elif page.section():
@@ -1298,8 +1291,8 @@
elif globalvar.autonomous and duplicate and not skip:
pywikibot.output(u"Stopping work on %s because duplicate pages"\
- " %s and %s are found" % (self.originPage,
- duplicate,
+ " %s and %s are found" % (self.originPage,
+ duplicate,
page))
self.makeForcedStop(counter)
try:
@@ -1312,7 +1305,7 @@
f.write(u" [%s%s graph]" % (config.interwiki_graph_url, filename))
f.write("\n")
f.close()
- # FIXME: What errors are we catching here?
+ # FIXME: What errors are we catching here?
# except: should be avoided!!
except:
#raise
@@ -1653,12 +1646,12 @@
Delete the contents that are stored on disk for this Subject.
We cannot afford to define this in a StoredPage destructor because
- StoredPage instances can get referenced cyclicly: that would stop the
+ StoredPage instances can get referenced cyclically: that would stop the
garbage collector from destroying some of those objects.
It's also not necessary to set these lines as a Subject destructor:
deleting all stored content one entry by one entry when bailing out
- after a KeyboardInterrupt for example is redundant, because the
+ after a KeyboardInterrupt for example is redundant, because the
whole storage file will be eventually removed.
"""
if globalvar.contentsondisk:
@@ -1699,7 +1692,7 @@
for iw in re.finditer('<!-- *\[\[(.*?:.*?)\]\] *-->', pagetext):
try:
ignorepage = pywikibot.Page(page.site, iw.groups()[0])
- except (pywikibot.NoSuchSite, pywikibot.InvalidTitle):
+ except (pywikibot.NoSuchSite, ):
continue
try:
@@ -2227,7 +2220,7 @@
#Version info marks bots without unicode error
#This also prevents abuse filter blocking on de-wiki
if not pywikibot.unicode_error:
- mcomment += u'r%s) (' % sys.version.split()[0]
+ mcomment += u'r%s) (' % sys.version.split()[0]
mcomment += globalvar.summary
@@ -2237,7 +2230,7 @@
mcomment += i18n.twtranslate(insite.lang, commentname) % changes
mods = i18n.twtranslate('en', commentname) % changes
-
+
return mods, mcomment, adding, removing, modifying
def botMayEdit (page):
@@ -2353,7 +2346,7 @@
globalvar.summary = u''
elif globalvar.summary:
globalvar.summary += u'; '
-
+
# ensure that we don't try to change main page
try:
site = pywikibot.getSite()
@@ -2364,7 +2357,7 @@
if newPages is not None:
if len(namespaces) == 0:
- ns = 0
+ ns = 0
elif len(namespaces) == 1:
ns = namespaces[0]
if ns != 'all':
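
The todo/pending/done pseudocode quoted in the docstring hunks above describes
a site-batched breadth-first closure. A compact sketch under the same scheme,
with hypothetical helpers: neighbor_links stands in for NL, pages are plain
(site, title) tuples, and the busiest site is elected first.

    # Site-batched interwiki closure: pop one site's pending pages as a
    # batch (so they can be preloaded together), then file their neighbors
    # back into todo, keyed by site.
    def interwiki_closure(origin, neighbor_links):
        todo = {origin[0]: set([origin])}   # site -> pages awaiting load
        done = set()
        while todo:
            site = max(todo, key=lambda s: len(todo[s]))  # electSite()
            pending = todo.pop(site)        # preloaded as one batch
            done |= pending
            for page in pending:
                for nxt in neighbor_links(page):          # NL(pending)
                    if nxt not in done:
                        todo.setdefault(nxt[0], set()).add(nxt)
        return done

    # toy usage: three pages linking to each other across three sites
    links = {('en', 'A'): [('de', 'A')],
             ('de', 'A'): [('en', 'A'), ('fr', 'A')],
             ('fr', 'A'): [('de', 'A')]}
    print(sorted(interwiki_closure(('en', 'A'), lambda p: links.get(p, []))))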
Modified: branches/rewrite/scripts/redirect.py
===================================================================
--- branches/rewrite/scripts/redirect.py 2011-02-08 15:51:41 UTC (rev 8939)
+++ branches/rewrite/scripts/redirect.py 2011-02-08 16:25:26 UTC (rev 8940)
@@ -477,10 +477,6 @@
pywikibot.output(
u'Warning: Redirect target %s is not a valid page title.'
% str(e)[10:])
- #sometimes this error occures. Invalid Title starting with a '#'
- except pywikibot.InvalidTitle, err:
- pywikibot.output(u'Warning: %s' % err)
- break
except pywikibot.NoPage:
if len(redirList) == 1:
pywikibot.output(u'Skipping: Page %s does not exist.'