Revision: 5275
Author: btongminh
Date: 2008-04-26 12:05:51 +0000 (Sat, 26 Apr 2008)
Log Message:
-----------
Add starvation to logger threads. Remove obsolete code.
Modified Paths:
--------------
trunk/pywikipedia/commonsdelinker/delinker.py
Modified: trunk/pywikipedia/commonsdelinker/delinker.py
===================================================================
--- trunk/pywikipedia/commonsdelinker/delinker.py 2008-04-25 19:13:20 UTC (rev 5274)
+++ trunk/pywikipedia/commonsdelinker/delinker.py 2008-04-26 12:05:51 UTC (rev 5275)
@@ -535,6 +535,8 @@
self.pool.jobLock.release()
class Logger(threadpool.Thread):
+ timeout = 360
+
def __init__(self, pool, CommonsDelinker):
threadpool.Thread.__init__(self, pool)
self.CommonsDelinker = CommonsDelinker
@@ -607,7 +609,21 @@
traceback.print_exc(file = sys.stderr)
self.exit()
self.CommonsDelinker.thread_died()
+
+ def starve(self):
+ self.pool.jobLock.acquire()
+ try:
+ if self.pool[id(self)].isSet(): return False
+ output(u'%s Starving' % self)
+ self.database.close()
+ del self.pool[id(self)]
+ self.pool.threads.remove(self)
+ return True
+ finally:
+ self.pool.jobLock.release()
+
+
class CommonsDelinker(object):
def __init__(self):
self.config = config.CommonsDelinker
@@ -832,33 +848,8 @@
time.sleep(self.config['timeout'])
def thread_died(self):
- # A thread died, it may be possible that we cannot
- # function any more. Currently only for CheckUsages
- # and Loggers. Delinkers should not be able to die.
- cu = 0
- self.CheckUsages.jobLock.acquire()
- for thread in self.CheckUsages.threads:
- if thread.isAlive() and not thread.quit:
- cu += 1
- self.CheckUsages.jobLock.release()
- lg = 0
- self.Loggers.jobLock.acquire()
- for thread in self.Loggers.threads:
- if thread.isAlive() and not thread.quit:
- lg += 1
- unlogged = self.Loggers.jobQueue[:]
- self.Loggers.jobLock.release()
-
- # We can no longer function if we have only one
- # CheckUsage or zero Loggers available.
- # TODO: config settings?
- if cu <= 1:
- output(u'ERROR!!! Too few CheckUsages left to function', False)
- threadpool.terminate()
- if lg <= 0:
- output(u'ERROR!!! Too few Loggers left to function', False)
- print >>sys.stderr, 'Currently unlogged:', unlogged
- threadpool.terminate()
+ # Obsolete
+ return
@staticmethod
def output(*args):
Bugs item #1924322, was opened at 2008-03-24 06:19
Message generated for change (Comment added) made by nobody
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1924322&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: interwiki
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Nobody/Anonymous (nobody)
Assigned to: Nobody/Anonymous (nobody)
Summary: interwiki links on subpages in templates
Initial Comment:
In English and some other major wikipedias interwiki links are placed on /doc subpage (or whatever it's called) in templates. Interwiki bot should check if such a page exists and not place interwiki links on main template page but place/update links on that subpage. Otherwise, everytime a bot places interwiki on a template with this structure, the main template page needs to be cleaned and interwiki links moved to a subpage manually
----------------------------------------------------------------------
Comment By: Nobody/Anonymous (nobody)
Date: 2008-04-25 14:57
Message:
Logged In: NO
A few pages in the project namespace also use subpages for headers and
interwiki links. Subpages usually have <includeonly> around the iw and
category links, interwiki bots should leave that in place.
----------------------------------------------------------------------
Comment By: Nobody/Anonymous (nobody)
Date: 2008-04-17 13:13
Message:
Logged In: NO
Every like it about his problem.
If there is a page with subpages in templates. The interwiki links on the
subpages needs to places in inside the nowiki section. So if the last word
of the page is </nowiki> the interwiki link needs to be placed before the
</nowiki> tag.
I'm the operator of the CarsracBot with its home on the nl.wikipedia.org
And I have seen this behaviour on that articles where
en:list_of_asteroides/1101-1200 is a part of.
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1924322&group_…
Revision: 5274
Author: russblau
Date: 2008-04-25 19:13:20 +0000 (Fri, 25 Apr 2008)
Log Message:
-----------
more site methods; fix bugs; lazier site login (although I'm still not happy with this; getting langlinks requires connecting to each wiki server)
Modified Paths:
--------------
branches/rewrite/pywikibot/__init__.py
branches/rewrite/pywikibot/data/api.py
branches/rewrite/pywikibot/exceptions.py
branches/rewrite/pywikibot/page.py
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/__init__.py
===================================================================
--- branches/rewrite/pywikibot/__init__.py 2008-04-25 16:19:26 UTC (rev 5273)
+++ branches/rewrite/pywikibot/__init__.py 2008-04-25 19:13:20 UTC (rev 5274)
@@ -51,11 +51,6 @@
key = '%s:%s:%s' % (fam, code, user)
if not _sites.has_key(key):
_sites[key] = __Site(code=code, fam=fam, user=user)
- _sites[key].getsiteinfo()
- try:
- _sites[key].login(False)
- except NoUsername:
- pass
return _sites[key]
getSite = Site # alias for backwards-compatibility
Modified: branches/rewrite/pywikibot/data/api.py
===================================================================
--- branches/rewrite/pywikibot/data/api.py 2008-04-25 16:19:26 UTC (rev 5273)
+++ branches/rewrite/pywikibot/data/api.py 2008-04-25 19:13:20 UTC (rev 5274)
@@ -223,19 +223,7 @@
# double the next wait, but do not exceed 120 seconds
self.retry_wait = min(120, self.retry_wait * 2)
- def lag_wait(self, lag):
- """Wait due to server lag."""
- # unlike regular wait, this shuts down all access to site
- self.site.sitelock.acquire()
- try:
- # wait at least 5 seconds, no more than 120
- wait = max(5, min(120, lag//2))
- logging.warn("Pausing %s seconds due to server lag." % wait)
- time.sleep(wait)
- finally:
- self.site.sitelock.release()
-
class PageGenerator(object):
"""Iterator for response to a request of type action=query&generator=foo."""
def __init__(self, generator, **kwargs):
@@ -293,7 +281,6 @@
# FIXME: this won't handle generators with <redirlinks> subelements
# correctly yet
while True:
- self.site.throttle()
self.data = self.request.submit()
if not self.data or not isinstance(self.data, dict):
raise StopIteration
@@ -392,7 +379,6 @@
"""Iterate objects for elements found in response."""
# this looks for the resultkey ''inside'' a <page> entry
while True:
- self.site.throttle()
self.data = self.request.submit()
if not self.data or not isinstance(self.data, dict):
raise StopIteration
Modified: branches/rewrite/pywikibot/exceptions.py
===================================================================
--- branches/rewrite/pywikibot/exceptions.py 2008-04-25 16:19:26 UTC (rev 5273)
+++ branches/rewrite/pywikibot/exceptions.py 2008-04-25 19:13:20 UTC (rev 5274)
@@ -31,6 +31,15 @@
class IsNotRedirectPage(Error):
"""Page is not a redirect page"""
+class CircularRedirect(Error):
+ """Page is a circular redirect
+
+ Exception argument is the redirect target; this may be the same title
+ as this page or a different title (in which case the target page directly
+ or indirectly redirects back to this one)
+
+ """
+
class LockedPage(Error):
"""Page is locked"""
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2008-04-25 16:19:26 UTC (rev 5273)
+++ branches/rewrite/pywikibot/page.py 2008-04-25 19:13:20 UTC (rev 5274)
@@ -660,7 +660,11 @@
exception. This method also can raise a NoPage exception.
"""
- return self.site().follow_redirect(self)
+ if not self.isRedirectPage():
+ raise pywikibot.IsNotRedirectPage
+ if not isinstance(self._redir, Page):
+ self.site().getredirtarget(self)
+ return self._redir
def getVersionHistory(self, forceReload=False, reverseOrder=False,
getAll=False, revCount=500):
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2008-04-25 16:19:26 UTC (rev 5273)
+++ branches/rewrite/pywikibot/site.py 2008-04-25 19:13:20 UTC (rev 5274)
@@ -103,7 +103,17 @@
self._pagemutex = threading.Lock()
self._locked_pages = []
- self.throttle = Throttle(self, multiplydelay=True, verbosedelay=True)
+ @property
+ def throttle(self):
+ """Return this Site's throttle. Initialize a new one if needed."""
+ if not hasattr(self, "_throttle"):
+ self._throttle = Throttle(self, multiplydelay=True, verbosedelay=True)
+ self.getsiteinfo()
+ try:
+ self.login(False)
+ except pywikibot.NoUsername:
+ pass
+ return self._throttle
def family(self):
"""Return the associated Family object."""
@@ -124,6 +134,8 @@
def __getattr__(self, attr):
"""Calls to methods not defined in this object are passed to Family."""
+ if hasattr(self.__class__, attr):
+ return getattr(self.__class__, attr)
try:
method = getattr(self.family(), attr)
f = lambda *args, **kwargs: \
@@ -509,7 +521,42 @@
if not hasattr(page, "_redir"):
self.getpageinfo(page)
return bool(page._redir)
-
+
+ def getredirtarget(self, page):
+ """Return Page object for the redirect target of page."""
+ if not hasattr(page, "_redir"):
+ self.getpageinfo(page)
+ if not page._redir:
+ raise pywikibot.IsNotRedirectPage
+ title = page.title(withSection=False)
+ query = api.Request(site=self, action="query", property="info",
+ inprop="protection|talkid|subjectid",
+ titles=title.encode(self.encoding()),
+ redirects="")
+ result = query.submit()
+ if "query" not in result or "redirects" not in result["query"]:
+ raise RuntimeError(
+ "getredirtarget: No 'redirects' found for page %s."
+ % title)
+ redirmap = dict((item['from'], item['to'])
+ for item in result['query']['redirects'])
+ if title not in redirmap:
+ raise RuntimeError(
+ "getredirtarget: 'redirects' contains no key for page %s."
+ % title)
+ if "pages" not in result['query']:
+ # no "pages" element indicates a circular redirect
+ raise pywikibot.CircularRedirect(redirmap[title])
+ for pagedata in result['query']['pages'].values():
+ # there should be only one value in 'pages', and it is the target
+ if pagedata['title'] not in redirmap.values():
+ raise RuntimeError(
+ "getredirtarget: target page '%s' not found in 'redirects'"
+ % pagedata['title'])
+ target = pywikibot.Page(self, pagedata['title'], pagedata['ns'])
+ api.update_page(target, pagedata)
+ page._redir = target
+
# following group of methods map more-or-less directly to API queries
def getbacklinks(self, page, followRedirects=False, filterRedirects=None,
@@ -783,7 +830,22 @@
yield pywikibot.Link(linkdata['*'],
source=pywikibot.Site(linkdata['lang']))
+ def getextlinks(self, page):
+ """Iterate all external links on page, yielding URL strings."""
+ eltitle = page.title(withSection=False)
+ elquery = api.PropertyGenerator("extlinks",
+ titles=eltitle.encode(self.encoding())
+ )
+ for pageitem in elquery:
+ if pageitem['title'] != eltitle:
+ raise RuntimeError(
+ "getextlinks: Query on %s returned data on '%s'"
+ % (page, pageitem['title']))
+ for linkdata in pageitem['extlinks']:
+ yield linkdata['*']
+
+
#### METHODS NOT IMPLEMENTED YET (but may be delegated to Family object) ####
class NotImplementedYet:
Revision: 5273
Author: filnik
Date: 2008-04-25 16:19:26 +0000 (Fri, 25 Apr 2008)
Log Message:
-----------
minor fixes at the docu
Modified Paths:
--------------
trunk/pywikipedia/add_text.py
Modified: trunk/pywikipedia/add_text.py
===================================================================
--- trunk/pywikipedia/add_text.py 2008-04-25 15:36:24 UTC (rev 5272)
+++ trunk/pywikipedia/add_text.py 2008-04-25 16:19:26 UTC (rev 5273)
@@ -1,7 +1,8 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
-This is a Bot written by Filnik to add a text in a given category.
+This is a Bot written by Filnik to add a text at the end of the page but above categories,
+interwiki and template for the stars of the interwiki.
These command line parameters can be used to specify which pages to work on:
@@ -15,8 +16,8 @@
-except Use a regex to understand if the template is already in the page
-excepturl Use the html page as text where you want to see if there's the text, not the wiki-page.
-newimages Add text in the new images
--untagged Add text in the images that doesn't have any license template
--always If used, the bot won't asked if it should add the text specified
+-untagged Add text in the images that don't have any license template
+-always If used, the bot won't ask if it should add the text specified
-up If used, put the text above and not below
--- Example ---
@@ -36,11 +37,11 @@
"""
#
-# (C) Filnik, 2007
+# (C) Filnik, 2007-2008
#
# Distributed under the terms of the MIT license.
#
-__version__ = '$Id: AddText.py,v 1.0 2007/11/27 17:08:30 filnik Exp$'
+__version__ = '$Id: add_text.py,v 1.5 2008/04/25 17:08:30 filnik Exp$'
#
import re, pagegenerators, urllib2, urllib
Revision: 5272
Author: russblau
Date: 2008-04-25 15:36:24 +0000 (Fri, 25 Apr 2008)
Log Message:
-----------
implemented getlanglinks(); current implementation causes the bot to connect to every site referred to in a language link, which probably isn't desirable.
Modified Paths:
--------------
branches/rewrite/pywikibot/data/api.py
branches/rewrite/pywikibot/page.py
branches/rewrite/pywikibot/site.py
branches/rewrite/pywikibot/tests/page_tests.py
Modified: branches/rewrite/pywikibot/data/api.py
===================================================================
--- branches/rewrite/pywikibot/data/api.py 2008-04-25 08:32:54 UTC (rev 5271)
+++ branches/rewrite/pywikibot/data/api.py 2008-04-25 15:36:24 UTC (rev 5272)
@@ -465,6 +465,7 @@
raise AssertionError(
"Page %s has neither 'pageid' nor 'missing' attribute"
% pagedict['title'])
+ page._redir = 'redirect' in pagedict
if 'lastrevid' in pagedict:
page._revid = pagedict['lastrevid']
if 'touched' in pagedict:
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2008-04-25 08:32:54 UTC (rev 5271)
+++ branches/rewrite/pywikibot/page.py 2008-04-25 15:36:24 UTC (rev 5272)
@@ -520,7 +520,6 @@
# no restricting template found
return True
-
def put(self, newtext, comment=None, watchArticle=None, minorEdit=True,
force=False):
"""Save the page with the contents of the first argument as the text.
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2008-04-25 08:32:54 UTC (rev 5271)
+++ branches/rewrite/pywikibot/site.py 2008-04-25 15:36:24 UTC (rev 5272)
@@ -484,21 +484,32 @@
return self._namespaces[num]
return self._namespaces[num][0]
+ def getpageinfo(self, page):
+ """Load page info from api and save in page attributes"""
+ title = page.title(withSection=False)
+ query = api.PropertyGenerator(
+ "info",
+ inprop="protection|talkid|subjectid",
+ titles=title.encode(self.encoding()))
+ for pageitem in query:
+ if pageitem['title'] != title:
+ raise RuntimeError(
+ "getpageinfo: Query on %s returned data on '%s'"
+ % (page, pageitem['title']))
+ api.update_page(page, pageitem)
+
def page_exists(self, page):
"""Return True if and only if page is an existing page on site."""
if not hasattr(page, "_pageid"):
- query = api.PropertyGenerator(
- "info", inprop="protection|talkid|subjectid",
- titles=page.title(withSection=False
- ).encode(self.encoding()))
- for pageitem in query:
- if pageitem['title'] != page.title(withSection=False):
- raise RuntimeError(
- "page_exists: Query on %s returned data on '%s'"
- % (page, pageitem['title']))
- page._pageid = pageitem['pageid']
+ self.getpageinfo(page)
return page._pageid > 0
+ def page_isredirect(self, page):
+ """Return True if and only if page is a redirect."""
+ if not hasattr(page, "_redir"):
+ self.getpageinfo(page)
+ return bool(page._redir)
+
# following group of methods map more-or-less directly to API queries
def getbacklinks(self, page, followRedirects=False, filterRedirects=None,
@@ -752,8 +763,27 @@
page._revisions[revision.revid] = revision
if latest:
page._revid = revision.revid
-
+ def getinterwiki(self, page):
+ # TODO
+ raise NotImplementedError
+
+ def getlanglinks(self, page):
+ """Iterate all interlanguage links on page, yielding Link objects."""
+ lltitle = page.title(withSection=False)
+ llquery = api.PropertyGenerator("langlinks",
+ titles=lltitle.encode(self.encoding())
+ )
+ for pageitem in llquery:
+ if pageitem['title'] != lltitle:
+ raise RuntimeError(
+ "getlanglinks: Query on %s returned data on '%s'"
+ % (page, pageitem['title']))
+ for linkdata in pageitem['langlinks']:
+ yield pywikibot.Link(linkdata['*'],
+ source=pywikibot.Site(linkdata['lang']))
+
+
#### METHODS NOT IMPLEMENTED YET (but may be delegated to Family object) ####
class NotImplementedYet:
Modified: branches/rewrite/pywikibot/tests/page_tests.py
===================================================================
--- branches/rewrite/pywikibot/tests/page_tests.py 2008-04-25 08:32:54 UTC (rev 5271)
+++ branches/rewrite/pywikibot/tests/page_tests.py 2008-04-25 15:36:24 UTC (rev 5272)
@@ -203,6 +203,9 @@
if __name__ == '__main__':
try:
- unittest.main()
- except SystemExit:
- pass
+ try:
+ unittest.main()
+ except SystemExit:
+ pass
+ finally:
+ pywikibot.stopme()
Revision: 5271
Author: nicdumz
Date: 2008-04-25 08:32:54 +0000 (Fri, 25 Apr 2008)
Log Message:
-----------
* moving the Special:Protectedpages from blockpageschecker to wikipedia.py
* Using a new protectedpages_address family function instead of a hard coded url
* better sysop-protection check for blockpageschecker.py (In this case, we _already_ getEditRestrictions() via the api, so manually checking for editability is almost costless)
Modified Paths:
--------------
trunk/pywikipedia/blockpageschecker.py
trunk/pywikipedia/family.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/blockpageschecker.py
===================================================================
--- trunk/pywikipedia/blockpageschecker.py 2008-04-24 17:33:36 UTC (rev 5270)
+++ trunk/pywikipedia/blockpageschecker.py 2008-04-25 08:32:54 UTC (rev 5271)
@@ -24,6 +24,8 @@
or when you have problems with them. (add the namespace after ":" where
you want to check - default checks all protected pages)
+-moveprotected: Same as -protectedpages, for moveprotected pages
+
Furthermore, the following command line parameters are supported:
-always Doesn't ask every time if the bot should make the change or not, do it always.
@@ -166,35 +168,6 @@
return ('autoconfirmed-move', catchRegex)
return ('editable', r'\A\n') # If editable means that we have no regex, won't change anything with this regex
-def ProtectedPagesData(namespace = None):
- """ Yield all the pages blocked, using Special:ProtectedPages """
- # Avoid problems of encoding and stuff like that, let it divided please
- url = '/w/index.php?title=Special:ProtectedPages&type=edit&level=0'
- if namespace != None: # /!\ if namespace seems simpler, but returns false when ns=0
-
- url += '&namespace=%s' % namespace
- site = wikipedia.getSite()
- parser_text = site.getUrl(url)
- while 1:
- #<li><a href="/wiki/Pagina_principale" title="Pagina principale">Pagina principale</a> <small>(6.522 byte)</small> (protetta)</li>
- m = re.findall(r'<li><a href=".*?" title=".*?">(.*?)</a>.*?<small>\((.*?)\)</small>.*?\((.*?)\)</li>', parser_text)
- for data in m:
- title = data[0]
- size = data[1]
- status = data[2]
- yield (title, size, status)
- nextpage = re.findall(r'<.ul>\(.*?\).*?\(.*?\).*?\(<a href="(.*?)".*?</a>\) +?\(<a href=', parser_text)
- if nextpage != []:
- parser_text = site.getUrl(nextpage[0].replace('&', '&'))
- continue
- else:
- break
-
-def ProtectedPages(namespace = 0):
- """ Return only the wiki page object and not the tuple with all the data as above """
- for data in ProtectedPagesData(namespace):
- yield wikipedia.Page(wikipedia.getSite(), data[0])
-
def debugQuest(site, page):
quest = wikipedia.input(u'Do you want to open the page on your [b]rowser, [g]ui or [n]othing?')
pathWiki = site.family.nicepath(site.lang)
@@ -226,6 +199,8 @@
moveBlockCheck = False; genFactory = pagegenerators.GeneratorFactory()
# To prevent Infinite loops
errorCount = 0
+ # Load the right site
+ site = wikipedia.getSite()
# Loading the default options.
for arg in wikipedia.handleArgs():
if arg == '-always':
@@ -236,9 +211,15 @@
debug = True
elif arg.startswith('-protectedpages'):
if len(arg) == 15:
- generator = ProtectedPages()
+ generator = site.protectedpages(namespace = 0)
else:
- generator = ProtectedPages(int(arg[16:]))
+ generator = site.protectedpages(namespace = int(arg[16:]))
+ elif arg.startswith('-moveprotected'):
+ if len(arg) == 14:
+ generator = site.protectedpages(namespace = 0, type = 'move')
+ else:
+ generator = site.protectedpages(namespace = int(arg[16:]),
+ type = 'move')
elif arg.startswith('-page'):
if len(arg) == 5:
generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'What page do you want to use?'))]
@@ -246,8 +227,6 @@
generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
else:
generator = genFactory.handleArg(arg)
- # Load the right site
- site = wikipedia.getSite()
# Take the right templates to use, the category and the comment
TSP = wikipedia.translate(site, templateSemiProtection)
TTP = wikipedia.translate(site, templateTotalProtection)
@@ -284,15 +263,27 @@
if debug:
debugQuest(site, page)
continue
+ """
+ # This check does not work :
+ # PreloadingGenerator cannot set correctly page.editRestriction
+ # (see bug #1949476 )
if not page.canBeEdited():
- wikipedia.output("%s is protected : this account can't edit it! Skipping..." % pagename)
- continue
+ wikipedia.output("%s is sysop-protected : this account can't edit it! Skipping..." % pagename)
+ continue
+ """
+ editRestr = restrictions['edit']
+ if editRestr and editRestr[0] == 'sysop':
+ try:
+ config.sysopnames[site.family.name][site.lang]
+ except:
+ wikipedia.output("%s is sysop-protected : this account can't edit it! Skipping..." % pagename)
+ continue
+
# Understand, according to the template in the page, what should be the protection
# and compare it with what there really is.
TemplateInThePage = understandBlock(text, TTP, TSP, TSMP, TTMP)
# Only to see if the text is the same or not...
oldtext = text
- editRestr = restrictions['edit']
# keep track of the changes for each step (edit then move)
changes = -1
Modified: trunk/pywikipedia/family.py
===================================================================
--- trunk/pywikipedia/family.py 2008-04-24 17:33:36 UTC (rev 5270)
+++ trunk/pywikipedia/family.py 2008-04-25 08:32:54 UTC (rev 5271)
@@ -2986,6 +2986,9 @@
def lonelypages_address(self, code, limit=500):
return "%s?title=%s:Lonelypages&limit=%d" % (self.path(code), self.special_namespace_url(code), limit)
+ def protectedpages_address(self, code, limit=500):
+ return "%s?title=%s:ProtectedPages&limit=%d" % (self.path(code), self.special_namespace_url(code), limit)
+
def unwatchedpages_address(self, code, limit=500):
return "%s?title=%s:Unwatchedpages&limit=%d" % (self.path(code), self.special_namespace_url(code), limit)
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-04-24 17:33:36 UTC (rev 5270)
+++ trunk/pywikipedia/wikipedia.py 2008-04-25 08:32:54 UTC (rev 5271)
@@ -3773,6 +3773,7 @@
search(query): query results from Special:Search
allpages(): Special:Allpages
prefixindex(): Special:Prefixindex
+ protectedpages(): Special:ProtectedPages
newpages(): Special:Newpages
newimages(): Special:Log&type=upload
longpages(): Special:Longpages
@@ -3843,6 +3844,7 @@
deadendpages_address: Special:Deadendpages.
ancientpages_address: Special:Ancientpages.
lonelypages_address: Special:Lonelypages.
+ protectedpages_address: Special:ProtectedPages
unwatchedpages_address: Special:Unwatchedpages.
uncategorizedcategories_address: Special:Uncategorizedcategories.
uncategorizedimages_address: Special:Uncategorizedimages.
@@ -4930,6 +4932,34 @@
else:
break
+ def protectedpages(self, namespace = None, type = 'edit', lvl = 0):
+ """ Yield all the protected pages, using Special:ProtectedPages
+ * namespace is a namespace number
+ * type can be 'edit' or 'move'
+ * lvl : protection level, can be 0, 'autoconfirmed', or 'sysop'
+ """
+ # Avoid problems of encoding and stuff like that, let it divided please
+ url = self.protectedpages_address()
+ url += '&type=%s&level=%s' % (type, lvl)
+ if namespace != None: # /!\ if namespace seems simpler, but returns false when ns=0
+
+ url += '&namespace=%s' % namespace
+ parser_text = self.getUrl(url)
+ while 1:
+ #<li><a href="/wiki/Pagina_principale" title="Pagina principale">Pagina principale</a> <small>(6.522 byte)</small> (protetta)</li>
+ m = re.findall(r'<li><a href=".*?" title=".*?">(.*?)</a>.*?<small>\((.*?)\)</small>.*?\((.*?)\)</li>', parser_text)
+ for data in m:
+ title = data[0]
+ size = data[1]
+ status = data[2]
+ yield Page(self, title)
+ nextpage = re.findall(r'<.ul>\(.*?\).*?\(.*?\).*?\(<a href="(.*?)".*?</a>\) +?\(<a href=', parser_text)
+ if nextpage != []:
+ parser_text = self.getUrl(nextpage[0].replace('&amp;', '&'))
+ continue
+ else:
+ break
+
def linksearch(self, siteurl, limit=500):
"""Yield Pages from results of Special:Linksearch for 'siteurl'."""
if siteurl.startswith('*.'):
@@ -5223,6 +5253,10 @@
"""Return path to Special:Lonelypages."""
return self.family.lonelypages_address(self.lang, n)
+ def protectedpages_address(self, n=500):
+ """Return path to Special:ProtectedPages"""
+ return self.family.protectedpages_address(self.lang, n)
+
def unwatchedpages_address(self, n=500):
"""Return path to Special:Unwatchedpages."""
return self.family.unwatchedpages_address(self.lang, n)
Hello for all,
If I copy en:Template:Bots to huwiki, will it work here automatically?
As far as I see, this template contains nothing except noinclude text. So,
if I don't copy it to huwiki (just a theoretical question), and someone
inserts a non-existing "nobots" template into his user page /talk page (both
needed separately?), will it work as well?
Bináris
Revision: 5268
Author: russblau
Date: 2008-04-24 13:50:10 +0000 (Thu, 24 Apr 2008)
Log Message:
-----------
further implementation of getrevisions; documentation; refactoring.
Modified Paths:
--------------
branches/rewrite/pywikibot/README-conversion.txt
branches/rewrite/pywikibot/__init__.py
branches/rewrite/pywikibot/data/api.py
branches/rewrite/pywikibot/page.py
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/README-conversion.txt
===================================================================
--- branches/rewrite/pywikibot/README-conversion.txt 2008-04-23 22:00:06 UTC (rev 5267)
+++ branches/rewrite/pywikibot/README-conversion.txt 2008-04-24 13:50:10 UTC (rev 5268)
@@ -12,15 +12,27 @@
bot.
With pywikipedia scripts were importing "wikipedia" or "pagegenerators"
-librairies; pywikibot is now written as a standard module.
+libraries; pywikibot is now written as a standard package, and other modules
+are contained within it (e.g., pywikibot.site contains Site classes). However,
+most commonly-used names are imported into the pywikibot namespace, so that
+module names don't need to be used unless specified in the documentation.
+
(To use it, just import "pywikibot", assuming that pywikibot/ is in sys.path)
== Python libraries ==
-You will need, to run pywikibot, httplib2 and setuptools
-* httplib2 : http://code.google.com/p/httplib2/
+[Note: the goal will be to package pywikibot with setuptools easy_install,
+so that these dependencies will be loaded automatically when the package is
+installed, and users won't need to worry about this...]
+
+To run pywikibot, you will need the httplib2, simplejson, and setuptools packages--
+* httplib2 : http://code.google.com/p/httplib2/
* setuptools : http://pypi.python.org/pypi/setuptools/
+* simplejson : https://svn.red-bean.com/bob/simplejson/tags/simplejson-1.7.1/docs/i…
+or, if you already have setuptools installed, just execute 'easy_install httplib2'
+and 'easy_install simplejson'
+
If you run into errors involving httplib2.urlnorm, update httplib2 to
0.4.0 (Ubuntu package python-httlib2 for example, is outdated)
Modified: branches/rewrite/pywikibot/__init__.py
===================================================================
--- branches/rewrite/pywikibot/__init__.py 2008-04-23 22:00:06 UTC (rev 5267)
+++ branches/rewrite/pywikibot/__init__.py 2008-04-24 13:50:10 UTC (rev 5268)
@@ -60,7 +60,7 @@
getSite = Site # alias for backwards-compatibility
-from page import Page, ImagePage, Category
+from page import Page, ImagePage, Category, Link
# DEBUG
Modified: branches/rewrite/pywikibot/data/api.py
===================================================================
--- branches/rewrite/pywikibot/data/api.py 2008-04-23 22:00:06 UTC (rev 5267)
+++ branches/rewrite/pywikibot/data/api.py 2008-04-24 13:50:10 UTC (rev 5268)
@@ -329,22 +329,7 @@
"""
p = pywikibot.Page(self.site, pagedata['title'], pagedata['ns'])
- if "pageid" in pagedata:
- self._pageid = int(pagedata['pageid'])
- elif "missing" in pagedata:
- self._pageid = 0 # Non-existent page
- else:
- raise AssertionError(
- "Page %s has neither 'pageid' nor 'missing' attribute"
- % pagedata['title'])
- if 'lastrevid' in pagedata:
- p._revid = pagedata['lastrevid']
- if 'touched' in pagedata:
- p._timestamp = pagedata['touched']
- if 'protection' in pagedata:
- p._protection = {}
- for item in pagedata['protection']:
- p._protection[item['type']] = item['level']
+ update_page(p, pagedata)
return p
@@ -366,8 +351,12 @@
class PropertyGenerator(object):
- """Generator for queries of type action=query&property=..."""
+ """Generator for queries of type action=query&property=...
+ Note that this generator yields one or more dict object(s) corresponding
+ to each "page" item(s) from the API response; the calling module has to
+ decide what to do with the contents of the dict."""
+
def __init__(self, prop, **kwargs):
"""
Required and optional parameters are as for C{Request}, except that
@@ -384,7 +373,7 @@
if self.limits[prop] and kwargs.pop("getAll", False):
self.request['g'+self.limits[generator]] = "max"
self.site = self.request.site
- self.resultkey = prop # element to look for in result
+ self.resultkey = prop
# dict mapping property types to their limit parameter names
limits = {'revisions': 'rvlimit',
@@ -410,21 +399,8 @@
if not ("query" in self.data and "pages" in self.data["query"]):
raise StopIteration
pagedata = self.data["query"]["pages"].values()
- assert len(pagedata)==1
- pagedata = pagedata[0]
- if not self.resultkey in pagedata:
- raise StopIteration
- if isinstance(pagedata[self.resultkey], dict):
- for v in pagedata[self.resultkey].itervalues():
- yield v
- elif isinstance(pagedata[self.resultkey], list):
- for v in pagedata[self.resultkey]:
- yield v
- else:
- raise APIError("Unknown",
- "Unknown format in ['%s'] value."
- % self.resultkey,
- data=pagedata[self.resultkey])
+ for item in pagedata:
+ yield item
if not "query-continue" in self.data:
return
if not self.resultkey in self.data["query-continue"]:
@@ -472,6 +448,32 @@
pywikibot.cookie_jar.save()
+def update_page(page, pagedict):
+ """Update attributes of Page object page, based on query data in pagequery
+
+ @param page: object to be updated
+ @type page: Page
+ @param pagedict: the contents of a "page" element of a query response
+ @type pagedict: dict
+
+ """
+ if "pageid" in pagedict:
+ page._pageid = int(pagedict['pageid'])
+ elif "missing" in pagedict:
+ page._pageid = 0 # Non-existent page
+ else:
+ raise AssertionError(
+ "Page %s has neither 'pageid' nor 'missing' attribute"
+ % pagedict['title'])
+ if 'lastrevid' in pagedict:
+ page._revid = pagedict['lastrevid']
+ if 'touched' in pagedict:
+ page._timestamp = pagedict['touched']
+ if 'protection' in pagedict:
+ page._protection = {}
+ for item in pagedict['protection']:
+ page._protection[item['type']] = item['level'], item['expiry']
+
if __name__ == "__main__":
from pywikibot import Site
logging.getLogger().setLevel(logging.DEBUG)
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2008-04-23 22:00:06 UTC (rev 5267)
+++ branches/rewrite/pywikibot/page.py 2008-04-24 13:50:10 UTC (rev 5268)
@@ -678,8 +678,8 @@
limit = None
else:
limit = revCount
- return self.site().getrevisions(self, withText=False,
- older=not reverseOrder, limit=limit)
+ return self.site().getrevisions(self, getText=False,
+ rvdir=not reverseOrder, limit=limit)
def getVersionHistoryTable(self, forceReload=False, reverseOrder=False,
getAll=False, revCount=500):
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2008-04-23 22:00:06 UTC (rev 5267)
+++ branches/rewrite/pywikibot/site.py 2008-04-24 13:50:10 UTC (rev 5268)
@@ -491,8 +491,12 @@
"info", inprop="protection|talkid|subjectid",
titles=page.title(withSection=False
).encode(self.encoding()))
- for item in query():
- pass #FIXME
+ for pageitem in query:
+ if pageitem['title'] != page.title(withSection=False):
+ raise RuntimeError(
+ "page_exists: Query on %s returned data on '%s'"
+ % (page, pageitem['title']))
+ page._pageid = pageitem['pageid']
return page._pageid > 0
# following group of methods map more-or-less directly to API queries
@@ -621,30 +625,80 @@
return cmgen
def getrevisions(self, page=None, getText=False, revids=None,
- older=True, limit=None, sysop=False, user=None,
- excludeuser=None):
+ limit=None, startid=None, endid=None, starttime=None,
+ endtime=None, rvdir=None, user=None, excludeuser=None,
+ section=None, sysop=False):
"""Retrieve and store revision information.
- @param page: retrieve the history of this Page (required unless ids
+ By default, retrieves the last (current) revision of the page,
+ I{unless} any of the optional parameters revids, startid, endid,
+ starttime, endtime, rvdir, user, excludeuser, or limit are
+ specified. Unless noted below, all parameters not specified
+        default to None.
+
+ If rvdir is False or not specified, startid must be greater than
+ endid if both are specified; likewise, starttime must be greater
+ than endtime. If rvdir is True, these relationships are reversed.
+
+ @param page: retrieve revisions of this Page (required unless ids
is specified)
- @param getText: if True, retrieve the wiki-text of each revision as
- well
+ @param getText: if True, retrieve the wiki-text of each revision;
+ otherwise, only retrieve the revision metadata (default)
+ @param section: if specified, retrieve only this section of the text
+ (getText must be True); section must be given by number (top of
+ the article is section 0), not name
+ @type section: int
@param revids: retrieve only the specified revision ids (required
unless page is specified)
- @param older: if True, retrieve newest revisions first; otherwise,
- retrieve oldest revisions first
- @param limit: if specified, retrieve no more than this number of
- revisions (defaults to latest revision only)
+ @type revids: list of ints
+ @param limit: Retrieve no more than this number of revisions
@type limit: int
+ @param startid: retrieve revisions starting with this revid
+ @param endid: stop upon retrieving this revid
+ @param starttime: retrieve revisions starting at this timestamp
+ @param endtime: stop upon reaching this timestamp
+ @param rvdir: if false, retrieve newest revisions first (default);
+ if true, retrieve earliest first
@param user: retrieve only revisions authored by this user
@param excludeuser: retrieve all revisions not authored by this user
@param sysop: if True, switch to sysop account (if available) to
retrieve this page
"""
+ latest = (revids is None and
+ startid is None and
+ endid is None and
+ starttime is None and
+ endtime is None and
+ rvdir is None and
+ user is None and
+ excludeuser is None and
+ limit is None) # if True, we are retrieving current revision
+
+ # check for invalid argument combinations
if page is None and revids is None:
raise ValueError(
- "getrevisions needs either page or revids argument.")
+ "getrevisions: either page or revids argument required")
+ if (startid is not None or endid is not None) and \
+ (starttime is not None or endtime is not None):
+ raise ValueError(
+ "getrevisions: startid/endid combined with starttime/endtime")
+ if starttime is not None and endtime is not None:
+ if rvdir and starttime >= endtime:
+ raise ValueError(
+ "getrevisions: starttime > endtime with rvdir=True")
+ if (not rvdir) and endtime >= starttime:
+ raise ValueError(
+ "getrevisions: endtime > starttime with rvdir=False")
+ if startid is not None and endid is not None:
+ if rvdir and startid >= endid:
+ raise ValueError(
+ "getrevisions: startid > endid with rvdir=True")
+ if (not rvdir) and endid >= startid:
+ raise ValueError(
+ "getrevisions: endid > startid with rvdir=False")
+
+ # assemble API request
if revids is None:
rvtitle = page.title(withSection=False).encode(self.encoding())
rvgen = api.PropertyGenerator(u"revisions", titles=rvtitle)
@@ -654,28 +708,50 @@
if getText:
rvgen.request[u"rvprop"] = \
u"ids|flags|timestamp|user|comment|content"
- if page.section():
- rvgen.request[u"rvsection"] = unicode(page.section())
+ if section is not None:
+ rvgen.request[u"rvsection"] = unicode(section)
if limit:
rvgen.request[u"rvlimit"] = unicode(limit)
- if not older:
+ if rvdir:
rvgen.request[u"rvdir"] = u"newer"
+ elif rvdir is not None:
+ rvgen.request[u"rvdir"] = u"older"
+ if startid:
+ rvgen.request[u"rvstartid"] = startid
+ if endid:
+ rvgen.request[u"rvendid"] = endid
+ if starttime:
+ rvgen.request[u"rvstart"] = starttime
+ if endtime:
+ rvgen.request[u"rvend"] = endtime
if user:
rvgen.request[u"rvuser"] = user
elif excludeuser:
rvgen.request[u"rvexcludeuser"] = excludeuser
- # TODO if sysop:
- for rev in rvgen:
- revision = pywikibot.page.Revision(revid=rev['revid'],
- timestamp=rev['timestamp'],
- user=rev['user'],
- anon=rev.has_key('anon'),
- comment=rev.get('comment', u''),
- minor=rev.has_key('minor'),
- text=rev.get('*', None))
- page._revisions[revision.revid] = revision
- if revids is None and limit is None and user is None and excludeuser is None:
- page._revid = revision.revid
+ # TODO if sysop: something
+ for pagedata in rvgen:
+ if page is not None:
+ if pagedata['title'] != page.title(withSection=False):
+ raise RuntimeError(
+ "getrevisions: Query on %s returned data on '%s'"
+ % (page, pagedata['title']))
+ else:
+ page = Page(self, pagedata['title'])
+ api.update_page(page, pagedata)
+
+ for rev in pagedata['revisions']:
+ revision = pywikibot.page.Revision(
+ revid=rev['revid'],
+ timestamp=rev['timestamp'],
+ user=rev['user'],
+ anon=rev.has_key('anon'),
+ comment=rev.get('comment', u''),
+ minor=rev.has_key('minor'),
+ text=rev.get('*', None)
+ )
+ page._revisions[revision.revid] = revision
+ if latest:
+ page._revid = revision.revid
#### METHODS NOT IMPLEMENTED YET (but may be delegated to Family object) ####