Revision: 8656
Author: xqt
Date: 2010-10-15 13:18:41 +0000 (Fri, 15 Oct 2010)
Log Message:
-----------
Bugfix for missing ]] in links to sections (bug #3087909 coming with r8539)
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2010-10-15 05:13:09 UTC (rev 8655)
+++ trunk/pywikipedia/wikipedia.py 2010-10-15 13:18:41 UTC (rev 8656)
@@ -457,6 +457,15 @@
If decode is True, decodes the section title
"""
title = self._title
+ if decode or asLink:
+ begin = title.find('#')
+ if begin != -1:
+ anchor = self.section(underscore=underscore, decode=True)
+ try:
+ title = title[:begin + 1] + anchor
+ except TypeError:
+ print title, begin, anchor
+ raise
if asLink:
if allowInterwiki and (forceInterwiki or self._site != getSite()):
colon = ""
@@ -472,15 +481,6 @@
title = u'[[:%s]]' % title
else:
title = u'[[%s]]' % title
- if decode or asLink:
- begin = title.find('#')
- if begin != -1:
- anchor = self.section(underscore = underscore, decode = True)
- try:
- title = title[:begin + 1] + anchor
- except TypeError:
- print title, begin, anchor
- raise
if savetitle or asLink:
# Ensure there's no wiki syntax in the title
title = title.replace(u"''", u'%27%27')
Revision: 8653
Author: xqt
Date: 2010-10-14 21:04:45 +0000 (Thu, 14 Oct 2010)
Log Message:
-----------
eol-style, keywords
Modified Paths:
--------------
trunk/pywikipedia/botlist.py
Property Changed:
----------------
trunk/pywikipedia/botlist.py
Modified: trunk/pywikipedia/botlist.py
===================================================================
--- trunk/pywikipedia/botlist.py 2010-10-14 20:44:04 UTC (rev 8652)
+++ trunk/pywikipedia/botlist.py 2010-10-14 21:04:45 UTC (rev 8653)
@@ -1,150 +1,150 @@
-# -*- coding: utf-8 -*-
-"""
-Allows access to the site's bot user list.
-
-The function refresh() downloads the current bot user list and saves
-it to disk. It is run automatically when a bot first tries to get this
-data.
-"""
-
-# (C) Daniel Herding, 2005
-# (C) Dr. Trigon, 2009-2010
-#
-# DrTrigonBot: http://de.wikipedia.org/wiki/Benutzer:DrTrigonBot
-#
-# Distributed under the terms of the MIT license.
-#
-__version__='$Id: dtbext_botlist.py 0.3.0040 2010-10-02 20:13 drtrigon $'
-#
-
-import re, sys, pickle
-import os.path
-import time
-import wikipedia as pywikibot
-
-cache = {}
-
-def get(site = None):
- if site is None:
- site = pywikibot.getSite()
- if site in cache:
- # Use cached copy if it exists.
- botlist = cache[site]
- else:
- fn = pywikibot.config.datafilepath('botlists',
- 'botlist-%s-%s.dat' % (site.family.name, site.lang))
- try:
- # find out how old our saved dump is (in seconds)
- file_age = time.time() - os.path.getmtime(fn)
- # if it's older than 1 day, reload it
- if file_age > 1 * 24 * 60 * 60:
- pywikibot.output(u'Copy of bot user list is one day old, reloading')
- refresh(site)
- except OSError:
- # no saved botlist exists yet, retrieve one
- refresh(site)
- f = open(fn, 'r')
- botlist = pickle.load(f)
- f.close()
- # create cached copy
- cache[site] = botlist
- return botlist
-
-def isBot(user, site=None):
- botlist = get(site)
- return user in botlist
-
-def refresh(site, sysop=False, witheditsonly=True):
- #if not site.has_api() or site.versionnumber() < 10:
- # _refreshOld(site)
-
- # get botlist special page's URL
- if not site.loggedInAs(sysop=sysop):
- site.forceLogin(sysop=sysop)
-
- params = {
- 'action': 'query',
- 'list': 'allusers',
- 'augroup': 'bot',
- }
- if witheditsonly:
- params['auwitheditsonly'] = ''
-
- pywikibot.output(u'Retrieving bot user list for %s via API.' % repr(site))
- pywikibot.put_throttle() # It actually is a get, but a heavy one.
- botlist = []
- while True:
- data = pywikibot.query.GetData(params, site, sysop=sysop)
- if 'error' in data:
- raise RuntimeError('ERROR: %s' % data)
- botlist.extend([w['name'] for w in data['query']['allusers']])
-
- if 'query-continue' in data:
- params['aufrom'] = data['query-continue']['allusers']['aufrom']
- else:
- break
-
- # Save the botlist to disk
- # The file is stored in the botlists subdir. Create if necessary.
- if sysop:
- f = open(pywikibot.config.datafilepath('botlists',
- 'botlist-%s-%s-sysop.dat' % (site.family.name, site.lang)), 'w')
- else:
- f = open(pywikibot.config.datafilepath('botlists',
- 'botlist-%s-%s.dat' % (site.family.name, site.lang)), 'w')
- pickle.dump(botlist, f)
- f.close()
-
-#def refresh_all(new = False, sysop=False):
-# if new:
-# import config
-# pywikibot.output('Downloading All bot user lists for your accounts in user-config.py');
-# for family in config.usernames:
-# for lang in config.usernames[ family ]:
-# refresh(pywikibot.getSite( code = lang, fam = family ), sysop=sysop )
-# for family in config.sysopnames:
-# for lang in config.sysopnames[ family ]:
-# refresh(pywikibot.getSite( code = lang, fam = family ), sysop=sysop )
-#
-# else:
-# import dircache, time
-# filenames = dircache.listdir(pywikibot.config.datafilepath('botlists'))
-# botlist_filenameR = re.compile('botlist-([a-z\-:]+).dat')
-# for filename in filenames:
-# match = botlist_filenameR.match(filename)
-# if match:
-# arr = match.group(1).split('-')
-# family = arr[0]
-# lang = '-'.join(arr[1:])
-# refresh(pywikibot.getSite(code = lang, fam = family))
-#
-#def main():
-# all = False
-# new = False
-# sysop = False
-# for arg in pywikibot.handleArgs():
-# if arg == '-all' or arg == '-update':
-# all = True
-# elif arg == '-new':
-# new = True
-# elif arg == '-sysop':
-# sysop = True
-# if all:
-# refresh_all(sysop=sysop)
-# elif new:
-# refresh_all(new, sysop=sysop)
-# else:
-# refresh(pywikibot.getSite(), sysop=sysop)
-#
-# botlist = get(pywikibot.getSite())
-# pywikibot.output(u'%i pages in the bot user list.' % len(botlist))
-# for pageName in botlist:
-# pywikibot.output( pageName, toStdout = True )
-#
-#if __name__ == "__main__":
-# try:
-# main()
-# finally:
-# pywikibot.stopme()
-
-
+# -*- coding: utf-8 -*-
+"""
+Allows access to the site's bot user list.
+
+The function refresh() downloads the current bot user list and saves
+it to disk. It is run automatically when a bot first tries to get this
+data.
+"""
+
+# (C) Daniel Herding, 2005
+# (C) Dr. Trigon, 2009-2010
+#
+# DrTrigonBot: http://de.wikipedia.org/wiki/Benutzer:DrTrigonBot
+#
+# Distributed under the terms of the MIT license.
+#
+__version__='$Id$'
+#
+
+import re, sys, pickle
+import os.path
+import time
+import wikipedia as pywikibot
+
+cache = {}
+
+def get(site = None):
+ if site is None:
+ site = pywikibot.getSite()
+ if site in cache:
+ # Use cached copy if it exists.
+ botlist = cache[site]
+ else:
+ fn = pywikibot.config.datafilepath('botlists',
+ 'botlist-%s-%s.dat' % (site.family.name, site.lang))
+ try:
+ # find out how old our saved dump is (in seconds)
+ file_age = time.time() - os.path.getmtime(fn)
+ # if it's older than 1 day, reload it
+ if file_age > 1 * 24 * 60 * 60:
+ pywikibot.output(u'Copy of bot user list is one day old, reloading')
+ refresh(site)
+ except OSError:
+ # no saved botlist exists yet, retrieve one
+ refresh(site)
+ f = open(fn, 'r')
+ botlist = pickle.load(f)
+ f.close()
+ # create cached copy
+ cache[site] = botlist
+ return botlist
+
+def isBot(user, site=None):
+ botlist = get(site)
+ return user in botlist
+
+def refresh(site, sysop=False, witheditsonly=True):
+ #if not site.has_api() or site.versionnumber() < 10:
+ # _refreshOld(site)
+
+ # get botlist special page's URL
+ if not site.loggedInAs(sysop=sysop):
+ site.forceLogin(sysop=sysop)
+
+ params = {
+ 'action': 'query',
+ 'list': 'allusers',
+ 'augroup': 'bot',
+ }
+ if witheditsonly:
+ params['auwitheditsonly'] = ''
+
+ pywikibot.output(u'Retrieving bot user list for %s via API.' % repr(site))
+ pywikibot.put_throttle() # It actually is a get, but a heavy one.
+ botlist = []
+ while True:
+ data = pywikibot.query.GetData(params, site, sysop=sysop)
+ if 'error' in data:
+ raise RuntimeError('ERROR: %s' % data)
+ botlist.extend([w['name'] for w in data['query']['allusers']])
+
+ if 'query-continue' in data:
+ params['aufrom'] = data['query-continue']['allusers']['aufrom']
+ else:
+ break
+
+ # Save the botlist to disk
+ # The file is stored in the botlists subdir. Create if necessary.
+ if sysop:
+ f = open(pywikibot.config.datafilepath('botlists',
+ 'botlist-%s-%s-sysop.dat' % (site.family.name, site.lang)), 'w')
+ else:
+ f = open(pywikibot.config.datafilepath('botlists',
+ 'botlist-%s-%s.dat' % (site.family.name, site.lang)), 'w')
+ pickle.dump(botlist, f)
+ f.close()
+
+#def refresh_all(new = False, sysop=False):
+# if new:
+# import config
+# pywikibot.output('Downloading All bot user lists for your accounts in user-config.py');
+# for family in config.usernames:
+# for lang in config.usernames[ family ]:
+# refresh(pywikibot.getSite( code = lang, fam = family ), sysop=sysop )
+# for family in config.sysopnames:
+# for lang in config.sysopnames[ family ]:
+# refresh(pywikibot.getSite( code = lang, fam = family ), sysop=sysop )
+#
+# else:
+# import dircache, time
+# filenames = dircache.listdir(pywikibot.config.datafilepath('botlists'))
+# botlist_filenameR = re.compile('botlist-([a-z\-:]+).dat')
+# for filename in filenames:
+# match = botlist_filenameR.match(filename)
+# if match:
+# arr = match.group(1).split('-')
+# family = arr[0]
+# lang = '-'.join(arr[1:])
+# refresh(pywikibot.getSite(code = lang, fam = family))
+#
+#def main():
+# all = False
+# new = False
+# sysop = False
+# for arg in pywikibot.handleArgs():
+# if arg == '-all' or arg == '-update':
+# all = True
+# elif arg == '-new':
+# new = True
+# elif arg == '-sysop':
+# sysop = True
+# if all:
+# refresh_all(sysop=sysop)
+# elif new:
+# refresh_all(new, sysop=sysop)
+# else:
+# refresh(pywikibot.getSite(), sysop=sysop)
+#
+# botlist = get(pywikibot.getSite())
+# pywikibot.output(u'%i pages in the bot user list.' % len(botlist))
+# for pageName in botlist:
+# pywikibot.output( pageName, toStdout = True )
+#
+#if __name__ == "__main__":
+# try:
+# main()
+# finally:
+# pywikibot.stopme()
+
+
Property changes on: trunk/pywikipedia/botlist.py
___________________________________________________________________
Added: svn:keywords
+ Author Date Id Revision
Added: svn:eol-style
+ native
Revision: 8652
Author: xqt
Date: 2010-10-14 20:44:04 +0000 (Thu, 14 Oct 2010)
Log Message:
-----------
possibility to compare against page.title() instead
of page.titleWithoutNamespace() in RegexFilterPageGenerator (patch bug #3084727 submitted by DrTrigon. Thanks)
Modified Paths:
--------------
trunk/pywikipedia/pagegenerators.py
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2010-10-14 20:34:24 UTC (rev 8651)
+++ trunk/pywikipedia/pagegenerators.py 2010-10-14 20:44:04 UTC (rev 8652)
@@ -1160,25 +1160,36 @@
seenPages[_page] = True
yield page
-def RegexFilterPageGenerator(generator, regex, inverse=False):
+def RegexFilterPageGenerator(generator, regex, inverse=False, ignore_namespace=True):
"""
Wraps around another generator. Yields only those pages, the titles of
which are positively matched to any regex in list. If invert is False,
yields all pages matched by any regex, if True, yields all pages matched
- none of the regex.
+ none of the regex. If ignore_namespace is False, the whole page title
+ is compared.
"""
# test for backwards compatibility
if isinstance(regex, basestring):
regex = [regex]
- reg = [ re.compile(r, re.I) for r in regex ]
+ # test if regex is already compiled
+ if isinstance(regex[0], basestring):
+ reg = [ re.compile(r, re.I) for r in regex ]
+ else:
+ reg = regex
for page in generator:
+ # get the page title
+ if ignore_namespace:
+ title = page.titleWithoutNamespace()
+ else:
+ title = page.title()
+
if inverse:
# yield page if NOT matched by all regex
skip = False
for r in reg:
- if r.match(page.titleWithoutNamespace()):
+ if r.match(title):
skip = True
break
if not skip:
@@ -1186,7 +1197,7 @@
else:
# yield page if matched by any regex
for r in reg:
- if r.match(page.titleWithoutNamespace()):
+ if r.match(title):
yield page
break