Revision: 5546 Author: cosoleto Date: 2008-06-10 09:26:21 +0000 (Tue, 10 Jun 2008)
Log Message: ----------- Fix an error in my previous commit: bad offset in file when a 'no title' error occurs.
Modified Paths: -------------- trunk/pywikipedia/pagefromfile.py
Modified: trunk/pywikipedia/pagefromfile.py =================================================================== --- trunk/pywikipedia/pagefromfile.py 2008-06-09 13:29:15 UTC (rev 5545) +++ trunk/pywikipedia/pagefromfile.py 2008-06-10 09:26:21 UTC (rev 5546) @@ -48,6 +48,8 @@
class NoTitle(Exception): """No title found""" + def __init__(self, offset): + self.offset = offset
class PageFromFileRobot: """ @@ -221,7 +223,6 @@ position = 0 length = 0 while True: - position += length try: length, title, contents = self.findpage(text[position:]) except AttributeError: @@ -230,10 +231,12 @@ else: wikipedia.output(u'End of file.') break - except NoTitle: + except NoTitle, err: wikipedia.output(u'\nNo title found - skipping a page.') + position += err.offset continue
+ position += length yield title, contents
def findpage(self, text): @@ -251,7 +254,7 @@ #Remove title (to allow creation of redirects) contents = titleR.sub('', contents, count = 1) except AttributeError: - raise NoTitle + raise NoTitle(location.end()) else: return location.end(), title, contents