Revision: 5545
Author: cosoleto
Date: 2008-06-09 13:29:15 +0000 (Mon, 09 Jun 2008)
Log Message:
-----------
bugfix: when 'no title found, skipping a page' messagge was printed script execution was break. Added end of file detection, instead of printing a not clear 'Start or end marker not found.' message. More user-friedly messages.
Modified Paths:
--------------
trunk/pywikipedia/pagefromfile.py
Modified: trunk/pywikipedia/pagefromfile.py
===================================================================
--- trunk/pywikipedia/pagefromfile.py 2008-06-09 10:49:57 UTC (rev 5544)
+++ trunk/pywikipedia/pagefromfile.py 2008-06-09 13:29:15 UTC (rev 5545)
@@ -46,6 +46,9 @@
import re, codecs
import wikipedia, config
+class NoTitle(Exception):
+ """No title found"""
+
class PageFromFileRobot:
"""
Responsible for writing pages to the wiki, with the titles and contents
@@ -152,8 +155,8 @@
comment_bottom = comment + " - " + wikipedia.translate(mysite, self.msg_bottom)
comment_force = comment + " *** " + wikipedia.translate(mysite, self.msg_force) + " ***"
- #Remove trailing newlines (cause troubles when creating redirects)
- contents = re.sub('^[\r\n]*','',contents)
+ # Remove trailing newlines (cause troubles when creating redirects)
+ contents = re.sub('^[\r\n]*','', contents)
if page.exists():
if self.append == "Top":
@@ -207,38 +210,48 @@
self.notitle = notitle
def run(self):
- f = codecs.open(self.filename, 'r', encoding = config.textfile_encoding)
+ wikipedia.output('Reading \'%s\'...' % self.filename)
+ try:
+ f = codecs.open(self.filename, 'r', encoding = config.textfile_encoding)
+ except IOError, err:
+ print err
+ return
+
text = f.read()
position = 0
+ length = 0
while True:
- length, title, contents = self.findpage(text[position:])
- if length == 0:
+ position += length
+ try:
+ length, title, contents = self.findpage(text[position:])
+ except AttributeError:
+ if not length:
+ wikipedia.output(u'\nStart or end marker not found.')
+ else:
+ wikipedia.output(u'End of file.')
break
- else:
- position += length
- yield title, contents
+ except NoTitle:
+ wikipedia.output(u'\nNo title found - skipping a page.')
+ continue
+ yield title, contents
+
def findpage(self, text):
pageR = re.compile(self.pageStartMarker + "(.*?)" + self.pageEndMarker, re.DOTALL)
titleR = re.compile(self.titleStartMarker + "(.*?)" + self.titleEndMarker)
+ location = pageR.search(text)
+ if self.include:
+ contents = location.group()
+ else:
+ contents = location.group(1)
try:
- location = pageR.search(text)
- if self.include:
- contents = location.group()
- else:
- contents = location.group(1)
- except AttributeError:
- wikipedia.output(u'\nStart or end marker not found.')
- return 0, None, None
- try:
title = titleR.search(contents).group(1)
if self.notitle:
#Remove title (to allow creation of redirects)
contents = titleR.sub('', contents, count = 1)
except AttributeError:
- wikipedia.output(u'\nNo title found - skipping a page.')
- return 0, None, None
+ raise NoTitle
else:
return location.end(), title, contents