[Pywikipedia-l] SVN: [5301] trunk/pywikipedia/reflinks.py
nicdumz at svn.wikimedia.org
Sat May 3 13:05:23 UTC 2008
Revision: 5301
Author: nicdumz
Date: 2008-05-03 13:05:23 +0000 (Sat, 03 May 2008)
Log Message:
-----------
Reminder:
Do not commit a fix for something that was reported broken to you while you are working on other stuff.
Modified Paths:
--------------
trunk/pywikipedia/reflinks.py
Modified: trunk/pywikipedia/reflinks.py
===================================================================
--- trunk/pywikipedia/reflinks.py 2008-05-03 12:48:24 UTC (rev 5300)
+++ trunk/pywikipedia/reflinks.py 2008-05-03 13:05:23 UTC (rev 5301)
@@ -90,7 +90,8 @@
ur'[^\[\]\s<>"]+\([^\[\]\s<>"]+[^\[\]\s\.:;\\,<>\?"]+|'+
# unbracketed without ()
ur'[^\[\]\s<>"]+[^\[\]\s\)\.:;\\,<>\?"]+|[^\[\]\s<>"]+))[!?,\s]*\]?\s*</ref>')
-listof404pages = 'http://www.twoevils.org/files/wikipedia/404-links.txt'
+#http://www.twoevils.org/files/wikipedia/404-links.txt.gz
+listof404pages = '404-links.txt'
class XmlDumpPageGenerator:
def __init__(self, xmlFilename, xmlStart, namespaces):
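The first hunk points the constant at a local file instead of a URL: the old value could never have worked as-is, since codecs.open() (see the next hunk) expects a local path, not an HTTP address. As a minimal sketch of the manual step the new code expects, assuming the Python 2 that pywikipedia targeted at the time (fetch_dead_links is a hypothetical helper, not part of this commit):

import gzip
import urllib

def fetch_dead_links(target='404-links.txt'):
    url = 'http://www.twoevils.org/files/wikipedia/404-links.txt.gz'
    urllib.urlretrieve(url, target + '.gz')        # download the gzipped list
    data = gzip.open(target + '.gz', 'rb').read()  # decompress it in memory
    open(target, 'wb').write(data)                 # write the plain-text list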
@@ -286,7 +287,11 @@
Runs the Bot
"""
wikipedia.setAction(wikipedia.translate(self.site, msg))
- deadLinks = codecs.open(listof404pages, 'r', 'latin_1').read()
+ try:
+ deadLinks = codecs.open(listof404pages, 'r', 'latin_1').read()
+ except IOError:
+ wikipedia.output('You need to download http://www.twoevils.org/files/wikipedia/404-links.txt.gz and ungzip it into the same directory')
+ raise
socket.setdefaulttimeout(30)
editedpages = 0
for page in self.generator:
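The second hunk keeps the failure loud: it prints a hint about fetching the gzipped list, then re-raises the IOError so the bot still aborts with the original traceback. A standalone sketch of the same print-then-reraise pattern (read_dead_links is an illustrative name, not from the commit):

import codecs

def read_dead_links(path='404-links.txt'):
    try:
        return codecs.open(path, 'r', 'latin_1').read()
    except IOError:
        # Explain the likely cause, then re-raise so the traceback
        # and the non-zero exit status are preserved.
        print 'Missing %s; download and ungzip the 404 list first' % path
        raise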
@@ -322,7 +327,7 @@
headers = f.info()
contentType = headers.getheader('Content-Type')
if contentType and not self.MIME.search(contentType):
- if ref.link.lower().endswith('.pdf') and not ignorepdf:
+ if ref.link.lower().endswith('.pdf') and not self.ignorepdf:
# If file has a PDF suffix
self.getPDFTitle(ref, f)
else:
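The last hunk is a name-resolution fix rather than a logic change: inside a method, a bare ignorepdf is looked up as a local or global variable, so the option stored on the instance was never consulted, and (assuming no global of that name existed) the line would raise NameError the first time a .pdf link reached it. A reduced illustration, with made-up class and method names:

class Bot(object):
    def __init__(self, ignorepdf=False):
        self.ignorepdf = ignorepdf

    def should_fetch_pdf_title(self, link):
        # 'not ignorepdf' would raise NameError here: the flag lives on
        # the instance and must be read as self.ignorepdf.
        return link.lower().endswith('.pdf') and not self.ignorepdf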