[Pywikipedia-l] SVN: [5301] trunk/pywikipedia/reflinks.py

nicdumz at svn.wikimedia.org nicdumz at svn.wikimedia.org
Sat May 3 13:05:23 UTC 2008


Revision: 5301
Author:   nicdumz
Date:     2008-05-03 13:05:23 +0000 (Sat, 03 May 2008)

Log Message:
-----------
Reminder :

Do not commit a fix for something that was reported broken while you are in the middle of working on something else.

Modified Paths:
--------------
    trunk/pywikipedia/reflinks.py

Modified: trunk/pywikipedia/reflinks.py
===================================================================
--- trunk/pywikipedia/reflinks.py	2008-05-03 12:48:24 UTC (rev 5300)
+++ trunk/pywikipedia/reflinks.py	2008-05-03 13:05:23 UTC (rev 5301)
@@ -90,7 +90,8 @@
 	ur'^\[\]\s<>"]+\([^\[\]\s<>"]+[^\[\]\s\.:;\\,<>\?"]+|'+
 	# unbracketed without ()
 	ur'[^\[\]\s<>"]+[^\[\]\s\)\.:;\\,<>\?"]+|[^\[\]\s<>"]+))[!?,\s]*\]?\s*</ref>')
-listof404pages = 'http://www.twoevils.org/files/wikipedia/404-links.txt'
+#http://www.twoevils.org/files/wikipedia/404-links.txt.gz
+listof404pages = '404-links.txt'
 
 class XmlDumpPageGenerator:
     def __init__(self, xmlFilename, xmlStart, namespaces):
@@ -286,7 +287,11 @@
         Runs the Bot
         """
         wikipedia.setAction(wikipedia.translate(self.site, msg))
-        deadLinks = codecs.open(listof404pages, 'r', 'latin_1').read() 
+        try:
+            deadLinks = codecs.open(listof404pages, 'r', 'latin_1').read() 
+        except IOError:
+            wikipedia.output('You need to download http://www.twoevils.org/files/wikipedia/404-links.txt.gz and to ungzip it in the same directory')
+            raise
         socket.setdefaulttimeout(30)
         editedpages = 0
         for page in self.generator:
@@ -322,7 +327,7 @@
                     headers = f.info()
                     contentType = headers.getheader('Content-Type')
                     if contentType and not self.MIME.search(contentType):
-                        if ref.link.lower().endswith('.pdf') and not ignorepdf:
+                        if ref.link.lower().endswith('.pdf') and not self.ignorepdf:
                             # If file has a PDF suffix
                             self.getPDFTitle(ref, f)
                         else:





More information about the Pywikipedia-l mailing list