[Pywikipedia-l] SVN: [5573] trunk/pywikipedia/nowcommons.py - pywikibot

15 Jun 2008

Revision: 5573
Author:   filnik
Date:     2008-06-15 15:23:41 +0000 (Sun, 15 Jun 2008)
Log Message:
-----------
Fixed the detection of new images; all of them should now be found without errors (or at least I hope so).
Modified Paths:
--------------
    trunk/pywikipedia/nowcommons.py
Modified: trunk/pywikipedia/nowcommons.py
===================================================================

--- trunk/pywikipedia/nowcommons.py	2008-06-15 15:23:24 UTC (rev 5572)
+++ trunk/pywikipedia/nowcommons.py	2008-06-15 15:23:41 UTC (rev 5573)
@@ -152,7 +152,7 @@
 # if your project has images like that, put the word often used here to skip them
 word_to_skip = {
     'en': [],
-    'it': ['stemma', 'stub'],
+    'it': ['stemma', 'stub', 'hill40 '],
     }
#nowCommonsMessage = imagetransfer.nowCommonsMessage
@@ -176,6 +176,7 @@
         lang = self.site.lang
         num_page = 0
         word_to_skip_translated = wikipedia.translate(self.site, word_to_skip)
+        images_processed = list()
         while 1:
             url = 'http://toolserver.org/~multichill/nowcommons.php?language=%s&page=%s&...' % (lang, num_page)
             HTML_text = self.site.getUrl(url, no_hostname = True)
@@ -183,10 +184,15 @@
             reg += r'<[Aa] href="(?P<urlcommons>http://commons.wikimedia.org/.*?)%22%3EImage:(?P<imagecommons>.*?)</[Aa]> +?</td><td>'
             regex = re.compile(reg, re.UNICODE)
             found_something = False
+            change_page = True
             for x in regex.finditer(HTML_text):
-                found_something = True
+                found_something = True                
                 image_local = x.group('imagelocal')
                 image_commons = x.group('imagecommons')
+                if image_local in images_processed:
+                    continue
+                change_page = False
+                images_processed.append(image_local)
                 # Skip images that have something in the title (useful for it.wiki)
                 image_to_skip = False
                 for word in word_to_skip_translated:
@@ -208,7 +214,9 @@
                     yield [image_local, image_commons]
                 else:
                     continue
-            num_page += 1
+            # The page is dynamically updated, so we may not need to change it
+            if change_page:
+                num_page += 1
             # If no image found means that there aren't anymore, break.
             if not found_something:
                 break