[Pywikipedia-l] SVN: [4683] trunk/pywikipedia

cosoleto at svn.wikimedia.org cosoleto at svn.wikimedia.org
Mon Dec 10 03:07:43 UTC 2007


Revision: 4683
Author:   cosoleto
Date:     2007-12-10 03:07:37 +0000 (Mon, 10 Dec 2007)

Log Message:
-----------
code cleanup

Modified Paths:
--------------
    trunk/pywikipedia/add_text.py
    trunk/pywikipedia/checkimages.py
    trunk/pywikipedia/pagegenerators.py
    trunk/pywikipedia/wikipedia.py

Modified: trunk/pywikipedia/add_text.py
===================================================================
--- trunk/pywikipedia/add_text.py	2007-12-09 20:43:47 UTC (rev 4682)
+++ trunk/pywikipedia/add_text.py	2007-12-10 03:07:37 UTC (rev 4683)
@@ -154,12 +154,6 @@
             generator = untaggedGenerator(untaggedProject)
         elif arg == '-up':
             up = True
-        elif arg.startswith('-newimages'):
-            if len(arg) == 10:
-                limit = wikipedia.input(u'How many images do you want to check?')
-            else:
-                limit = arg[11:]
-            generator = pagegenerators.newImages(limit, wikipedia.getSite())
         elif arg == '-always':
             always = True
         else:

Modified: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py	2007-12-09 20:43:47 UTC (rev 4682)
+++ trunk/pywikipedia/checkimages.py	2007-12-10 03:07:37 UTC (rev 4683)
@@ -704,7 +704,7 @@
 				generator =  mainClass.untaggedGenerator(projectUntagged, rep_page, com)
 				normal = False
 			if normal == True:
-				generator = pagegenerators.newImages(limit, site)
+				generator = pagegenerators.NewimagesPageGenerator(number = limit, site = site)
 			if urlUsed == True and regexGen == True:
 				textRegex = pagetext(regexPageUrl)
 			elif regexGen == True:

Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py	2007-12-09 20:43:47 UTC (rev 4682)
+++ trunk/pywikipedia/pagegenerators.py	2007-12-10 03:07:37 UTC (rev 4683)
@@ -195,10 +195,10 @@
     for page in site.uncategorizedimages(number=number, repeat=repeat):
         yield page
 
-def newImages(limit = 50, site = None, repeat = False):
+def NewimagesPageGenerator(number = 100, repeat = False, site = None):
     if site is None:
         site = wikipedia.getSite()
-    for page in site.newImages(limit, repeat=repeat):
+    for page in site.newimages(number, repeat=repeat):
         yield page			
 
 def UnCategorizedPageGenerator(number = 100, repeat = False, site = None):
@@ -772,17 +772,14 @@
                 if namespace:
                     prefix = prefix[colon+1:]
             gen = PrefixingPageGenerator(prefix = prefix, namespace = namespace)
-        elif arg.startswith('-newimages'):
-            if len(arg) == 10:
-                limit = wikipedia.input(u'How many images do you want to check?')
-            else:
-                limit = arg[11:]
-            gen = newImages(limit, wikipedia.getSite())
         elif arg.startswith('-new'):
             if len(arg) >=5:
               gen = NewpagesPageGenerator(number = int(arg[5:]))
             else:
               gen = NewpagesPageGenerator(number = 60)
+        elif arg.startswith('-newimages'):
+            limit = arg[11:] or wikipedia.input(u'How many images do you want to check?')
+            gen = NewimagesPageGenerator(number = limit)
         elif arg.startswith('-search'):
             mediawikiQuery = arg[8:]
             if not mediawikiQuery:

Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py	2007-12-09 20:43:47 UTC (rev 4682)
+++ trunk/pywikipedia/wikipedia.py	2007-12-10 03:07:37 UTC (rev 4683)
@@ -1123,7 +1123,7 @@
         """
         # Fetch a page to get an edit token. If we already have
         # fetched a page, this will do nothing, because get() is cached.
-        # Disabled in r4027
+        # Disabled in r4028
         #try:
         #    self.site().sandboxpage.get(force = True, get_redirect = True)
         #except NoPage:
@@ -3468,7 +3468,7 @@
         search(query): query results from Special:Search
         allpages(): Special:Allpages
         newpages(): Special:Newpages
-        newImages(): Special:Log&type=upload
+        newimages(): Special:Log&type=upload
         longpages(): Special:Longpages
         shortpages(): Special:Shortpages
         categories(): Special:Categories (yields Category objects)
@@ -4214,19 +4214,18 @@
             if not repeat:
                 break
 
-    def newImages(self, limit = 50, repeat = False):
+    def newimages(self, number = 10, repeat = False):
         """Yield ImagePages from Special:Log&type=upload"""
         # Url of the new images
-        url = "/w/index.php?title=Special:Log&type=upload&user=&page=&pattern=&limit=%d&offset=0" % int(limit)
+        url = "/w/index.php?title=Special:Log&type=upload&user=&page=&pattern=&limit=%d&offset=0" % number
         # Get the HTML text
         html = self.getUrl(url)
         image_namespace = self.image_namespace()
         regexp = re.compile(
             r'(?P<new>class=\"new\" |)title=\"%s:(?P<image>.*?)\.(?P<ext>\w\w\w|jpeg)\">.*?</a>\".*?(?:<span class=\"comment\">.*?|)</li>' % image_namespace,
             re.UNICODE)
-        pos = 0
-        seen = list()
-        ext_list = list()
+        seen = set()
+
         while True:
             for m in regexp.finditer(html):
                 new = m.group('new')
@@ -4234,16 +4233,14 @@
                 ext = m.group('ext')
                 # This prevent pages with strange characters. They will be loaded without problem.
                 image =  "%s.%s" % (im, ext)
-                if new != '':
-                    output(u"Skipping %s because it has been deleted." % image)
-                    if image not in seen:
-                        seen.append(image)
                 if image not in seen:
-                    seen.append(image)
-                    page = Page(self, 'Image:%s' % image)
+                    seen.add(image)
+                    if new != '':
+                        output(u"Image \'%s\' has been deleted." % image)
+                        continue
+                    page = ImagePage(self, image)
                     yield page
-            if not repeat:            
-                output(u"\t\t>> All images checked. <<")
+            if not repeat:
                 break
 
     def uncategorizedimages(self, number = 10, repeat = False):





More information about the Pywikipedia-l mailing list