[Pywikipedia-l] SVN: [4683] trunk/pywikipedia
cosoleto at svn.wikimedia.org
Mon Dec 10 03:07:43 UTC 2007
Revision: 4683
Author: cosoleto
Date: 2007-12-10 03:07:37 +0000 (Mon, 10 Dec 2007)
Log Message:
-----------
code cleanup
Modified Paths:
--------------
trunk/pywikipedia/add_text.py
trunk/pywikipedia/checkimages.py
trunk/pywikipedia/pagegenerators.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/add_text.py
===================================================================
--- trunk/pywikipedia/add_text.py 2007-12-09 20:43:47 UTC (rev 4682)
+++ trunk/pywikipedia/add_text.py 2007-12-10 03:07:37 UTC (rev 4683)
@@ -154,12 +154,6 @@
generator = untaggedGenerator(untaggedProject)
elif arg == '-up':
up = True
- elif arg.startswith('-newimages'):
- if len(arg) == 10:
- limit = wikipedia.input(u'How many images do you want to check?')
- else:
- limit = arg[11:]
- generator = pagegenerators.newImages(limit, wikipedia.getSite())
elif arg == '-always':
always = True
else:
Modified: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py 2007-12-09 20:43:47 UTC (rev 4682)
+++ trunk/pywikipedia/checkimages.py 2007-12-10 03:07:37 UTC (rev 4683)
@@ -704,7 +704,7 @@
generator = mainClass.untaggedGenerator(projectUntagged, rep_page, com)
normal = False
if normal == True:
- generator = pagegenerators.newImages(limit, site)
+ generator = pagegenerators.NewimagesPageGenerator(number = limit, site = site)
if urlUsed == True and regexGen == True:
textRegex = pagetext(regexPageUrl)
elif regexGen == True:
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2007-12-09 20:43:47 UTC (rev 4682)
+++ trunk/pywikipedia/pagegenerators.py 2007-12-10 03:07:37 UTC (rev 4683)
@@ -195,10 +195,10 @@
for page in site.uncategorizedimages(number=number, repeat=repeat):
yield page
-def newImages(limit = 50, site = None, repeat = False):
+def NewimagesPageGenerator(number = 100, repeat = False, site = None):
if site is None:
site = wikipedia.getSite()
- for page in site.newImages(limit, repeat=repeat):
+ for page in site.newimages(number, repeat=repeat):
yield page
def UnCategorizedPageGenerator(number = 100, repeat = False, site = None):
@@ -772,17 +772,14 @@
if namespace:
prefix = prefix[colon+1:]
gen = PrefixingPageGenerator(prefix = prefix, namespace = namespace)
- elif arg.startswith('-newimages'):
- if len(arg) == 10:
- limit = wikipedia.input(u'How many images do you want to check?')
- else:
- limit = arg[11:]
- gen = newImages(limit, wikipedia.getSite())
elif arg.startswith('-new'):
if len(arg) >=5:
gen = NewpagesPageGenerator(number = int(arg[5:]))
else:
gen = NewpagesPageGenerator(number = 60)
+ elif arg.startswith('-newimages'):
+ limit = arg[11:] or wikipedia.input(u'How many images do you want to check?')
+ gen = NewimagesPageGenerator(number = limit)
elif arg.startswith('-search'):
mediawikiQuery = arg[8:]
if not mediawikiQuery:
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2007-12-09 20:43:47 UTC (rev 4682)
+++ trunk/pywikipedia/wikipedia.py 2007-12-10 03:07:37 UTC (rev 4683)
@@ -1123,7 +1123,7 @@
"""
# Fetch a page to get an edit token. If we already have
# fetched a page, this will do nothing, because get() is cached.
- # Disabled in r4027
+ # Disabled in r4028
#try:
# self.site().sandboxpage.get(force = True, get_redirect = True)
#except NoPage:
@@ -3468,7 +3468,7 @@
search(query): query results from Special:Search
allpages(): Special:Allpages
newpages(): Special:Newpages
- newImages(): Special:Log&type=upload
+ newimages(): Special:Log&type=upload
longpages(): Special:Longpages
shortpages(): Special:Shortpages
categories(): Special:Categories (yields Category objects)
@@ -4214,19 +4214,18 @@
if not repeat:
break
- def newImages(self, limit = 50, repeat = False):
+ def newimages(self, number = 10, repeat = False):
"""Yield ImagePages from Special:Log&type=upload"""
# Url of the new images
- url = "/w/index.php?title=Special:Log&type=upload&user=&page=&pattern=&limit=%d&offset=0" % int(limit)
+ url = "/w/index.php?title=Special:Log&type=upload&user=&page=&pattern=&limit=%d&offset=0" % number
# Get the HTML text
html = self.getUrl(url)
image_namespace = self.image_namespace()
regexp = re.compile(
r'(?P<new>class=\"new\" |)title=\"%s:(?P<image>.*?)\.(?P<ext>\w\w\w|jpeg)\">.*?</a>\".*?(?:<span class=\"comment\">.*?|)</li>' % image_namespace,
re.UNICODE)
- pos = 0
- seen = list()
- ext_list = list()
+ seen = set()
+
while True:
for m in regexp.finditer(html):
new = m.group('new')
@@ -4234,16 +4233,14 @@
ext = m.group('ext')
# This prevent pages with strange characters. They will be loaded without problem.
image = "%s.%s" % (im, ext)
- if new != '':
- output(u"Skipping %s because it has been deleted." % image)
- if image not in seen:
- seen.append(image)
if image not in seen:
- seen.append(image)
- page = Page(self, 'Image:%s' % image)
+ seen.add(image)
+ if new != '':
+ output(u"Image \'%s\' has been deleted." % image)
+ continue
+ page = ImagePage(self, image)
yield page
- if not repeat:
- output(u"\t\t>> All images checked. <<")
+ if not repeat:
break
def uncategorizedimages(self, number = 10, repeat = False):
More information about the Pywikipedia-l mailing list