Revision: 4683
Author: cosoleto
Date: 2007-12-10 03:07:37 +0000 (Mon, 10 Dec 2007)

Log Message:
-----------
code cleanup

Modified Paths:
--------------
    trunk/pywikipedia/add_text.py
    trunk/pywikipedia/checkimages.py
    trunk/pywikipedia/pagegenerators.py
    trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/add_text.py =================================================================== --- trunk/pywikipedia/add_text.py 2007-12-09 20:43:47 UTC (rev 4682) +++ trunk/pywikipedia/add_text.py 2007-12-10 03:07:37 UTC (rev 4683) @@ -154,12 +154,6 @@ generator = untaggedGenerator(untaggedProject) elif arg == '-up': up = True - elif arg.startswith('-newimages'): - if len(arg) == 10: - limit = wikipedia.input(u'How many images do you want to check?') - else: - limit = arg[11:] - generator = pagegenerators.newImages(limit, wikipedia.getSite()) elif arg == '-always': always = True else:
Modified: trunk/pywikipedia/checkimages.py =================================================================== --- trunk/pywikipedia/checkimages.py 2007-12-09 20:43:47 UTC (rev 4682) +++ trunk/pywikipedia/checkimages.py 2007-12-10 03:07:37 UTC (rev 4683) @@ -704,7 +704,7 @@ generator = mainClass.untaggedGenerator(projectUntagged, rep_page, com) normal = False if normal == True: - generator = pagegenerators.newImages(limit, site) + generator = pagegenerators.NewimagesPageGenerator(number = limit, site = site) if urlUsed == True and regexGen == True: textRegex = pagetext(regexPageUrl) elif regexGen == True:
Modified: trunk/pywikipedia/pagegenerators.py =================================================================== --- trunk/pywikipedia/pagegenerators.py 2007-12-09 20:43:47 UTC (rev 4682) +++ trunk/pywikipedia/pagegenerators.py 2007-12-10 03:07:37 UTC (rev 4683) @@ -195,10 +195,10 @@ for page in site.uncategorizedimages(number=number, repeat=repeat): yield page
-def newImages(limit = 50, site = None, repeat = False): +def NewimagesPageGenerator(number = 100, repeat = False, site = None): if site is None: site = wikipedia.getSite() - for page in site.newImages(limit, repeat=repeat): + for page in site.newimages(number, repeat=repeat): yield page
def UnCategorizedPageGenerator(number = 100, repeat = False, site = None): @@ -772,17 +772,14 @@ if namespace: prefix = prefix[colon+1:] gen = PrefixingPageGenerator(prefix = prefix, namespace = namespace) - elif arg.startswith('-newimages'): - if len(arg) == 10: - limit = wikipedia.input(u'How many images do you want to check?') - else: - limit = arg[11:] - gen = newImages(limit, wikipedia.getSite()) elif arg.startswith('-new'): if len(arg) >=5: gen = NewpagesPageGenerator(number = int(arg[5:])) else: gen = NewpagesPageGenerator(number = 60) + elif arg.startswith('-newimages'): + limit = arg[11:] or wikipedia.input(u'How many images do you want to check?') + gen = NewimagesPageGenerator(number = limit) elif arg.startswith('-search'): mediawikiQuery = arg[8:] if not mediawikiQuery:
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2007-12-09 20:43:47 UTC (rev 4682) +++ trunk/pywikipedia/wikipedia.py 2007-12-10 03:07:37 UTC (rev 4683) @@ -1123,7 +1123,7 @@ """ # Fetch a page to get an edit token. If we already have # fetched a page, this will do nothing, because get() is cached. - # Disabled in r4027 + # Disabled in r4028 #try: # self.site().sandboxpage.get(force = True, get_redirect = True) #except NoPage: @@ -3468,7 +3468,7 @@ search(query): query results from Special:Search allpages(): Special:Allpages newpages(): Special:Newpages - newImages(): Special:Log&type=upload + newimages(): Special:Log&type=upload longpages(): Special:Longpages shortpages(): Special:Shortpages categories(): Special:Categories (yields Category objects) @@ -4214,19 +4214,18 @@ if not repeat: break
- def newImages(self, limit = 50, repeat = False): + def newimages(self, number = 10, repeat = False): """Yield ImagePages from Special:Log&type=upload""" # Url of the new images - url = "/w/index.php?title=Special:Log&type=upload&user=&page=&pattern=&limit=%d&offset=0" % int(limit) + url = "/w/index.php?title=Special:Log&type=upload&user=&page=&pattern=&limit=%d&offset=0" % number # Get the HTML text html = self.getUrl(url) image_namespace = self.image_namespace() regexp = re.compile( r'(?P<new>class="new" |)title="%s:(?P<image>.*?).(?P<ext>\w\w\w|jpeg)">.*?</a>".*?(?:<span class="comment">.*?|)</li>' % image_namespace, re.UNICODE) - pos = 0 - seen = list() - ext_list = list() + seen = set() + while True: for m in regexp.finditer(html): new = m.group('new') @@ -4234,16 +4233,14 @@ ext = m.group('ext') # This prevent pages with strange characters. They will be loaded without problem. image = "%s.%s" % (im, ext) - if new != '': - output(u"Skipping %s because it has been deleted." % image) - if image not in seen: - seen.append(image) if image not in seen: - seen.append(image) - page = Page(self, 'Image:%s' % image) + seen.add(image) + if new != '': + output(u"Image '%s' has been deleted." % image) + continue + page = ImagePage(self, image) yield page - if not repeat: - output(u"\t\t>> All images checked. <<") + if not repeat: break
def uncategorizedimages(self, number = 10, repeat = False):
pywikipedia-l@lists.wikimedia.org